In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning) ## suppress annoying deprecation warnings

from datetime import datetime

import pandas as pd
import seaborn.objects as so
from matplotlib import style

import plotly.express as px

# Part 1: Time series and other simple plots

In [None]:
# Map the terse raw column codes of the weather data to descriptive names
# (quantity plus unit) so that plots get readable axis labels by default.
col_rename = {
    'tavg': 'Temp_Avg_°C',
    'tmax': 'Temp_Max_°C',
    'tmin': 'Temp_Min_°C',
    'rhum': 'Rel_Humidity_%',
    'coco': 'Condition',
    'wspd': 'Wind_Speed_kmh',
    'prcp': 'Precipitation_mm',  # spelling fixed (was 'Precipation_mm'); this text ends up on axis labels
    'wdir': 'Wind_Direction_°',
    'pres': 'Air_pressure_hPa',
    'dwpt': 'Dew_point_°C',
}

In [None]:
# Load the weather observations and prepare them in one idempotent chain
# (no in-place mutation, so re-running the cell always yields the same frame):
#   1. read the CSV, parsing `time` as datetime and keeping `wmo` / `station` as strings
#   2. drop every row that contains at least one missing value
#   3. apply the descriptive column names from `col_rename`
#   4. derive `Continent` as the part of `timezone` before the first '/'
weather_df = (
    pd.read_csv(
        'global_weather.csv',
        parse_dates=['time'],
        dtype={'wmo': str, 'station': str},
    )
    .dropna()
    .rename(columns=col_rename)
    .assign(Continent=lambda df: df["timezone"].str.split('/').str[0])  ## Get continent from timezone column
)

weather_df.loc[weather_df["name"] == "Berlin / Tempelhof", :].head()  ## Let's have a look at a single city (capital)

## My first seaborn.objects plot

In [None]:
# Display scaling applied to every seaborn.objects plot rendered in this notebook
so.Plot.config.display["scaling"] = 1.0  ## Adjust standard output size to your liking

# A seaborn.objects plot is assembled layer by layer via method chaining;
# the bare parenthesised expression at the end of the cell is what gets displayed.
(
    so.Plot(
        weather_df[weather_df["name"] == "Berlin / Tempelhof"],  ## Data layer (required)
        x="time",                                                ## Axis mapping layer (required)
        y="Temp_Avg_°C",
    )
    .add(so.Dot())       ## Geometry layer (at least one required)
    .add(so.Line())      # Second, optional geometry layer: connect the dots with a line
    .limit(y=(0, 40))    # Coordinate layer (optional): fix the y-axis range instead of leaving it free
    .label(title="Temperature curve for Berlin / Tempelhof")  # Theme and label layers (optional)
)

## Plotting distributions
### What is the temperature range across all cities?

In [None]:
# Exercise cell: temperature spread over all stations per time point.
# `miss_timepoint` is a boolean mask for the rows dated 2024-03-06; plotting
# `weather_df[~miss_timepoint]` leaves those rows out to simulate a gap in the data.
# The mask is reused by a later cell. Fill in the `#!#` line to complete the exercise.
miss_timepoint= weather_df.time == datetime(2024, 3, 6) ## Simulate a missing timepoint and see what happens in plots
(
	so.Plot(weather_df[~miss_timepoint], x="time", y="Temp_Avg_°C")  ## Can you spot the missing time point?
    .add(so.Band()) 										# Geometry: Min-Max Band
	.add(so.Line(), 										# Geometry: Line
  	#!#    so.Agg(func=??)    								# Statistic: Mean (placeholder: uncomment and replace ??)
	)
    .label(title="Average temperature for all cities")
)

### What are so.Band and so.Agg doing in the background?

In [None]:
# Reproduce by hand what the plot layers compute: one row per time point with
# the minimum, mean and maximum of `Temp_Avg_°C` plus the number of
# observations behind them. A single groupby with named aggregation replaces
# four separate groupby passes, a concat and a manual column rename.
weather_agg = (
    weather_df
    .groupby("time")["Temp_Avg_°C"]
    .agg(
        temp_min="min",     ## so.Band - min-part
        temp_avg="mean",    ## so.Line, so.Agg
        temp_max="max",     ## so.Band - max-part
        nb_cities="count",  ## Let's check how many cities are aggregated
    )
)

weather_agg.head(n=10)

### Another visualization (date is categorical; show the data range as error bars instead of a min-max band)
Which visualization makes the missing time point easier to spot?

In [None]:
# Exercise cell: per-time-point mean as dots, with the spread to be added as a
# second layer. Uses the `miss_timepoint` mask defined in an earlier cell, so
# that cell must have been run first. Fill in the `#!#` line to add the range layer.
(
so.Plot(weather_df[~miss_timepoint], x="time", y="Temp_Avg_°C")	## Can you spot the missing time point?
	.add(so.Dot(), so.Agg()) 									# Geometry: Dot + Statistic: Mean
	#!# .add(so.???(), so.Est(errorbar=??)) 					# Geometry: Range + Statistic: Standard deviation
	.limit(y=(0, 40))											# Fixed y-axis range
	.layout(size=(20, 6)) 										# Increase the figure size for a better view
	.scale(
		x=so.Temporal().tick(upto=21).label(concise=True) 		# Allow up to 21 ticks and use concise date labels for legibility
	)
	.label(title="Average temperature for all cities")
)

## The problem of Spaghetti Plots

In [None]:
# Exercise cell: one aggregated wind-speed line per continent.
# NOTE: this cell is intentionally incomplete. The chain has no `so.Plot(...)`
# head until the `#!#` line is uncommented and its `??` placeholders are
# filled in, so running it as-is raises a SyntaxError.
(
    #!# so.Plot(weather_df, x="time", y=??, color=??)
		.add(so.Dot(), so.Agg())								# Aggregated value per time point as dots
		.add(so.Line(), so.Agg())								# Same aggregate, connected by lines
		.label(title="Average wind speed across continents")
)

### Alternatives: (1) Highlighting

In [None]:
# Exercise cell: de-emphasise all continents in gray and highlight a single one.
# Fill in the two `#!#` lines: the dot layer and the highlight color.
# NOTE(review): the per-entry continent comments assume the colors are assigned
# in the order the `Continent` values appear in the data — verify against the dataset.
(
    so.Plot(weather_df, x="time", y="Wind_Speed_kmh", color="Continent")
		#!# .add(so.Dot(alpha=0.5), so.Agg(), marker=??)		# set transparency by alpha 
		.add(so.Line(alpha=0.5), so.Agg() )		# Semi-transparent aggregated line per continent
		.scale(color=(							# Control the color scale
			"gray",								# Asia
		#!#	???,								# Highlight Europe
			"gray",								# Africa
			"gray",								# Pacific
			"gray",								# America
			"gray",								# Australia
			"gray",								# Atlantic
			"gray"								# Indian
			))
		.label(title="Average wind speed across continents")
)


### Alternatives: (2) Facet

In [None]:
# Exercise cell: avoid the spaghetti plot by drawing one subplot per group.
# Fill in the `#!#` line with the variable to facet by; until then everything
# is drawn into a single panel.
(
    so.Plot(weather_df, x="time", y="Wind_Speed_kmh")
		.add(so.Dot(), so.Agg())		# Aggregated value per time point as dots
		.add(so.Line(), so.Agg())		# Same aggregate, connected by lines
		.add(so.Band())					# Band geometry showing the spread
		#!# .facet(??, wrap=4)			# All you need for subplots
		.layout(size=(25, 8))			# Wide figure to leave room for the subplots
)

## Example: relationship of temperature, date and latitude (south - north)
### How to not do it: spaghetti line plot

In [None]:
# Exercise cell: temperature over time for the stations of one continent,
# colored by a variable of your choice.
# NOTE: this cell is intentionally incomplete. The chain has no `so.Plot(...)`
# head until the `#!#` line is uncommented and its placeholders are filled in,
# so running it as-is raises a SyntaxError.
(
#!#	so.Plot(weather_df.loc[weather_df["Continent"] == ???], x="time", y="Temp_Avg_°C", color=??)
    .add(so.Dot())
	.add(so.Line())
    .layout(size=(12, 6))
    .label(title="Latitude effect on temperature")
)

### Alternative: Dot array and flip latitude and temperature axis

In [None]:
# Exercise cell: dot array for the Asian stations — time on x, latitude on y,
# average temperature encoded as color.
# NOTE(review): the active `.add(so.Dot(...))` line directly below the `#!#`
# placeholder looks like the worked solution for that placeholder — confirm
# whether it is meant to stay in the exercise version.
# `.scale()` is called without arguments until the color placeholder is filled in.
(
	so.Plot(weather_df.loc[weather_df["Continent"] == "Asia"], x="time", y="latitude", color="Temp_Avg_°C")
    #!# .add(so.??(alpha=0.5, pointsize=10, edgecolor="black"))
    .add(so.Dot(alpha=0.5, pointsize=10, edgecolor="black"))
	.scale(
	#!# color = so.Continuous(??).tick(upto=10) # Important: choosing an intuitive colormap (https://seaborn.pydata.org/tutorial/color_palettes.html)
        ) 
    .layout(size=(12, 6))
    .label(title="Latitude effect on temperature - dot array")
)

### Alternative: Heatmap-like (via Dash)

In [None]:
# Exercise cell: heatmap-like view for the Asian stations — time on x,
# latitude on y, average temperature encoded as color.
# The only `.add(...)` call is the `#!#` placeholder, so the plot has no
# geometry layer until that line is filled in. The second placeholder selects
# a matplotlib style from `style.library` for the theme.
(
    so.Plot(weather_df.loc[weather_df["Continent"] == "Asia"], x="time", y="latitude", color="Temp_Avg_°C")
	#!# .add(so.??(alpha=0.8, width=0.8,  linewidth=8))
	.scale(
        color=so.Continuous("Spectral_r").tick(upto=10)  # "Spectral_r" colormap, at most 10 ticks on the color scale
        )
    .layout(size=(12, 6))
#!#	.theme({**style.library[??]}) # Increasing visibility on screens?
    .label(title="Latitude effect on temperature - heatmap")
)

### On all continents? No problem: facet

In [None]:
# Exercise cell: the heatmap-like Dash plot for the whole data set.
# Fill in the `#!#` line to split the plot into one subplot per group;
# until then all stations are drawn into a single panel.
(
    so.Plot(weather_df, x="time", y="latitude", color="Temp_Avg_°C")
    .add(so.Dash(alpha=0.8, width=0.8,  linewidth=4))  # One dash per observation
	.scale(
        color=so.Continuous("Spectral_r").tick(upto=10)  # "Spectral_r" colormap, at most 10 ticks on the color scale
        )
	#!# .facet(??, wrap=4).layout(size=(25, 8))
	.theme({**style.library["dark_background"]})  # Apply matplotlib's "dark_background" style parameters as the plot theme
)

## Example: what to do if you want to plot 3-4 variables?
### Interactive 3D plot with Plotly

In [None]:

# Exercise cell: interactive 3D scatter of dew point, latitude and average
# temperature. Fill in the `#!#` line to map a fourth variable to color.
fig = px.scatter_3d(weather_df, x="Dew_point_°C", y="latitude", z="Temp_Avg_°C",
                    #!# color=??, ##  for fourth dimension
					opacity=0.7)  # Semi-transparent markers
fig.update_traces(marker_size = 2)  # Make dots smaller
fig.update_layout(margin=dict(l=0, r=0, b=0, t=0)) # Reduce figure margins
fig.show()