Imports

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import math
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
%matplotlib inline

In [None]:
# Read the five temperature CSV files from the local ./dataset folder.
# The order of the paths matches the order of the names they are unpacked into.
csv_paths = (
    "./dataset/GlobalLandTemperaturesByCity.csv",
    "./dataset/GlobalLandTemperaturesByCountry.csv",
    "./dataset/GlobalLandTemperaturesByMajorCity.csv",
    "./dataset/GlobalLandTemperaturesByState.csv",
    "./dataset/GlobalTemperatures.csv",
)
df_city, df_country, df_mcity, df_state, df_temp = (
    pd.read_csv(csv_path) for csv_path in csv_paths
)

Plotting function

In [None]:
def plot(df, ax, y, title, xlabel, ylabel) -> None:
    """Draw the yearly mean of column ``y`` with its uncertainty band on ``ax``.

    The frame is resampled into year-end bins on its ``dt`` column and every
    numeric column is averaged per bin. The shaded band spans the yearly mean
    of ``y`` minus/plus the yearly mean of the ``y + 'Uncertainty'`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a datetime ``dt`` column, the column ``y`` and a companion
        column named ``y + 'Uncertainty'``.
    ax : matplotlib Axes
        Axes to draw on.
    y : str
        Name of the value column to plot.
    title, xlabel, ylabel : str
        Text for the axes title and axis labels.

    Raises
    ------
    KeyError
        If ``y`` or ``y + 'Uncertainty'`` is not among the yearly means.

    Notes
    -----
    Nothing is shown here; the caller lays out the figure and calls
    ``plt.show()`` once all panels are drawn.
    """
    uncertainty_col = y + 'Uncertainty'
    yearly_data = df.resample('YE', on='dt').mean(numeric_only=True)
    upper_bound = yearly_data[y] + yearly_data[uncertainty_col]
    lower_bound = yearly_data[y] - yearly_data[uncertainty_col]

    ax = yearly_data.plot(kind='line', y=y, ax=ax)
    ax.fill_between(yearly_data.index, lower_bound, upper_bound, color='grey', alpha=0.5, label='Uncertainty Range')
    # pandas creates the legend inside .plot(), i.e. before the band exists.
    # Rebuild it so the 'Uncertainty Range' label is actually displayed.
    ax.legend()
    ax.set_title(title, fontsize=16, color='navy')
    ax.set_ylabel(ylabel)
    ax.set_xlabel(xlabel)
    ax.grid(axis='y', linestyle='--', alpha=0.7)

Temperature graphs by Cities

In [None]:
# df_city['dt'] = pd.to_datetime(df_city['dt'])
# cities = df_city['City'].unique()
# cols = 4
# rows = math.ceil(len(cities)/cols)
# fig, axes = plt.subplots(rows, cols, figsize=(20, rows*3))
# axes = axes.flatten()
# for i, city in enumerate(cities):
#     plot(df_city[df_city['City']==city], axes[i], 'AverageTemperature', city, 'Year', '°C')
# plt.tight_layout()
# plt.show()
# del df_city

Temperature graphs by Countries

In [None]:
# Parse the dates up front; plot() resamples on the 'dt' column.
df_country['dt'] = pd.to_datetime(df_country['dt'])
countries = df_country['Country'].unique()
cols = 4
rows = math.ceil(len(countries)/cols)
# squeeze=False always returns a 2-D array, so flatten() keeps working
# even if `cols` is ever changed to 1.
fig, axes = plt.subplots(rows, cols, figsize=(20, rows*2.5), squeeze=False)
axes = axes.flatten()
for ax, country in zip(axes, countries):
    plot(df_country[df_country['Country']==country], ax, 'AverageTemperature', country, 'Year', '°C')
# The grid has rows*cols slots; hide the trailing ones that received no
# country so they do not render as empty frames.
for unused_ax in axes[len(countries):]:
    unused_ax.set_visible(False)
plt.tight_layout()
plt.show()

Temperature graphs by Major Cities

In [None]:
# Parse the dates up front; plot() resamples on the 'dt' column.
df_mcity['dt'] = pd.to_datetime(df_mcity['dt'])
mcities = df_mcity['City'].unique()
cols = 4
rows = math.ceil(len(mcities)/cols)
# squeeze=False always returns a 2-D array, so flatten() keeps working
# even if `cols` is ever changed to 1.
fig, axes = plt.subplots(rows, cols, figsize=(20, rows*3), squeeze=False)
axes = axes.flatten()
for ax, mcity in zip(axes, mcities):
    plot(df_mcity[df_mcity['City']==mcity], ax, 'AverageTemperature', mcity, 'Year', '°C')
# The grid has rows*cols slots; hide the trailing ones that received no
# city so they do not render as empty frames.
for unused_ax in axes[len(mcities):]:
    unused_ax.set_visible(False)
plt.tight_layout()
plt.show()

Temperature graphs by States

In [None]:
# Parse the dates up front; plot() resamples on the 'dt' column.
df_state['dt'] = pd.to_datetime(df_state['dt'])
states = df_state['State'].unique()
cols = 4
rows = math.ceil(len(states)/cols)
# squeeze=False always returns a 2-D array, so flatten() keeps working
# even if `cols` is ever changed to 1.
fig, axes = plt.subplots(rows, cols, figsize=(20, rows*2.5), squeeze=False)
axes = axes.flatten()
for ax, state in zip(axes, states):
    plot(df_state[df_state['State']==state], ax, 'AverageTemperature', state, 'Year', '°C')
# The grid has rows*cols slots; hide the trailing ones that received no
# state so they do not render as empty frames.
for unused_ax in axes[len(states):]:
    unused_ax.set_visible(False)
plt.tight_layout()
plt.show()

Global Temperature graphs

In [None]:
# plot() resamples on 'dt', so make sure it holds datetimes.
df_temp['dt'] = pd.to_datetime(df_temp['dt'])

# One stacked panel per series: four measured columns plus the derived range.
fig, (ax1, ax2, ax3, ax4, ax5) = plt.subplots(5, 1, figsize=(16, 16))

measured_panels = [
    (ax1, 'LandAverageTemperature', 'Annual Land Average Temperature'),
    (ax2, 'LandMaxTemperature', 'Annual Land Maximum Temperature'),
    (ax3, 'LandMinTemperature', 'Annual Land Minimum Temperature'),
    (ax4, 'LandAndOceanAverageTemperature', 'Annual Land & Ocean Temperature'),
]
for panel_ax, column, heading in measured_panels:
    plot(df_temp, panel_ax, column, heading, 'Year', '°C')

# Range = max - min; its uncertainty is the root of the summed squares of
# the max and min uncertainties.
max_uncertainty = df_temp['LandMaxTemperatureUncertainty']
min_uncertainty = df_temp['LandMinTemperatureUncertainty']
df_temp['TemperatureRange'] = df_temp['LandMaxTemperature']-df_temp['LandMinTemperature']
df_temp['TemperatureRangeUncertainty'] = np.sqrt(max_uncertainty**2 + min_uncertainty**2)
plot(df_temp, ax5, 'TemperatureRange', 'Annual Land Temp Range', 'Year', '°C')

plt.tight_layout()
plt.show()

Heat map of major cities

In [None]:
df_city['Year'] = pd.to_datetime(df_city['dt']).dt.year


def func(x):
    """Convert a coordinate such as '57.05N' or '10.33W' to a signed float.

    Values containing 'S' or 'W' become negative. Numeric input is returned
    as a float unchanged and missing input becomes NaN, so this cell can be
    re-run (or run after another cell already converted the columns)
    without raising on non-string values.
    """
    if pd.isna(x):
        return np.nan
    if isinstance(x, (int, float)):
        return float(x)
    text = str(x).strip()
    magnitude = float(text.strip('EWNS'))
    return -magnitude if ('S' in text or 'W' in text) else magnitude


df_city['Latitude'] = df_city['Latitude'].apply(func)
df_city['Longitude'] = df_city['Longitude'].apply(func)

# Yearly mean per city; rows with any missing value are dropped afterwards.
# NOTE(review): grouping is by city name only, so distinct cities that share
# a name would be averaged together - confirm whether that can occur here.
city_map_data = (
    df_city
    .groupby(['Year', 'City'])[['Latitude', 'Longitude', 'AverageTemperature', 'AverageTemperatureUncertainty']]
    .mean()
    .reset_index()
    .dropna()
)
sorted_map_data = city_map_data.sort_values('Year')
sorted_map_data.head()

Average Temperature by City (Map Timelapse)

In [None]:
# Keyword arguments for the animated city scatter map: one frame per year,
# marker colour from temperature, marker size from its uncertainty.
city_map_options = dict(
    lat='Latitude',
    lon='Longitude',
    color='AverageTemperature',
    size='AverageTemperatureUncertainty',
    size_max=15,
    animation_frame='Year',
    animation_group='City',
    range_color=[-5,30],
    color_continuous_scale=px.colors.diverging.RdBu_r,
    zoom=1,
    hover_name='City',
    hover_data=['AverageTemperature'],
)
fig = px.scatter_mapbox(sorted_map_data, **city_map_options)

# Base map style and borderless layout set in a single call.
fig.update_layout(
    mapbox_style='open-street-map',
    margin=dict(t=0, r=0, b=0, l=0),
)

fig.show()
# Also save the interactive figure as a standalone HTML file.
fig.write_html("GlobalLandTemperaturesByCity.html")

Average Temperature by City (Globe Timelapse)

In [None]:
# Parse the date column once and reuse the parsed series for the year.
city_dates = pd.to_datetime(df_city["dt"])
df_city["dt"] = city_dates
df_city["Year"] = city_dates.dt.year

def to_signed(coord):
    """Convert a hemisphere-suffixed coordinate to a signed float.

    Parameters
    ----------
    coord : str, int, float or missing value
        Text such as ``"57.05N"`` or ``"10.33W"``, an already numeric value,
        or a missing value (``None`` / NaN).

    Returns
    -------
    float or None
        Negative when the trailing letter is S or W (either case), positive
        otherwise. ``None`` when ``coord`` is missing or blank.

    Raises
    ------
    ValueError
        If the numeric part of the text cannot be parsed by ``float``.
    """
    if pd.isna(coord):
        return None
    if isinstance(coord, (int, float)):
        return float(coord)
    text = str(coord).strip()
    if not text:
        # Blank text carries no coordinate; treat it like a missing value.
        return None
    suffix = text[-1].upper()
    if suffix.isalpha():
        magnitude = float(text[:-1])
        return -magnitude if suffix in ("S", "W") else magnitude
    # No trailing letter: the text is already a plain number, so parse it
    # whole instead of slicing off (and losing) its last digit.
    return float(text)


# Turn both coordinate columns into signed floats.
for coord_col in ("Latitude", "Longitude"):
    df_city[coord_col] = df_city[coord_col].apply(to_signed)

# Aggregate once for all years: mean position and temperature per
# (Year, City), using only rows where all three values are present.
yearly_city_means = {
    "Latitude": ("Latitude", "mean"),
    "Longitude": ("Longitude", "mean"),
    "AverageTemperature": ("AverageTemperature", "mean"),
}
df_anim = (
    df_city
    .dropna(subset=["AverageTemperature","Latitude","Longitude"])
    .groupby(["Year","City"], as_index=False)
    .agg(**yearly_city_means)
)

# Animated scatter on an orthographic (globe) projection, one frame per year.
globe_options = dict(
    lat="Latitude",
    lon="Longitude",
    color="AverageTemperature",
    range_color=[-5, 30],
    color_continuous_scale="RdBu_r",
    hover_name="City",
    animation_frame="Year",
    projection="orthographic",
)
fig = px.scatter_geo(df_anim, **globe_options)

# Base-map appearance: land and ocean fills plus country borders.
geo_style = dict(
    showland=True,
    landcolor="rgb(230, 230, 230)",
    showocean=True,
    oceancolor="rgb(200, 220, 255)",
    showcountries=True,
)
fig.update_geos(**geo_style)

# Title, margins (top margin leaves room for the title) and figure height.
layout_style = dict(
    title="Average Temperature by City (Timelapse)",
    margin=dict(l=0, r=0, t=40, b=0),
    height=600,
)
fig.update_layout(**layout_style)

fig.show()

Average Temperature by Country (Map Timelapse)

In [None]:
# Parse the date column once and reuse the parsed series for the year.
country_dates = pd.to_datetime(df_country["dt"])
df_country["dt"] = country_dates
df_country["Year"] = country_dates.dt.year

# Mean temperature per (Year, Country); pairs without a value are dropped.
yearly_by_country = df_country.groupby(["Year", "Country"], as_index=False)
df_year = yearly_by_country.agg(AverageTemperature=("AverageTemperature", "mean")).dropna()

# Column that identifies each region and how Plotly should interpret it.
loc = "Country"
locmode = "country names"

choropleth_options = dict(
    locations=loc,
    locationmode=locmode,
    color="AverageTemperature",
    range_color=[-5, 30],
    color_continuous_scale="RdBu_r",
    animation_frame="Year",
    projection="equirectangular",  # 2D map
)
fig = px.choropleth(df_year, **choropleth_options)

# Base-map appearance: land and ocean fills plus country borders.
geo_style = dict(
    showland=True,
    landcolor="rgb(230, 230, 230)",
    showocean=True,
    oceancolor="rgb(200, 220, 255)",
    showcountries=True,
)
fig.update_geos(**geo_style)

# Title, margins (top margin leaves room for the title) and figure height.
layout_style = dict(
    title="Average Land Temperature by Country (2D Timelapse)",
    margin=dict(l=0, r=0, t=40, b=0),
    height=600,
)
fig.update_layout(**layout_style)

fig.show()

Average Temperature by Country (Globe Timelapse)

In [None]:
df_country["dt"] = pd.to_datetime(df_country["dt"])
df_country["Year"] = df_country["dt"].dt.year

# Mean temperature per (Year, Country); pairs without a value are dropped.
df_year = (
    df_country
    .groupby(["Year","Country"], as_index=False)
    .agg(AverageTemperature=("AverageTemperature", "mean"))
    .dropna()
)

# Column that identifies each region and how Plotly should interpret it.
# Defined and used in this cell so it runs without relying on variables
# created by any other cell.
locations = "Country"
locationmode = "country names"

fig = px.choropleth(
    df_year,
    locations=locations,
    locationmode=locationmode,
    color="AverageTemperature",
    animation_frame="Year",
    color_continuous_scale="RdBu_r",
    range_color=[-5, 30],
)

# Orthographic projection renders the map as a globe.
fig.update_geos(
    projection_type="orthographic",
    showland=True,
    landcolor="rgb(230, 230, 230)",
    showocean=True,
    oceancolor="rgb(200, 220, 255)",
    showcountries=True
)

fig.update_layout(
    title="Average Land Temperature by Country (Timelapse)",
    margin=dict(l=0, r=0, t=40, b=0),
    height=600
)

fig.show()