## Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pycountry 

# Plotly
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
from IPython.display import display, HTML

## Importing data

In [2]:
# Read the per-country land-temperature records from the local CSV file.
data = pd.read_csv(filepath_or_buffer="data/GlobalLandTemperaturesByCountry.csv")

# Report the raw dimensions and the column labels before any cleaning.
print("Size Before dropping NA values:", data.shape)
print(data.columns)

Size Before dropping NA values: (577462, 4)
Index(['dt', 'AverageTemperature', 'AverageTemperatureUncertainty', 'Country'], dtype='object')


## Finding and Dropping NA Values

In [3]:
# Missing-value count per column, printed before anything is removed.
print(data.isnull().sum())

# Discard every row that has at least one missing value. This is done in place
# so `data` stays the same object that the following cells add columns to.
data.dropna(axis="index", inplace=True)

# Independent snapshot of the cleaned frame, taken before columns are added or renamed.
copy = data.copy(deep=True)

print("Size after dropping NA values:", data.shape)

dt                                   0
AverageTemperature               32651
AverageTemperatureUncertainty    31912
Country                              0
dtype: int64
Size after dropping NA values: (544811, 4)


## Formatting dates
Parse the date column, rename the columns, and group the data by year.

In [4]:
# Parse the raw 'dt' strings once and replace that column with a datetime 'Date'.
# The guard keeps the cell re-runnable: after the first run 'dt' no longer exists.
if 'dt' in data.columns:
    data['Date'] = pd.to_datetime(data['dt'])
    data.drop(columns=['dt'], inplace=True)

# Calendar components derived from the parsed date.
data['Day'] = data['Date'].dt.day
data['Month'] = data['Date'].dt.month
data['Year'] = data['Date'].dt.year

# Rename by explicit mapping rather than by position, so a change in column
# order can never attach a label to the wrong column. Labels that are already
# renamed are simply left alone on a re-run.
data.rename(
    columns={
        'AverageTemperature': 'Average Temperature',
        'AverageTemperatureUncertainty': 'Average Temperature Uncertainty',
    },
    inplace=True,
)

data.head()

Unnamed: 0,Average Temperature,Average Temperature Uncertainty,Country,Date,Day,Month,Year
0,4.384,2.294,Åland,1743-11-01,1,11,1743
5,1.53,4.68,Åland,1744-04-01,1,4,1744
6,6.702,1.789,Åland,1744-05-01,1,5,1744
7,11.609,1.577,Åland,1744-06-01,1,6,1744
8,15.342,1.41,Åland,1744-07-01,1,7,1744


In [5]:
# Yearly roll-up of temperature, row count and combined uncertainty
def aggregate_by_year(df):
    """Summarise the temperature records of ``df`` per calendar year.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Year', 'Average Temperature' and
        'Average Temperature Uncertainty'.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Year' with three columns:
        'Average_Temperature' (mean temperature of the year's rows),
        'Monthly_Data_Count' (number of non-null temperature rows), and
        'Average_Uncertainty' (square root of the summed squared
        uncertainties, divided by the number of rows in the year).
    """
    def _combined_uncertainty(values):
        # sqrt(sum(u_i^2)) / n over the rows of one year
        root_sum_squares = (values ** 2).sum() ** 0.5
        return root_sum_squares / len(values)

    # output column -> (source column, aggregation)
    named_aggregations = {
        'Average_Temperature': ('Average Temperature', 'mean'),
        'Monthly_Data_Count': ('Average Temperature', 'count'),
        'Average_Uncertainty': ('Average Temperature Uncertainty', _combined_uncertainty),
    }
    return df.groupby('Year').agg(**named_aggregations)

# Year-level summary pooled over every country in `data`.
# It is kept under its own name so the per-country table built below does not
# overwrite it. The helper's row-count column is dropped: it counts rows from
# all countries together, so comparing it with 12 says nothing about whether a
# year has all twelve months (the earlier `== 12` filter also discarded its
# own result and therefore never changed anything).
yearly_summary = aggregate_by_year(data).drop(columns='Monthly_Data_Count')

# Mean temperature and mean uncertainty for every (Year, Country) pair;
# this is the table the map cells below work from.
earth_data = (
    data.groupby(by=['Year', 'Country'])[[
        'Average Temperature',
        'Average Temperature Uncertainty',
    ]]
    .mean()
    .reset_index()
)

# Summary statistics of the per-country yearly means (one row per measure).
earth_data[['Average Temperature', 'Average Temperature Uncertainty']].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Average Temperature,45915.0,17.177337,9.403671,-22.616,9.395083,20.513083,25.707708,30.74475
Average Temperature Uncertainty,45915.0,1.026471,1.067285,0.115083,0.346583,0.594583,1.222792,7.281667


## Visualize temperature changes through maps and animations

In [6]:
def country_to_iso3(country_name):
    """Return the ISO 3166-1 alpha-3 code that pycountry finds for ``country_name``.

    Returns ``None`` when the lookup raises ``LookupError``, i.e. when
    pycountry has no matching country for the given name.
    """
    try:
        match = pycountry.countries.lookup(country_name)
    except LookupError:
        # No matching country: signal "no code" instead of failing.
        return None
    return match.alpha_3

# Keep one frame per decade (years divisible by 10) for the animation, and add
# the ISO-3 code used to place each country on the map. Names that
# country_to_iso3 cannot resolve get None in 'Country_ISO3'.
earth_data = (
    earth_data.loc[earth_data['Year'] % 10 == 0]
    .reset_index(drop=True)
    .assign(Country_ISO3=lambda decades: decades['Country'].apply(country_to_iso3))
)

# Creates a choropleth figure object
fig = px.choropleth(
    earth_data,
    locations='Country_ISO3',         # Column holding the location identifiers (ISO-3 codes).
    locationmode='ISO-3',             # Locations are three-letter ISO country codes, not country names.
    color='Average Temperature',      # 'Average Temperature' column to be used for coloring the map.
    hover_name='Country',             # Info shown when hovering over a country.
    animation_frame='Year',           # Adds an animation slider based on the 'Year' column (the unit is ten years).
    title='Average Land Temperature in Countries',
    labels={'Average Temperature': 'Avg Temp (°C)'}
)

# Colour-bar tick positions from -10 to 40 in steps of 5. The labels are built
# from the same list, so a tick can never be shown with another tick's label
# (previously the ticks at 0 and -5 carried each other's text).
colorbar_ticks = list(range(-10, 45, 5))

# Adjust the layout
fig.update_layout(
    width=1000,
    height=800,
    coloraxis=dict(
        colorscale='rdbu_r',
        colorbar=dict(
            title="Avg Temp (°C)",
            tickvals=colorbar_ticks,
            ticktext=[str(tick) for tick in colorbar_ticks]
        )
    ),
    title_font_size=28,
    title_x=0.5,
    geo=dict(
        showcoastlines=True,
        coastlinecolor="Black",
        projection_type="natural earth", # "orthographic"
        landcolor="white",
        subunitcolor="gray"
    ),
)

# Static PNG export, embedded below as an image.
pio.write_image(fig, "images/choropleth_map.png", scale=2)  # Requires kaleido
display(HTML('<img src="images/choropleth_map.png" width="900">'))

# Interactive HTML export, linked below.
fig.write_html("charts/choropleth_map.html")
display(HTML('<a href="charts/choropleth_map.html" target="_blank">View Interactive Choropleth Map</a>'))

In [7]:
# NOTE(review): this repeats the country_to_iso3 definition from the choropleth
# cell above; the two should be kept in sync or reduced to a single definition.
def country_to_iso3(country_name):
    """Return the ISO 3166-1 alpha-3 code that pycountry finds for ``country_name``.

    Returns ``None`` when the lookup raises ``LookupError``, i.e. when
    pycountry has no matching country for the given name.
    """
    try:
        match = pycountry.countries.lookup(country_name)
    except LookupError:
        # No matching country: signal "no code" instead of failing.
        return None
    return match.alpha_3

# Per-country standard deviation of the temperature readings from 1975 onward.
# Countries whose deviation comes out as NaN are removed.
earth_data_std = (
    data[data['Year'] >= 1975]
    .groupby(by=['Country'])[['Average Temperature']]
    .std()
    .reset_index()
    .rename(columns={'Average Temperature': 'Temperature STD'})
    .dropna(subset=['Temperature STD'])
)

# ISO-3 code for each country; None where the name cannot be resolved.
earth_data_std['ISO3'] = earth_data_std['Country'].apply(country_to_iso3)

# Bubble map: one marker per country, coloured and sized by its standard deviation.
fig = px.scatter_geo(
    earth_data_std,
    title="Global Temperatures Standard Deviation by Country",
    projection="natural earth",
    locations="ISO3",
    locationmode="ISO-3",
    hover_name="Country",
    color="Temperature STD",
    size=earth_data_std["Temperature STD"].abs(),
    size_max=40,
)

fig.update_layout(
    width=1000,
    height=800,
    coloraxis_colorbar={"title": "Std Temp (°C)"},
    legend_title={"text": "Standard Deviation Temperature Scale"},
)

# Static PNG export, embedded below as an image.
pio.write_image(fig, "images/temp_std_dev_map.png", scale=2)  # Requires kaleido
display(HTML('<img src="images/temp_std_dev_map.png" width="900">'))

# Interactive HTML export, linked below.
fig.write_html("charts/temp_std_dev_map.html")
display(HTML('<a href="charts/temp_std_dev_map.html" target="_blank">View Interactive Std Temp Map</a>'))