In [22]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import warnings 
warnings.filterwarnings('ignore')


In [23]:
df_merged = pd.read_csv("data\_2013\_merged\merged_data.csv")
df_merged 

Unnamed: 0,valid_time,latitude,longitude,max_temperature,mean_temperature,max_precipitation,mean_precipitation,mslp
0,01-01-2013,18.80,72.8,299.05432,298.09018,0.000000,0.000000e+00,101133.750
1,01-01-2013,19.05,72.8,299.36420,297.88724,0.000000,0.000000e+00,101130.880
2,02-01-2013,18.80,72.8,298.89770,297.47516,0.000002,2.900000e-07,101268.055
3,02-01-2013,19.05,72.8,299.39655,297.32275,0.000008,7.630000e-07,101269.910
4,03-01-2013,18.80,72.8,298.57944,296.75388,0.000000,0.000000e+00,101311.086
...,...,...,...,...,...,...,...,...
725,29-12-2013,19.05,72.8,298.80722,296.58868,0.000000,0.000000e+00,101294.930
726,30-12-2013,18.80,72.8,297.89938,296.77527,0.000000,0.000000e+00,101314.760
727,30-12-2013,19.05,72.8,298.11606,296.58182,0.000000,0.000000e+00,101315.160
728,31-12-2013,18.80,72.8,298.16754,296.70715,0.000000,0.000000e+00,101401.780


In [28]:
df_merged["valid_time"] = pd.to_datetime(df_merged["valid_time"], format="%d-%m-%y")
df_merged["month"] = df_merged["valid_time"].dt.month

# Group by month and calculate mean values for visualization
monthly_data = df_merged.groupby("month").mean()

# Set figure size
plt.figure(figsize=(2, 2))

KeyError: 'valid_time'

In [None]:
# Plot 1: Temperature Trends
plt.subplot(1, 1, 1)
plt.plot(monthly_data.index, monthly_data["max_temperature"], label="Max Temperature (K)", color="red", marker="o")
plt.plot(monthly_data.index, monthly_data["mean_temperature"], label="Mean Temperature (K)", color="green", marker="o")
plt.xlabel("Month")
plt.ylabel("Temperature (K)")
plt.title("Monthly Temperature Trends (Kelvin)")
plt.xticks(range(1, 13), labels=["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"])
plt.grid(True)
plt.legend()
plt.show()


In [None]:
plt.subplot(1, 1, 1)
plt.plot(monthly_data.index, monthly_data["max_precipitation"], label="Max Precipitation (mm)", color="darkblue", marker="o")
plt.plot(monthly_data.index, monthly_data["mean_precipitation"], label="Mean Precipitation (mm)", color="lightblue", marker="o")
plt.xlabel("Month")
plt.ylabel("Precipitation (mm)")
plt.title("Monthly Precipitation Trends")
plt.xticks(range(1, 13), labels=["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"])
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
plt.subplot(1, 1, 1)
plt.plot(monthly_data.index, monthly_data["mslp"], label="Mean Sea Level Pressure (Pa)", color="yellow", marker="o")
plt.xlabel("Month")
plt.ylabel("MSLP (Pa)")
plt.title("Monthly MSLP Trends")
plt.xticks(range(1, 13), labels=["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"])
plt.legend()
plt.tight_layout()
plt.show()

In [25]:
# Ensure valid_time is in datetime format
df_merged["valid_time"] = pd.to_datetime(df_merged["valid_time"], format="%d-%m-%y", errors="coerce")

# Extract numeric features from date
df_merged["month"] = df_merged["valid_time"].dt.month  # Extract month (1-13)
df_merged["day_of_year"] = df_merged["valid_time"].dt.dayofyear 
df_merged["year"] = df_merged["valid_time"].dt.year         # Extract Year
 # Extract day of year (1-365)

# Drop the original datetime column
df_merged.drop(columns=["valid_time"], inplace=True)

In [26]:

heatwave_threshold = df_merged["max_temperature"].quantile(0.90)
# Create binary class label: 1 if max_temperature exceeds threshold, else 0
df_merged["heatwave_label"] = (df_merged["max_temperature"] > heatwave_threshold).astype(int)


In [27]:
df_merged

Unnamed: 0,latitude,longitude,max_temperature,mean_temperature,max_precipitation,mean_precipitation,mslp,month,day_of_year,year,heatwave_label
0,18.80,72.8,299.05432,298.09018,0.000000,0.000000e+00,101133.750,,,,0
1,19.05,72.8,299.36420,297.88724,0.000000,0.000000e+00,101130.880,,,,0
2,18.80,72.8,298.89770,297.47516,0.000002,2.900000e-07,101268.055,,,,0
3,19.05,72.8,299.39655,297.32275,0.000008,7.630000e-07,101269.910,,,,0
4,18.80,72.8,298.57944,296.75388,0.000000,0.000000e+00,101311.086,,,,0
...,...,...,...,...,...,...,...,...,...,...,...
725,19.05,72.8,298.80722,296.58868,0.000000,0.000000e+00,101294.930,,,,0
726,18.80,72.8,297.89938,296.77527,0.000000,0.000000e+00,101314.760,,,,0
727,19.05,72.8,298.11606,296.58182,0.000000,0.000000e+00,101315.160,,,,0
728,18.80,72.8,298.16754,296.70715,0.000000,0.000000e+00,101401.780,,,,0


In [None]:
if os.path.exists("data/_2013/_labelled/heat_labelled.csv"):
    print("Files already exist")
else:
    df_merged.to_csv("data/_2013/_labelled/heat_labelled.csv")
    print("File created") 