In [1]:
import pandas as pd
from sklearn.linear_model import Ridge

In [2]:
# Load the weather CSV into a DataFrame: the "datetime" column becomes the
# index, and parse_dates=True asks pandas to parse that index as dates.
df = pd.read_csv(
    "Weather_data_2023_2024.csv",
    index_col="datetime",
    parse_dates=True,
)

Just over a full year of data, from May 1st, 2023 to July 7th, 2024
Source: https://www.visualcrossing.com/weather/weather-data-services#

In [3]:
# Restrict the frame to the columns of interest; every other column is dropped.
df = df.loc[:, [
    'location',
    'tempmax', 'tempmin', 'temp',
    'feelslikemax', 'feelslikemin', 'feelslike',
    'dew', 'humidity',
    'precip', 'precipprob', 'precipcover',
    'snow', 'snowdepth',
    'windgust', 'windspeed', 'winddir',
    'sealevelpressure', 'cloudcover', 'visibility',
    'moonphase', 'conditions', 'description',
]]

# Full dataset predictions - not separated by location

In [4]:
# Fit one Ridge regression per target (tempmax, tempmin) on the whole dataset,
# pooled across all locations, and store the predictions back on `df`.

# Columns excluded from the feature matrix: the temperature / feels-like
# columns (which include both targets), the location / conditions /
# description columns, moonphase, and the two prediction columns this cell
# itself adds -- listing those keeps the cell re-runnable without feeding
# earlier predictions back in as features.
columns_to_drop = ['location', 'tempmax', 'tempmin', 'temp', 'feelslikemax', 'feelslikemin', 'feelslike', 'moonphase', 'conditions', 'description', 'predicted_tempmax', 'predicted_tempmin']
# errors="ignore" skips any listed name that is not currently a column
# (e.g. the prediction columns on the first run).
features = df.drop(columns=columns_to_drop, errors="ignore")

# Define targets
target_max = df['tempmax']
target_min = df['tempmin']

# Train the model for tempmax on the entire dataset
ridge_max = Ridge()
ridge_max.fit(features, target_max)

# Train the model for tempmin on the entire dataset
ridge_min = Ridge()
ridge_min.fit(features, target_min)

# Predict tempmax and tempmin for the entire dataset.
# NOTE: these are in-sample predictions -- the models are scored on the same
# rows they were fitted on, so the comparison below is not a held-out estimate.
y_pred_max = ridge_max.predict(features)
y_pred_min = ridge_min.predict(features)

# Add predictions to the original dataframe. Plain assignment creates the
# columns when they are missing and overwrites them otherwise, so no
# placeholder columns are needed. `features` was derived from `df` without
# reordering rows, so the arrays line up with df's rows positionally.
df['predicted_tempmax'] = y_pred_max
df['predicted_tempmin'] = y_pred_min

# Order the rows by the datetime index. This runs after the predictions are
# attached, so every prediction stays on its own row.
df = df.sort_index()

# Display the dataframe with predictions
df[['location', 'tempmax', 'predicted_tempmax', 'tempmin', 'predicted_tempmin']].head(10)

Unnamed: 0_level_0,location,tempmax,predicted_tempmax,tempmin,predicted_tempmin
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-05-01,Beijing,25.2,24.793316,8.0,12.334775
2023-05-01,London,17.6,19.210057,11.3,10.509117
2023-05-01,Moscow,9.2,9.025516,3.4,3.581998
2023-05-01,Berlin,18.9,17.22541,3.4,6.745169
2023-05-01,Paris,15.7,16.694965,10.6,9.833612
2023-05-01,Mexico City,32.1,26.003719,5.0,9.18485
2023-05-01,Ottawa,9.9,11.563681,6.8,6.325338
2023-05-01,Rome,19.1,19.479019,14.1,12.151549
2023-05-01,Washington DC,16.0,20.20718,9.9,9.452038
2023-05-02,Ottawa,11.1,11.591107,5.1,4.58335
