In [44]:
import numpy as np
import pandas as pd
import lightgbm as lgb
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import HuberRegressor, Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [45]:

# Load the raw observations, parse the timestamp column, and order the rows
# chronologically so that later positional slicing follows time order.
weather_data = (
    pd.read_csv('weather_data.csv')
    .assign(Date_Time=lambda frame: pd.to_datetime(frame['Date_Time']))
    .sort_values(by='Date_Time')
    .reset_index(drop=True)
)

# Predictor columns and the regression target.
X = weather_data[
    [
        'Humidity_pct',
        'Precipitation_mm',
        'Wind_Speed_kmh',
    ]
]
y = weather_data['Temperature_C']

In [46]:
# Chronological hold-out: the first 80% of the time-sorted rows train the
# models and the remaining 20% are kept for evaluation.
split_ratio = 0.8
split_index = int(split_ratio * len(weather_data))

# Explicit positional slicing of the features and the target.
X_train = X.iloc[:split_index]
X_test = X.iloc[split_index:]
y_train = y.iloc[:split_index]
y_test = y.iloc[split_index:]

In [47]:
# Timestamps of the held-out rows (same positional cut as X_test / y_test).
date_time_test = weather_data['Date_Time'].iloc[split_index:]

# Hour-of-day component of each held-out timestamp.
hours_extracted = date_time_test.dt.hour
print(hours_extracted)

800000     2
800001     2
800002     2
800003     2
800004     2
          ..
999995    19
999996    19
999997    19
999998    19
999999    19
Name: Date_Time, Length: 200000, dtype: int64


In [48]:
def smape(actual, forecast):
    """Return the symmetric mean absolute percentage error, in percent.

    Each element contributes ``|forecast - actual| / ((|actual| + |forecast|) / 2)``
    and the result is 100 times the mean of those terms.

    Parameters
    ----------
    actual : array-like
        Observed values.
    forecast : array-like
        Predicted values, broadcastable against ``actual``.

    Returns
    -------
    float
        SMAPE as a percentage.

    Notes
    -----
    When ``actual`` and ``forecast`` are both zero at some position the
    denominator is zero. That position is a perfect prediction, so it is
    counted as zero error instead of letting ``0 / 0`` turn the whole mean
    into NaN.
    """
    actual = np.asarray(actual, dtype=float)
    forecast = np.asarray(forecast, dtype=float)
    numerator = np.abs(forecast - actual)
    denominator = (np.abs(actual) + np.abs(forecast)) / 2
    # Divide only where the denominator is non-zero; the remaining positions
    # keep the 0.0 they were initialised with in ``out``.
    ratio = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )
    return 100 * np.mean(ratio)

In [49]:
def print_metrics(model, mse, mad, smape_val):
    """Print a labelled block of error metrics for one model.

    The block starts with a blank line and a ``Metrics for <model>:`` header,
    followed by one tab-indented line each for MSE, MAD, RMSE and SMAPE.
    RMSE is derived here as the square root of ``mse``. All values are
    printed with five decimal places.
    """
    rmse = np.sqrt(mse)
    report_lines = [
        f"\nMetrics for {model}:",
        f"\tMean Squared Error (MSE): {mse:.5f}",
        f"\tMean Absolute Deviation (MAD): {mad:.5f}",
        f"\tRoot Mean Squared Error (RMSE): {rmse:.5f}",
        f"\tSymmetric Mean Absolute Percentage Error (SMAPE): {smape_val:.5f}",
    ]
    print("\n".join(report_lines))

In [50]:
def plot_forecasts_vs_actual(dates, actual, forecasted, model, data_frame_name, max_ticks=12):
    """Plot actual and forecasted temperatures against time on one figure.

    Parameters
    ----------
    dates : pandas.Series
        Datetime values used for the x-axis; ``.dt.hour`` supplies the tick
        labels.
    actual : pandas.Series
        Observed values (``.values``, ``.min()`` and ``.max()`` are used).
    forecasted : array-like exposing ``.min()`` / ``.max()``
        Predicted values, one per entry in ``dates``.
    model : str
        Model name shown in the title.
    data_frame_name : str
        Dataset name shown in the title.
    max_ticks : int, default 12
        Upper bound on the number of x-axis ticks. The original version put
        one tick and label on every data point, which is unreadable and very
        slow to render for long series. Inputs with at most ``max_ticks``
        points still get a tick on every point.
    """
    # Pad the y-limits by 10% of the data range. The floor on the range keeps
    # a visible margin when all values are (nearly) identical.
    y_min = min(actual.min(), forecasted.min())
    y_max = max(actual.max(), forecasted.max())
    min_range = 1e-2
    effective_range = max(y_max - y_min, min_range)
    y_padding = effective_range * 0.1
    y_min_adjusted = y_min - y_padding
    y_max_adjusted = y_max + y_padding

    plt.figure(figsize=(10, 5))
    plt.plot(dates, actual.values, marker='o', linestyle='-', label='Actual')
    plt.plot(dates, forecasted, marker='o', linestyle='-', label='Forecasted')

    plt.title(f'Actual vs Forecasted using {model} for {data_frame_name}')
    plt.xlabel('Hour (from Date_Time)')
    plt.ylabel('Temperature (C)')
    plt.ylim(y_min_adjusted, y_max_adjusted)

    # Choose at most ``max_ticks`` evenly spaced positions, always including
    # the first and last point. Rounding before the int cast avoids
    # duplicated positions caused by floating-point error in linspace.
    n_points = len(dates)
    n_ticks = min(n_points, max(1, int(max_ticks)))
    tick_positions = np.unique(
        np.round(np.linspace(0, n_points - 1, num=n_ticks)).astype(int)
    )
    tick_dates = dates.iloc[tick_positions]
    plt.xticks(ticks=tick_dates, labels=tick_dates.dt.hour.tolist(), rotation=45)

    plt.legend()
    plt.show()


In [51]:
def train_and_evaluate_model(model, model_name):
    """Fit ``model`` on the training split, then report and plot test results.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_test``, ``y_test``
    and ``date_time_test``. Prints MSE, MAD, RMSE and SMAPE through
    ``print_metrics`` and draws the actual-vs-forecast figure under the title
    label 'Weather Data'. Returns nothing.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # Arguments are evaluated left to right: MSE, then MAD, then SMAPE.
    print_metrics(
        model_name,
        mean_squared_error(y_test, predictions),
        mean_absolute_error(y_test, predictions),
        smape(y_test, predictions),
    )

    plot_forecasts_vs_actual(
        date_time_test, y_test, predictions, model_name, 'Weather Data'
    )

In [None]:

# Candidate regressors, each paired with the display name that is passed on to
# the metrics printout and the plot title. All use their default settings.
models = [
    (HuberRegressor(), 'Huber Regressor'),
    (lgb.LGBMRegressor(), 'LightGBM Regressor'),
    (Ridge(), 'Ridge Regressor')
]

# Fit, score and plot each candidate in turn on the shared train/test split.
for model, name in models:
    train_and_evaluate_model(model, name)



Metrics for Huber Regressor:
	Mean Squared Error (MSE): 208.50815
	Mean Absolute Deviation (MAD): 12.50851
	Root Mean Squared Error (RMSE): 14.43981
	Symmetric Mean Absolute Percentage Error (SMAPE): 90.90884


In [42]:
# Display the time-sorted frame; the rendered output below is abbreviated to
# its leading and trailing rows.
weather_data

Unnamed: 0,Location,Date_Time,Temperature_C,Humidity_pct,Precipitation_mm,Wind_Speed_kmh
0,Phoenix,2024-01-01 00:00:06,0.618582,46.871717,7.379615,5.321995
1,Houston,2024-01-01 00:00:06,39.469465,34.789904,9.807685,20.475434
2,Los Angeles,2024-01-01 00:00:15,35.920611,80.786237,0.859060,21.634439
3,San Antonio,2024-01-01 00:00:18,32.048197,45.576250,2.894071,23.335641
4,San Diego,2024-01-01 00:00:31,-8.090262,81.472454,5.435076,7.609853
...,...,...,...,...,...,...
999995,Chicago,2024-05-18 19:43:30,-1.561114,35.861711,8.802004,7.852678
999996,Philadelphia,2024-05-18 19:43:50,8.597967,88.073644,5.137802,21.671935
999997,New York,2024-05-18 19:43:55,7.679884,89.701606,6.760624,21.417264
999998,Houston,2024-05-18 19:44:02,34.038278,57.755329,0.939044,4.530607
