In [18]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import os

# Per-region hourly temperature baseline.
#
# Pipeline: load the cleaned CSV, fit one LinearRegression per region on
# 2019-2023 using hour-of-day as the only feature, evaluate on 2024, then
# write 2024 predictions (with actuals), 2025 predictions, and a metrics table.
#
# NOTE(review): because hour-of-day is the sole feature, every day of 2025
# receives the same 24 predicted values per region; the recorded 2024 R2 of
# roughly 0.007 is consistent with that. Confirm this is meant as a baseline.
# NOTE(review): the recorded output shows identical metrics for Bopal and
# Ambawadi - verify those two source columns are not duplicates of each other.

# Load CSV and strip stray whitespace from the header so the region names
# below match the column labels exactly.
csv_path = 'all_temperature_cleaned.csv'
df = pd.read_csv(csv_path)
df.columns = df.columns.str.strip()

# Parse datetime from the separate 'timestamp' and 'time' columns.
df['datetime'] = pd.to_datetime(df['timestamp'] + ' ' + df['time'], format='%Y-%m-%d %H:%M')

regions = ['Rakhiyal', 'Bopal', 'Ambawadi', 'Chandkheda', 'Vastral']

# Split data: 2019-2023 for training, 2024 held out for evaluation.
years = df['datetime'].dt.year
train_df = df[(years >= 2019) & (years <= 2023)].copy()
test_df = df[years == 2024].copy()

# Generate hourly datetime range for full year 2025.
# Lowercase 'h' is the supported hourly alias; uppercase 'H' is deprecated
# since pandas 2.2 and emits a FutureWarning.
date_range_2025 = pd.date_range(start='2025-01-01 00:00', end='2025-12-31 23:00', freq='h')
unseen_df = pd.DataFrame({'datetime': date_range_2025})

# Prepare output folder for 2025 predictions
output_folder_2025 = 'predictions_2025'
os.makedirs(output_folder_2025, exist_ok=True)

current_dir = os.getcwd()

# The feature matrices and the date/hour output columns do not depend on the
# region, so build them once instead of once per loop iteration.
X_train = train_df['datetime'].dt.hour.values.reshape(-1, 1)
X_test = test_df['datetime'].dt.hour.values.reshape(-1, 1)
X_unseen = unseen_df['datetime'].dt.hour.values.reshape(-1, 1)

test_dates = test_df['datetime'].dt.date
test_hours = test_df['datetime'].dt.hour
unseen_dates = unseen_df['datetime'].dt.date
unseen_hours = unseen_df['datetime'].dt.hour

metrics_list = []

for region in regions:
    # Train model on hour-of-day -> temperature for this region
    y_train = train_df[region].values
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predict on 2024 and evaluate
    y_test_actual = test_df[region].values
    y_test_pred = model.predict(X_test)

    mse = mean_squared_error(y_test_actual, y_test_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test_actual, y_test_pred)

    print(f"Region: {region}")
    print(f"  2024 MSE: {mse:.4f}")
    print(f"  2024 RMSE: {rmse:.4f}")
    print(f"  2024 R2 Score: {r2:.4f}")

    # Save 2024 predictions with actuals (written to the working directory)
    pred_2024 = pd.DataFrame({
        'date': test_dates,
        'hour': test_hours,
        'predicted_temperature': y_test_pred,
        'actual_temperature': y_test_actual
    })
    filename_2024 = f"lr_{region.lower()}_2024.csv"
    pred_2024.to_csv(os.path.join(current_dir, filename_2024), index=False)
    print(f"Saved 2024 predictions for {region} as {filename_2024}")

    # Store metrics
    metrics_list.append({
        'region': region,
        'mse_2024': mse,
        'rmse_2024': rmse,
        'r2_2024': r2
    })

    # Predict for 2025 using generated timestamps (written to output_folder_2025)
    y_unseen_pred = model.predict(X_unseen)
    pred_2025 = pd.DataFrame({
        'date': unseen_dates,
        'hour': unseen_hours,
        'predicted_temperature': y_unseen_pred
    })
    filename_2025 = f"lr_{region.lower()}_2025.csv"
    pred_2025.to_csv(os.path.join(output_folder_2025, filename_2025), index=False)
    print(f"Saved 2025 predictions for {region} as {os.path.join(output_folder_2025, filename_2025)}")

# Save all metrics to CSV (one row per region)
metrics_df = pd.DataFrame(metrics_list)
metrics_filename = 'lr_model_metrics_2024.csv'
metrics_df.to_csv(os.path.join(current_dir, metrics_filename), index=False)
print(f"Saved error metrics for all regions as {metrics_filename}")


  date_range_2025 = pd.date_range(start='2025-01-01 00:00', end='2025-12-31 23:00', freq='H')


Region: Rakhiyal
  2024 MSE: 34.7917
  2024 RMSE: 5.8984
  2024 R2 Score: 0.0068
Saved 2024 predictions for Rakhiyal as lr_rakhiyal_2024.csv
Saved 2025 predictions for Rakhiyal as predictions_2025\lr_rakhiyal_2025.csv
Region: Bopal
  2024 MSE: 35.6966
  2024 RMSE: 5.9747
  2024 R2 Score: 0.0062
Saved 2024 predictions for Bopal as lr_bopal_2024.csv
Saved 2025 predictions for Bopal as predictions_2025\lr_bopal_2025.csv
Region: Ambawadi
  2024 MSE: 35.6966
  2024 RMSE: 5.9747
  2024 R2 Score: 0.0062
Saved 2024 predictions for Ambawadi as lr_ambawadi_2024.csv
Saved 2025 predictions for Ambawadi as predictions_2025\lr_ambawadi_2025.csv
Region: Chandkheda
  2024 MSE: 35.9852
  2024 RMSE: 5.9988
  2024 R2 Score: 0.0063
Saved 2024 predictions for Chandkheda as lr_chandkheda_2024.csv
Saved 2025 predictions for Chandkheda as predictions_2025\lr_chandkheda_2025.csv
Region: Vastral
  2024 MSE: 34.8495
  2024 RMSE: 5.9033
  2024 R2 Score: 0.0074
Saved 2024 predictions for Vastral as lr_vastral_2024