In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Load the temperature CSV and strip stray whitespace from the header names so
# the columns can be looked up by their plain names below.
csv_path = 'all_temperature_cleaned.csv'
df = pd.read_csv(csv_path)
df.columns = df.columns.str.strip()

# Combine the 'timestamp' and 'time' text columns into a single datetime
# column; the joined string must match 'YYYY-MM-DD HH:MM'.
df['datetime'] = pd.to_datetime(df['timestamp'] + ' ' + df['time'], format='%Y-%m-%d %H:%M')

# Columns of the CSV that are modelled one at a time.
regions = ['Rakhiyal', 'Bopal', 'Ambawadi', 'Chandkheda', 'Vastral']

# Train on 2019-2023 and hold out 2024 for evaluation (hourly rows).
train_df = df[(df['datetime'].dt.year >= 2019) & (df['datetime'].dt.year <= 2023)].copy()
test_df = df[df['datetime'].dt.year == 2024].copy()

# Hourly timestamps covering all of 2025. The lowercase 'h' alias is used
# because the uppercase 'H' alias is deprecated in recent pandas releases and
# emits a FutureWarning.
date_range_2025 = pd.date_range(start='2025-01-01 00:00', end='2025-12-31 23:00', freq='h')

# Prepare output folder for 2025 predictions
output_folder_2025 = 'predictions_2025'
os.makedirs(output_folder_2025, exist_ok=True)

# 2024 predictions and the metrics file are written to the working directory.
current_dir = os.getcwd()
# Collects one dict of 2024 error metrics per region.
metrics_list = []

# Best LSTM parameters from tuning
seq_length = 15   # number of past hourly values fed to the model per sample
lstm_units = 50
batch_size = 32
epochs = 50

def create_sequences(data, seq_len):
    """Build sliding-window samples for one-step-ahead forecasting.

    Each sample is the ``seq_len`` consecutive items ``data[i - seq_len:i]``
    and its target is the item that immediately follows, ``data[i]``.

    Args:
        data: Sized, sliceable sequence of observations.
        seq_len: Number of past observations in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the windows and their
        targets, one entry per position from ``seq_len`` to
        ``len(data) - 1``. Both arrays are empty when ``len(data)`` does not
        exceed ``seq_len``.
    """
    target_positions = range(seq_len, len(data))
    windows = [data[pos - seq_len:pos] for pos in target_positions]
    targets = [data[pos] for pos in target_positions]
    return np.array(windows), np.array(targets)

# For every region: fit an LSTM on the training years, score it on 2024,
# write the 2024 predictions, then roll the model forward hour by hour to
# produce a 2025 forecast.
for region in regions:
    print(f"\nProcessing region: {region}")

    # Select region hourly data and interpolate missing values
    train_series = train_df.set_index('datetime')[region].interpolate(method='time')
    test_series = test_df.set_index('datetime')[region].interpolate(method='time')

    # Fit the scaler on the training data only, so the test year does not
    # influence the scaling.
    scaler = MinMaxScaler()
    scaled_train = scaler.fit_transform(train_series.values.reshape(-1, 1))

    # Create training sequences
    X_train, y_train = create_sequences(scaled_train, seq_length)

    # Build windows over train + test together so that the first test targets
    # can use the final training values as their history.
    combined = pd.concat([train_series, test_series])
    scaled_combined = scaler.transform(combined.values.reshape(-1, 1))
    X_all, y_all = create_sequences(scaled_combined, seq_length)

    # Position of the first test observation inside `combined`.
    test_start_idx = len(train_series)

    # Window k targets combined[k + seq_length], hence the offset below.
    X_test = X_all[test_start_idx - seq_length:]
    y_test = y_all[test_start_idx - seq_length:]

    # Reshape for LSTM input: (samples, timesteps, features=1)
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

    # Build LSTM model
    model = Sequential()
    model.add(LSTM(units=lstm_units, activation='relu', input_shape=(seq_length, 1)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')

    # Train model
    print(f"Training LSTM for {region}...")
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)
    print(f"LSTM training complete for {region}.")

    # Predict on test set and map both predictions and targets back to the
    # original temperature scale.
    print(f"Predicting on test set for {region}...")
    y_pred = model.predict(X_test)
    y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
    y_pred_inv = scaler.inverse_transform(y_pred).flatten()
    print(f"Prediction complete for {region}.")

    # Calculate metrics
    mse = mean_squared_error(y_test_inv, y_pred_inv)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test_inv, y_pred_inv)

    print(f"  2024 MSE: {mse:.4f}")
    print(f"  2024 RMSE: {rmse:.4f}")
    print(f"  2024 R2 Score: {r2:.4f}")

    # Align datetime index for predictions using combined index
    combined_datetime = combined.index
    pred_dates = combined_datetime[test_start_idx : test_start_idx + len(y_pred_inv)]

    print(f"Length of predicted values: {len(y_pred_inv)}")
    print(f"Length of prediction dates: {len(pred_dates)}")

    # Create DataFrame for 2024 predictions
    pred_2024_df = pd.DataFrame({
        'date': pred_dates.date,
        'hour': pred_dates.hour,
        'predicted_temperature': y_pred_inv,
        'actual_temperature': y_test_inv
    })

    filename_2024 = f"lstm_{region.lower()}_2024.csv"
    pred_2024_df.to_csv(os.path.join(current_dir, filename_2024), index=False)
    print(f"Saved 2024 LSTM predictions for {region} as {filename_2024}")

    # Forecast 2025 hourly iteratively: start from the last `seq_length`
    # observed values and feed each prediction back in as the newest input.
    last_sequence = scaled_combined[-seq_length:].reshape((1, seq_length, 1))
    forecast_2025 = []
    n_hours_2025 = len(date_range_2025)

    print(f"Forecasting 2025 hourly for {region}...")
    for i in range(n_hours_2025):
        # verbose=0: otherwise Keras prints a progress bar for every single
        # forecast hour, flooding the output with thousands of lines.
        next_val = model.predict(last_sequence, verbose=0)
        forecast_2025.append(next_val[0, 0])
        # Slide the window one step along the time axis (axis=1); the wrapped
        # oldest value in the last slot is overwritten with the new prediction.
        last_sequence = np.roll(last_sequence, shift=-1, axis=1)
        last_sequence[0, -1, 0] = next_val[0, 0]
        if (i + 1) % 1000 == 0:
            print(f"  Hour {i + 1} / {n_hours_2025} forecasted...")

    forecast_2025 = np.array(forecast_2025).reshape(-1, 1)
    forecast_2025_inv = scaler.inverse_transform(forecast_2025).flatten()

    pred_2025_df = pd.DataFrame({
        'date': date_range_2025.date,
        'hour': date_range_2025.hour,
        'predicted_temperature': forecast_2025_inv
    })

    filename_2025 = f"lstm_{region.lower()}_2025.csv"
    pred_2025_df.to_csv(os.path.join(output_folder_2025, filename_2025), index=False)
    print(f"Saved 2025 LSTM predictions for {region} as {os.path.join(output_folder_2025, filename_2025)}")

    # Store metrics
    metrics_list.append({
        'region': region,
        'mse_2024': mse,
        'rmse_2024': rmse,
        'r2_2024': r2
    })

# Write the collected per-region 2024 error metrics to one CSV file in the
# working directory.
metrics_filename = 'lstm_model_metrics_2024.csv'
metrics_df = pd.DataFrame(metrics_list)
metrics_path = os.path.join(current_dir, metrics_filename)
metrics_df.to_csv(metrics_path, index=False)
print(f"\nSaved LSTM error metrics for all regions as {metrics_filename}")


  date_range_2025 = pd.date_range(start='2025-01-01 00:00', end='2025-12-31 23:00', freq='H')
  super().__init__(**kwargs)



Processing region: Rakhiyal
Training LSTM for Rakhiyal...
LSTM training complete for Rakhiyal.
Predicting on test set for Rakhiyal...
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Prediction complete for Rakhiyal.
  2024 MSE: 0.3267
  2024 RMSE: 0.5715
  2024 R2 Score: 0.9907
Length of predicted values: 8784
Length of prediction dates: 8784
Saved 2024 LSTM predictions for Rakhiyal as lstm_rakhiyal_2024.csv
Forecasting 2025 hourly for Rakhiyal...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32