In [29]:
import pandas as pd
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from statsmodels.tsa.statespace.sarimax import SARIMAX
import joblib
import os
import numpy as np

data=pd.read_csv('../data_store/final_data/final_data.csv')

In [30]:
data.head()

Unnamed: 0,timestamp,aqi,state,co,no,no2,o3,so2,pm2_5,pm10,nh3,temperature_2m,relative_humidity_2m,rain,wind_speed_10m,wind_direction_10m,soil_temperature_0_to_7cm,soil_moisture_0_to_7cm
0,2023-10-13 07:00:00,32.83,Alabama,198.6,0.0,1.05,56.51,0.13,2.84,3.31,0.19,16.929998,84.940186,0.0,7.594208,58.570484,18.279999,0.445
1,2023-10-13 08:00:00,31.95,Alabama,193.6,0.0,0.79,55.07,0.07,2.74,3.21,0.14,17.23,84.15374,0.0,7.704336,52.594578,18.23,0.445
2,2023-10-13 09:00:00,31.57,Alabama,191.93,0.0,0.67,53.64,0.09,2.81,3.3,0.11,17.529999,82.84113,0.0,8.287822,55.6196,18.279999,0.444
3,2023-10-13 10:00:00,31.25,Alabama,190.26,0.0,0.7,52.21,0.13,3.05,3.58,0.09,17.429998,83.90522,0.0,8.39657,59.03632,18.279999,0.443
4,2023-10-13 11:00:00,31.38,Alabama,191.93,0.0,0.79,50.78,0.17,3.35,3.92,0.08,17.38,84.44238,0.0,7.787991,56.3099,18.23,0.443


In [31]:
import numpy as np
import pandas as pd
import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import joblib
import warnings
warnings.filterwarnings('ignore')

class TimeSeriesDataset(Dataset):
    def __init__(self, X, y, sequence_length=24):
        self.X = torch.FloatTensor(X)
        self.y = torch.FloatTensor(y)
        self.sequence_length = sequence_length

    def __len__(self):
        return len(self.X) - self.sequence_length + 1

    def __getitem__(self, idx):
        return (self.X[idx:idx + self.sequence_length], 
                self.y[idx + self.sequence_length - 1])

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size=128, num_layers=2, sequence_length=24):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.sequence_length = sequence_length
        
        self.batch_norm_input = nn.BatchNorm1d(sequence_length)
        
        # Simplified LSTM architecture
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, 
                           batch_first=True, dropout=0.2)
        
        # Simplified dense layers
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 1)
        )
        
    def forward(self, x):
        x = self.batch_norm_input(x)
        lstm_out, _ = self.lstm(x)
        return self.fc(lstm_out[:, -1, :])

class HybridAQIPredictor:
    def __init__(self, sequence_length=24, hidden_size=128, num_layers=2):
        self.sequence_length = sequence_length
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.scaler_X = StandardScaler()
        self.scaler_y = StandardScaler()
        self.sarimax_model = None
        self.lstm_model = None
        
    def prepare_data(self, data, state):
        state_data = data[data['state'] == state].copy()
        state_data = state_data.sort_values('timestamp')
        
        # Enhanced time-based features
        timestamp = pd.to_datetime(state_data['timestamp'])
        state_data['hour'] = timestamp.dt.hour
        state_data['day_of_week'] = timestamp.dt.dayofweek
        state_data['month'] = timestamp.dt.month
        state_data['day_of_year'] = timestamp.dt.dayofyear
        
        feature_cols = ['co', 'no', 'no2', 'o3', 'so2', 'pm2_5', 'pm10', 'nh3',
                       'temperature_2m', 'relative_humidity_2m', 'rain',
                       'wind_speed_10m', 'wind_direction_10m',
                       'soil_temperature_0_to_7cm', 'soil_moisture_0_to_7cm',
                       'hour', 'day_of_week', 'month', 'day_of_year']
        
        X = state_data[feature_cols].values
        y = state_data['aqi'].values
        
        # Split data
        train_size = int(len(state_data) * 0.8)
        test_size = len(state_data) - train_size
        
        X_train = X[:train_size]
        X_test = X[train_size:]
        y_train = y[:train_size]
        y_test = y[train_size:]
        
        # Scale data
        X_train_scaled = self.scaler_X.fit_transform(X_train)
        X_test_scaled = self.scaler_X.transform(X_test)
        
        y_train_scaled = self.scaler_y.fit_transform(y_train.reshape(-1, 1))
        y_test_scaled = self.scaler_y.transform(y_test.reshape(-1, 1))
        
        return (X_train_scaled, X_test_scaled,
                y_train_scaled, y_test_scaled,
                y_train, y_test)
    
    def train_sarimax(self, y_train, exog_train=None):
        # Simplified SARIMAX for hourly data
        self.sarimax_model = SARIMAX(y_train,
                                   exog=exog_train,
                                   order=(1, 1, 1),
                                   seasonal_order=(1, 1, 1, 24))
        self.sarimax_model = self.sarimax_model.fit(disp=False)
        return self.sarimax_model.predict()
    
    def train_lstm(self, X_train, residuals, batch_size=32, epochs=50):
        train_dataset = TimeSeriesDataset(X_train, residuals, self.sequence_length)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        
        input_size = X_train.shape[1]
        self.lstm_model = LSTMModel(input_size, 
                                  self.hidden_size,
                                  self.num_layers,
                                  self.sequence_length).to(self.device)
        
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(self.lstm_model.parameters(), lr=0.001)
        scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)
        
        for epoch in range(epochs):
            self.lstm_model.train()
            total_loss = 0
            for X_batch, y_batch in train_loader:
                X_batch = X_batch.to(self.device)
                y_batch = y_batch.to(self.device)
                
                optimizer.zero_grad()
                outputs = self.lstm_model(X_batch)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
                
                total_loss += loss.item()
            
            avg_loss = total_loss / len(train_loader)
            scheduler.step(avg_loss)
            
            if (epoch + 1) % 10 == 0:
                print(f'Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}')

    def predict(self, X, exog=None):
        if exog is not None:
            sarimax_pred = self.sarimax_model.predict(exog=exog)
        else:
            sarimax_pred = self.sarimax_model.forecast(len(X))
        
        self.lstm_model.eval()
        with torch.no_grad():
            sequences = []
            for i in range(len(X) - self.sequence_length + 1):
                sequence = X[i:i + self.sequence_length]
                sequences.append(sequence)
            
            if sequences:
                sequences = torch.FloatTensor(np.array(sequences)).to(self.device)
                lstm_preds = self.lstm_model(sequences).cpu().numpy()
                
                # Inverse transform predictions
                lstm_preds = self.scaler_y.inverse_transform(lstm_preds)
                sarimax_part = sarimax_pred[self.sequence_length-1:]
                
                # Ensure lengths match
                min_len = min(len(sarimax_part), len(lstm_preds))
                final_preds = sarimax_part[:min_len] + lstm_preds[:min_len].flatten()
                
                return final_preds
            return np.array([])

    def train_and_evaluate(self, data):
        results = {}
        states = data['state'].unique()
        
        for state in states:
            print(f"\nTraining model for {state}")
            
            (X_train_scaled, X_test_scaled,
             y_train_scaled, y_test_scaled,
             y_train, y_test) = self.prepare_data(data, state)
            
            # SARIMAX training
            sarimax_preds_train = self.train_sarimax(y_train)
            residuals = y_train - sarimax_preds_train
            residuals_scaled = self.scaler_y.transform(residuals.reshape(-1, 1))
            
            # LSTM training on residuals
            self.train_lstm(X_train_scaled, residuals_scaled)
            
            # Generate predictions
            y_pred = self.predict(X_test_scaled)
            
            # Adjust test data length to match predictions
            y_test_adj = y_test[self.sequence_length-1:self.sequence_length-1+len(y_pred)]
            
            if len(y_pred) > 0:
                metrics = {
                    'mse': mean_squared_error(y_test_adj, y_pred),
                    'mae': mean_absolute_error(y_test_adj, y_pred),
                    'r2': r2_score(y_test_adj, y_pred)
                }
                
                results[state] = metrics
                print(f"MSE: {metrics['mse']:.4f}")
                print(f"MAE: {metrics['mae']:.4f}")
                print(f"R2 Score: {metrics['r2']:.4f}")
        
        return results
    
    def save_model(self, path):
        model_data = {
            'sarimax_model': self.sarimax_model,
            'lstm_state_dict': self.lstm_model.state_dict(),
            'scaler_X': self.scaler_X,
            'scaler_y': self.scaler_y,
            'hidden_size': self.hidden_size,
            'num_layers': self.num_layers,
            'sequence_length': self.sequence_length
        }
        torch.save(model_data, path)
    
    def load_model(self, path, input_size):
        model_data = torch.load(path)
        self.sarimax_model = model_data['sarimax_model']
        self.scaler_X = model_data['scaler_X']
        self.scaler_y = model_data['scaler_y']
        self.hidden_size = model_data['hidden_size']
        self.num_layers = model_data['num_layers']
        self.sequence_length = model_data['sequence_length']
        
        self.lstm_model = LSTMModel(input_size, 
                                  self.hidden_size,
                                  self.num_layers,
                                  self.sequence_length).to(self.device)
        self.lstm_model.load_state_dict(model_data['lstm_state_dict'])

# Usage example
sequence_length = 24
model = HybridAQIPredictor(sequence_length=sequence_length)
results = model.train_and_evaluate(data)

# Save the model
model.save_model('hybrid_aqi_model.pth')

# Load the model
input_size = 19  # Number of features
new_model = HybridAQIPredictor(sequence_length=sequence_length)
new_model.load_model('hybrid_aqi_model.pth', input_size)


Training model for Alabama


KeyboardInterrupt: 

In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LassoCV, RidgeCV
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
import matplotlib.pyplot as plt
from datetime import datetime

def create_time_features(df):
    """Create time-based features from timestamp"""
    df = df.copy()
    df['hour'] = df['timestamp'].dt.hour
    df['day_of_week'] = df['timestamp'].dt.dayofweek
    df['month'] = df['timestamp'].dt.month
    df['day_of_year'] = df['timestamp'].dt.dayofyear
    df['is_weekend'] = df['timestamp'].dt.dayofweek.isin([5, 6]).astype(int)
    
    # Create cyclical features for time variables
    df['hour_sin'] = np.sin(2 * np.pi * df['hour']/24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour']/24)
    df['month_sin'] = np.sin(2 * np.pi * df['month']/12)
    df['month_cos'] = np.cos(2 * np.pi * df['month']/12)
    
    return df

def create_lag_features(df, columns, lags=[1, 2, 3]):
    """Create lag features for specified columns"""
    df = df.copy()
    for col in columns:
        for lag in lags:
            df[f'{col}_lag_{lag}'] = df[col].shift(lag)
    return df

def create_rolling_features(df, columns, windows=[3, 6, 12]):
    """Create rolling mean and std features for specified columns"""
    df = df.copy()
    for col in columns:
        for window in windows:
            df[f'{col}_rolling_mean_{window}'] = df[col].rolling(window=window).mean()
            df[f'{col}_rolling_std_{window}'] = df[col].rolling(window=window).std()
    return df

# Prepare the data
alabama_data = data[data['state'] == 'Alabama'].copy()
alabama_data['timestamp'] = pd.to_datetime(alabama_data['timestamp'])

# Feature engineering
alabama_data = create_time_features(alabama_data)

# Create lag and rolling features for important columns
important_columns = ['co', 'no2', 'o3', 'pm2_5', 'pm10', 'temperature_2m', 'wind_speed_10m']
alabama_data = create_lag_features(alabama_data, important_columns)
alabama_data = create_rolling_features(alabama_data, important_columns)

# Drop rows with NaN values created by lag/rolling features
alabama_data = alabama_data.dropna()

# Prepare features and target
features_to_drop = ['timestamp', 'state', 'aqi']
X = alabama_data.drop(features_to_drop, axis=1)
y = alabama_data['aqi']

# Create time-based train-test split (last 20% of data for testing)
split_idx = int(len(X) * 0.8)
X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize models with expanded parameters
models = {
    'Random Forest': RandomForestRegressor(
        n_estimators=500,
        max_depth=20,
        min_samples_split=2,
        min_samples_leaf=1,
        random_state=42,
        n_jobs=-1
    ),
    'XGBoost': XGBRegressor(
        n_estimators=500,
        max_depth=7,
        learning_rate=0.01,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        n_jobs=-1
    ),
    'Gradient Boosting': GradientBoostingRegressor(
        n_estimators=500,
        max_depth=7,
        learning_rate=0.01,
        subsample=0.8,
        random_state=42
    ),
    'Lasso': LassoCV(
        cv=5,
        random_state=42,
        n_jobs=-1
    )
}

# Train and evaluate models
results = {}
for name, model in models.items():
    # Train model
    model.fit(X_train_scaled, y_train)
    
    # Make predictions
    train_pred = model.predict(X_train_scaled)
    test_pred = model.predict(X_test_scaled)
    
    # Calculate metrics
    results[name] = {
        'train_r2': r2_score(y_train, train_pred),
        'test_r2': r2_score(y_test, test_pred),
        'train_mse': mean_squared_error(y_train, train_pred),
        'test_mse': mean_squared_error(y_test, test_pred),
        'train_mae': mean_absolute_error(y_train, train_pred),
        'test_mae': mean_absolute_error(y_test, test_pred)
    }

# Print results
for name, metrics in results.items():
    print(f"\n{name} Results:")
    print(f"Train R2: {metrics['train_r2']:.4f}")
    print(f"Test R2: {metrics['test_r2']:.4f}")
    print(f"Train MSE: {metrics['train_mse']:.4f}")
    print(f"Test MSE: {metrics['test_mse']:.4f}")
    print(f"Train MAE: {metrics['train_mae']:.4f}")
    print(f"Test MAE: {metrics['test_mae']:.4f}")
    print(f"R2 Gap (Train - Test): {metrics['train_r2'] - metrics['test_r2']:.4f}")

# Get feature importance from the best performing model (assuming it's tree-based)
best_model_name = max(results.items(), key=lambda x: x[1]['test_r2'])[0]
if best_model_name in ['Random Forest', 'XGBoost', 'Gradient Boosting']:
    best_model = models[best_model_name]
    feature_importance = pd.DataFrame({
        'feature': X.columns,
        'importance': best_model.feature_importances_
    }).sort_values('importance', ascending=False)
    
    print(f"\nTop 10 Most Important Features ({best_model_name}):")
    print(feature_importance.head(10))


Random Forest Results:
Train R2: 0.8829
Test R2: 0.0230
Train MSE: 30.0749
Test MSE: 1788.5972
Train MAE: 0.2009
Test MAE: 2.2868
R2 Gap (Train - Test): 0.8599

XGBoost Results:
Train R2: 0.9779
Test R2: -0.3041
Train MSE: 5.6809
Test MSE: 2387.5138
Train MAE: 0.2629
Test MAE: 3.9195
R2 Gap (Train - Test): 1.2820

Gradient Boosting Results:
Train R2: 0.9994
Test R2: 0.0234
Train MSE: 0.1546
Test MSE: 1787.9062
Train MAE: 0.1702
Test MAE: 2.0999
R2 Gap (Train - Test): 0.9760

Lasso Results:
Train R2: 0.9997
Test R2: 0.8220
Train MSE: 0.0703
Test MSE: 325.8685
Train MAE: 0.1860
Test MAE: 0.7842
R2 Gap (Train - Test): 0.1777


In [44]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.linear_model import LassoCV
import xgboost as xgb
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import warnings
warnings.filterwarnings('ignore')

# Filter data for Alabama
alabama_data = data[data['state'] == 'Alabama'].copy()

# Prepare features and target
feature_cols = ['co', 'no', 'no2', 'o3', 'so2', 'pm2_5', 'pm10', 'nh3', 
                'temperature_2m', 'relative_humidity_2m', 'rain', 
                'wind_speed_10m', 'wind_direction_10m', 
                'soil_temperature_0_to_7cm', 'soil_moisture_0_to_7cm']

X = alabama_data[feature_cols]
y = alabama_data['aqi']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define ML models and their parameter grids
models = {
    'RandomForest': {
        'model': RandomForestRegressor(random_state=42),
        'params': {
            'n_estimators': [100, 200],
            'max_depth': [10, 20, None],
            'min_samples_split': [2, 5]
        }
    },
    'XGBoost': {
        'model': xgb.XGBRegressor(random_state=42),
        'params': {
            'n_estimators': [100, 200],
            'max_depth': [3, 5, 7],
            'learning_rate': [0.01, 0.1]
        }
    },
    'Lasso': {
        'model': LassoCV(random_state=42),
        'params': {
            'eps': [1e-3, 1e-4],
            'n_alphas': [100, 200],
            'max_iter': [1000]
        }
    }
}

# Train and evaluate ML models
best_model = None
best_score = -float('inf')
results = {}

for name, model_info in models.items():
    print(f"\nTraining {name}...")
    grid_search = GridSearchCV(
        model_info['model'],
        model_info['params'],
        cv=5,
        scoring='r2',
        n_jobs=-1
    )
    grid_search.fit(X_train_scaled, y_train)
    
    # Get best model and score
    y_pred = grid_search.predict(X_test_scaled)
    score = r2_score(y_test, y_pred)
    results[name] = {
        'model': grid_search.best_estimator_,
        'params': grid_search.best_params_,
        'score': score
    }
    
    if score > best_score:
        best_score = score
        best_model = grid_search.best_estimator_
        best_name = name

print(f"\nBest ML Model: {best_name}")
print(f"Best R2 Score: {best_score:.4f}")


Training RandomForest...

Training XGBoost...

Training Lasso...

Best ML Model: Lasso
Best R2 Score: 0.9990


In [46]:
# Generate residuals using best ML model
y_pred_train = best_model.predict(X_train_scaled)
y_pred_test = best_model.predict(X_test_scaled)
residuals_train = y_train - y_pred_train
residuals_test = y_test - y_pred_test

# LSTM Dataset
class ResidualDataset(Dataset):
    def __init__(self, features, residuals, sequence_length=24):
        self.features = torch.FloatTensor(features)
        self.residuals = torch.FloatTensor(residuals.values.reshape(-1, 1))
        self.sequence_length = sequence_length
        
    def __len__(self):
        return len(self.features) - self.sequence_length
        
    def __getitem__(self, idx):
        X = self.features[idx:idx + self.sequence_length]
        y = self.residuals[idx + self.sequence_length]
        return X, y

# LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size=64, num_layers=2):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
        
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        last_hidden = lstm_out[:, -1, :]
        out = self.fc(last_hidden)
        return out

# Train LSTM
def train_lstm(train_loader, val_loader, model, epochs=50):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters())
    
    best_val_loss = float('inf')
    
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            
        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                val_loss += criterion(outputs, y_batch).item()
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_lstm_model.pth')
        
        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}')

# Create LSTM datasets
sequence_length = 24  # 24 hours
batch_size = 32

train_dataset = ResidualDataset(X_train_scaled, residuals_train, sequence_length)
test_dataset = ResidualDataset(X_test_scaled, residuals_test, sequence_length)

train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size

train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Initialize and train LSTM
lstm_model = LSTMModel(input_size=len(feature_cols))
train_lstm(train_loader, val_loader, lstm_model)

# Function to make hybrid predictions
def make_hybrid_predictions(X, ml_model, lstm_model, sequence_length=24):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    # ML predictions
    ml_pred = ml_model.predict(X)
    
    # Prepare data for LSTM
    dataset = ResidualDataset(X, pd.Series(np.zeros(len(X))), sequence_length)
    loader = DataLoader(dataset, batch_size=32)
    
    # LSTM predictions
    lstm_model.eval()
    lstm_preds = []
    with torch.no_grad():
        for X_batch, _ in loader:
            # Move input batch to the same device as the model
            X_batch = X_batch.to(device)
            outputs = lstm_model(X_batch)
            lstm_preds.extend(outputs.cpu().numpy())
    
    # Combine predictions
    final_predictions = ml_pred[sequence_length:] + np.array(lstm_preds).flatten()
    return final_predictions

# Make final predictions
final_predictions = make_hybrid_predictions(X_test_scaled, best_model, lstm_model)
final_r2 = r2_score(y_test[24:], final_predictions)
print(f"\nFinal Hybrid Model R2 Score: {final_r2:.4f}")

Epoch [10/50], Train Loss: 0.0647, Val Loss: 0.0413
Epoch [20/50], Train Loss: 0.0433, Val Loss: 0.0492
Epoch [30/50], Train Loss: 0.0362, Val Loss: 0.0483
Epoch [40/50], Train Loss: 0.0295, Val Loss: 0.0507
Epoch [50/50], Train Loss: 0.0173, Val Loss: 0.0642

Final Hybrid Model R2 Score: 0.9983
