In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
from tensorflow.keras import layers, models
import json




In [2]:
def create_lstm_model(timesteps, num_features):
    model = Sequential()
    model.add(LSTM(64, activation='tanh', input_shape=(timesteps, num_features), return_sequences=True))
    model.add(LSTM(32, activation='tanh', return_sequences=False))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(2, activation='linear'))
    model.compile(optimizer='adam', 
                  loss='mean_squared_error', 
                  metrics=['mae'])
    return model

In [3]:
from keras_tuner import RandomSearch
import tensorflow as tf

def build_model(hp):
    model = Sequential()
    model.add(LSTM(units=hp.Int('units_1', min_value=32, max_value=128, step=32), 
                   activation='tanh', 
                   input_shape=(timesteps, num_features), 
                   return_sequences=True))
    model.add(LSTM(units=hp.Int('units_2', min_value=16, max_value=64, step=16), activation='tanh'))
    model.add(Dense(units=hp.Int('dense_units', min_value=16, max_value=64, step=16), activation='relu'))
    model.add(Dense(2, activation='linear'))
    model.compile(optimizer=tf.keras.optimizers.Adam(
                      learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
                  loss='mean_squared_error',
                  metrics=['mae'])
    return model

In [4]:
from sklearn.model_selection import KFold

def perform_cross_validation(x_reshaped, y_reshaped, timesteps, num_features):
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    cv_scores = []
    
    for train_index, val_index in kf.split(x_reshaped):
        x_train_fold, x_val_fold = x_reshaped[train_index], x_reshaped[val_index]
        y_train_fold, y_val_fold = y_reshaped[train_index], y_reshaped[val_index]
        
        model = create_lstm_model(timesteps, num_features)
        
        model.fit(x_train_fold, y_train_fold, epochs=10, batch_size=32, verbose=0)
        
        val_loss = model.evaluate(x_val_fold, y_val_fold, verbose=0)
        cv_scores.append(val_loss[0])
    
    print(f"Cross-Validation Loss Scores: {cv_scores}")
    print(f"Mean CV Loss: {np.mean(cv_scores)}")
    
    return cv_scores

In [5]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

def evaluate_model(model, x_test, y_test, scaler_y):
    predictions = model.predict(x_test)
    
    y_test_original = scaler_y.inverse_transform(y_test)
    predicted_values = scaler_y.inverse_transform(predictions)
    
    mse = mean_squared_error(y_test_original, predicted_values)
    mae = mean_absolute_error(y_test_original, predicted_values)
    r2 = r2_score(y_test_original, predicted_values)
    
    print(f"Mean Squared Error (MSE): {mse}")
    print(f"Mean Absolute Error (MAE): {mae}")
    print(f"R² Score: {r2}")
    
    plt.figure(figsize=(10, 6))
    plt.scatter(y_test_original[:, 0], predicted_values[:, 0], label='Row Predictions', alpha=0.6)
    plt.scatter(y_test_original[:, 1], predicted_values[:, 1], label='Col Predictions', alpha=0.6, color='orange')
    plt.plot([y_test_original.min(), y_test_original.max()], 
             [y_test_original.min(), y_test_original.max()], 
             color='red', linestyle='--', label='Perfect Prediction')
    plt.xlabel('True Values')
    plt.ylabel('Predicted Values')
    plt.title('Prediction Errors Scatter Plot')
    plt.legend()
    plt.show()
    
    return predicted_values

In [10]:
def main():
    # Load CSV file
    df = pd.read_csv('newoutput.csv')
    
    # Extract features including temporal and spatial data
    x_data = df[['user_id', 'time', 'date', 'lat', 'long']].values
    y_data = df[['row', 'col']].values
    
    # Normalize features
    scaler_x = MinMaxScaler()
    x_data = scaler_x.fit_transform(x_data)
    
    scaler_y = MinMaxScaler()
    y_data = scaler_y.fit_transform(y_data)
    
    # Reshape data for LSTM
    timesteps = 10
    num_features = x_data.shape[1]
    x_reshaped = []
    y_reshaped = []
    
    for i in range(len(x_data) - timesteps):
        x_reshaped.append(x_data[i:i + timesteps])
        y_reshaped.append(y_data[i + timesteps])
    
    x_reshaped = np.array(x_reshaped)
    y_reshaped = np.array(y_reshaped)
    
    
     # Split data
    x_train, x_test, y_train, y_test = train_test_split(x_reshaped, y_reshaped, test_size=0.3, random_state=42)
    
    # Perform hyperparameter tuning
    tuner = RandomSearch(
        build_model,
        objective='val_loss',
        max_trials=10,
        executions_per_trial=1,
        directory='hyperparam_search',
        project_name='traffic_prediction_lstm'
    )
    tuner.search(x_train, y_train, epochs=10, validation_data=(x_test, y_test), batch_size=32)
    
    # Get best hyperparameters
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    print("Best Hyperparameters:", best_hps)
    
    # Perform cross-validation
    cv_scores = perform_cross_validation(x_reshaped, y_reshaped, timesteps, num_features)
    
    # Train final model with best hyperparameters
    model = create_lstm_model(timesteps, num_features)
    history = model.fit(
        x_train, y_train,
        epochs=50,
        batch_size=32,
        validation_data=(x_test, y_test),
        verbose=1
    )
    
    # Evaluate model
    predicted_values = evaluate_model(model, x_test, y_test, scaler_y)
    
    # Create regional heatmap
    create_regional_heatmap(predicted_values, scaler_y.inverse_transform(y_test), df)
    
    # Save the model
    model.save('lstm_traffic_predictor.h5')

if __name__ == '__main__':
    main()

MemoryError: Unable to allocate 9.27 GiB for an array with shape (24876968, 10, 5) and data type float64