In [1]:
import numpy as np
import pandas as pd
import pickle
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

def evaluate_classification(model, name, X_train, X_test, y_train, y_test):
    train_predictions = np.argmax(model.predict(X_train), axis=1)
    test_predictions = np.argmax(model.predict(X_test), axis=1)

    train_accuracy = accuracy_score(y_train, train_predictions)
    test_accuracy = accuracy_score(y_test, test_predictions)

    train_precision = precision_score(y_train, train_predictions, average='weighted')
    test_precision = precision_score(y_test, test_predictions, average='weighted')

    train_recall = recall_score(y_train, train_predictions, average='weighted')
    test_recall = recall_score(y_test, test_predictions, average='weighted')

    print("Training Set Metrics:")
    print("Training Accuracy {}: {:.2f}%".format(name, train_accuracy * 100))
    print("Training Precision {}: {:.2f}%".format(name, train_precision * 100))
    print("Training Recall {}: {:.2f}%".format(name, train_recall * 100))

    print("\nTest Set Metrics:")
    print("Test Accuracy {}: {:.2f}%".format(name, test_accuracy * 100))
    print("Test Precision {}: {:.2f}%".format(name, test_precision * 100))
    print("Test Recall {}: {:.2f}%".format(name, test_recall * 100))

def preprocess_data(df):
    scaler = StandardScaler()
    numerical_features = ['longitude', 'latitude', 'Speed_limit', 'hour', 'minute']
    df[numerical_features] = scaler.fit_transform(df[numerical_features])
    return df

def train_and_save_model(num_rows=None):
    start_time = time.time()
    print("Loading the dataset...")
    df = pd.read_csv("clean_df.csv")

    df[['hour', 'minute']] = df['Time'].str.split(':', expand=True).astype('int32')
    df = preprocess_data(df)

    features = ['longitude', 'latitude', 'Speed_limit', 'hour', 'minute']
    X = df[features]
    y = df['Accident_Severity']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=7)

    print("Model Training...")
    X_train_rnn = X_train.values.reshape(X_train.shape[0], 1, X_train.shape[1])  # Reshape for RNN
    X_test_rnn = X_test.values.reshape(X_test.shape[0], 1, X_test.shape[1])

    # Create and train the LSTM model
    model = Sequential()
    model.add(LSTM(50, input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')  # You can use other optimizers and loss functions based on your problem

    # Adjust the number of epochs and batch_size based on your data
    model.fit(X_train_rnn, y_train, epochs=10, batch_size=32)

    # Test the trained model on the testing data
    y_pred_lstm = model.predict(X_test_rnn)

    end_time = time.time()
    print(f"Model training took {end_time - start_time:.2f} seconds")
    evaluate_classification(model, "LSTM", X_train_rnn, X_test_rnn, y_train, y_test)

if __name__ == "__main__":
    num_rows = None  # Set the number of rows for training (e.g., num_rows = 1000000)
    train_and_save_model(num_rows)

Loading the dataset...
Model Training...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model training took 38.84 seconds
Training Set Metrics:
Training Accuracy LSTM: 0.00%
Training Precision LSTM: 0.00%
Training Recall LSTM: 0.00%

Test Set Metrics:
Test Accuracy LSTM: 0.00%
Test Precision LSTM: 0.00%
Test Recall LSTM: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
