In [1]:
import numpy as np
import pandas as pd
import pickle
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense

def evaluate_classification(model, name, X_train, X_test, y_train, y_test):
    train_predictions = model.predict(X_train)
    test_predictions = model.predict(X_test)

    train_accuracy = accuracy_score(y_train, train_predictions)
    test_accuracy = accuracy_score(y_test, test_predictions)

    train_precision = precision_score(y_train, train_predictions, average='weighted')
    test_precision = precision_score(y_test, test_predictions, average='weighted')

    train_recall = recall_score(y_train, train_predictions, average='weighted')
    test_recall = recall_score(y_test, test_predictions, average='weighted')

    print("Training Set Metrics:")
    print("Training Accuracy {}: {:.2f}%".format(name, train_accuracy * 100))
    print("Training Precision {}: {:.2f}%".format(name, train_precision * 100))
    print("Training Recall {}: {:.2f}%".format(name, train_recall * 100))

    print("\nTest Set Metrics:")
    print("Test Accuracy {}: {:.2f}%".format(name, test_accuracy * 100))
    print("Test Precision {}: {:.2f}%".format(name, test_precision * 100))
    print("Test Recall {}: {:.2f}%".format(name, test_recall * 100))

def preprocess_data(df):
    # Perform one-hot encoding for categorical columns if needed
    # Example: df = pd.get_dummies(df, columns=['categorical_column'])

    # Data transformation
    scaler = StandardScaler()
    numerical_features = ['longitude', 'latitude', 'Speed_limit', 'hour', 'minute']
    df[numerical_features] = scaler.fit_transform(df[numerical_features])
    return df

def train_and_save_model(num_rows=None):
    start_time = time.time()
    print("Loading the dataset...")
    df = pd.read_csv("clean_df.csv")

    df[['hour', 'minute']] = df['Time'].str.split(':', expand=True).astype('int32')
    df = preprocess_data(df)

    features = ['longitude', 'latitude', 'Speed_limit', 'hour', 'minute']
    X = df[features]
    y = df['Accident_Severity']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=7)

    print("Model Training...")
    X_train = np.expand_dims(X_train, axis=-1)
    X_test = np.expand_dims(X_test, axis=-1)

    model = Sequential([
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.1)

    print("Saving the model...")
    model.save("model.h5")

    end_time = time.time()
    print(f"Model training and saving took {end_time - start_time:.2f} seconds")
    evaluate_classification(model, "CNN", y_train, y_test)

if __name__ == "__main__":
    num_rows = None  # Set the number of rows for training (e.g., num_rows = 1000000)
    train_and_save_model(num_rows)


Loading the dataset...
Model Training...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Saving the model...
Model training and saving took 16.35 seconds


  saving_api.save_model(


TypeError: evaluate_classification() missing 2 required positional arguments: 'y_train' and 'y_test'

In [2]:
if __name__ == "__main__":
    num_rows = None  # Set the number of rows for training (e.g., num_rows = 1000000)
    train_and_save_model(num_rows)

Loading the dataset...
Model Training...
Epoch 1/10


  super().__init__(


[1m1348/1348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.0105 - loss: -9659.9268 - val_accuracy: 0.0115 - val_loss: -144380.2344
Epoch 2/10
[1m1348/1348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.0105 - loss: -286325.9375 - val_accuracy: 0.0115 - val_loss: -919661.3125
Epoch 3/10
[1m1348/1348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.0110 - loss: -1275657.1250 - val_accuracy: 0.0115 - val_loss: -2553155.5000
Epoch 4/10
[1m1348/1348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.0111 - loss: -3167534.0000 - val_accuracy: 0.0115 - val_loss: -5182503.5000
Epoch 5/10
[1m1348/1348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.0097 - loss: -6095093.5000 - val_accuracy: 0.0115 - val_loss: -8938492.0000
Epoch 6/10
[1m1348/1348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.0096 - loss: -10200841.00



Saving the model...
Model training and saving took 28.37 seconds
[1m1498/1498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Training Set Metrics:
Training Accuracy CNN: 1.06%
Training Precision CNN: 0.01%
Training Recall CNN: 1.06%

Test Set Metrics:
Test Accuracy CNN: 1.14%
Test Precision CNN: 0.01%
Test Recall CNN: 1.14%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
