In [22]:
!pip install pandas numpy scikit-learn tensorflow



In [23]:
import os

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Input # type: ignore
from tensorflow.keras.models import Sequential, load_model # type: ignore
from tensorflow.keras.utils import to_categorical # type: ignore

1. **Data Loading and Preprocessing**:
    - The dataset is loaded from a CSV file.
    - The `RoadSurface` column is mapped to numerical values.
    - Features are selected for training the model.
    - Labels are converted to categorical one-hot encoding.
    - The data is split into training and testing sets.
    - Feature scaling is applied to standardize the data.

In [24]:
def get_data(path, test_size=0.2, random_state=10):
    """Load the road dataset and return scaled train/test splits.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file. It must contain a ``RoadSurface`` column
        holding ``'Poor'``, ``'Avg'`` or ``'Good'`` plus the twelve feature
        columns listed in ``features`` below.
    test_size : float, default 0.2
        Fraction of rows held out for the test split.
    random_state : int, default 10
        Seed forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``. Features are standardised
        with a ``StandardScaler`` fitted on the training split only; labels
        are one-hot encoded over three classes (Poor=0, Avg=1, Good=2).

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    ValueError
        If ``RoadSurface`` contains a missing value or a value outside the
        three known labels.

    Notes
    -----
    Load failures are raised instead of being returned as a message string:
    callers unpack the result into four names, so a returned string only
    produced an unrelated "too many values to unpack" error. Other read
    errors raised by ``pd.read_csv`` propagate unchanged.
    """
    try:
        # Load dataset
        df = pd.read_csv(path)
    except FileNotFoundError as exc:
        raise FileNotFoundError(f"Dataset file not found: {path}") from exc

    # Data preprocessing: encode the target as integer class ids.
    label_map = {'Poor': 0, 'Avg': 1, 'Good': 2}
    labels = df["RoadSurface"].map(label_map)

    # `.map` yields NaN for anything not in `label_map`; fail loudly here
    # rather than handing NaN class ids to `to_categorical`.
    unmapped = labels.isna()
    if unmapped.any():
        bad_values = sorted(df.loc[unmapped, "RoadSurface"].astype(str).unique())
        raise ValueError(
            f"Unexpected RoadSurface values {bad_values}; "
            f"expected one of {list(label_map)}"
        )

    # Feature selection
    features = ["IDMachines", "PeopleAtwork", "StreetLights", "Accidents", "DamagedMovers", "StRoadLength", "RoadCurvature", "HPBends", "RoadType", "RoadWidth", "AvgSpeed", "AgeOfRoad"]
    X = df[features]

    # Convert labels to categorical one-hot encoding
    y = to_categorical(labels.astype(int), num_classes=len(label_map))

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Feature scaling: fit on the training split only so no test-set
    # statistics leak into training.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test

2. **Neural Network Model**:
    - A neural network model is built using the `Sequential` API from TensorFlow Keras.
    - The model consists of multiple dense layers with ReLU activation functions and a softmax output layer.
    - The model is compiled with categorical cross-entropy loss and the Adam optimizer.
    - The model is trained on the training data with a validation split.

In [25]:
def train_nn(data_path='datasets/rmdataset.csv', model_path='models/nn/mlp_model.h5',
             epochs=15, batch_size=10):
    """Train, evaluate and save the road-surface classifier.

    Parameters
    ----------
    data_path : str, default 'datasets/rmdataset.csv'
        CSV file handed to ``get_data``.
    model_path : str, default 'models/nn/mlp_model.h5'
        Destination file for the trained model. Its parent directory is
        created if it does not exist.
    epochs : int, default 15
        Number of training epochs.
    batch_size : int, default 10
        Mini-batch size used during training.

    Returns
    -------
    None
        Test accuracy is printed and the model is written to ``model_path``.

    Raises
    ------
    RuntimeError
        If ``get_data`` reports a load failure as a message string.

    Notes
    -----
    Only the model is saved. The scaler fitted inside ``get_data`` is not
    returned to this function, so it is not persisted here.
    """
    data = get_data(data_path)
    # Guard against a get_data that signals a load failure by returning a
    # message string: unpacking it would raise a misleading ValueError.
    if isinstance(data, str):
        raise RuntimeError(data)
    X_train, X_test, y_train, y_test = data

    # Build the neural network model. An explicit Input layer replaces the
    # deprecated `input_dim=` argument on the first Dense layer.
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(12, activation='relu'),
        Dense(8, activation='relu'),
        Dense(5, activation='relu'),
        Dense(3, activation='softmax'),
    ])

    # Compile the model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Train the model; 20% of the training split is held out for validation.
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2)

    # Evaluate the model
    _, accuracy = model.evaluate(X_test, y_test)
    print(f"Model accuracy on test data: {accuracy * 100:.2f}%\n")

    # Ensure the directory exists (skip when model_path has no directory part,
    # because os.makedirs('') raises).
    model_dir = os.path.dirname(model_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)

    # Save the model
    model.save(model_path)
    print("Model trained and saved\n")

    return None


3. **Model Evaluation and Saving**:
    - The model is evaluated on the test data to determine its accuracy.
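    - The trained model is saved to disk at `models/nn/mlp_model.h5`. The `StandardScaler` fitted in `get_data` is not saved, so it has to be persisted separately before the model can score new, unscaled data.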

In [26]:
# Run the end-to-end pipeline: load the data, train the network, print the
# test accuracy, and save the trained model to disk.
train_nn()

Epoch 1/15


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7000 - loss: 0.8233 - val_accuracy: 0.9011 - val_loss: 0.4876
Epoch 2/15
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9226 - loss: 0.4146 - val_accuracy: 0.9347 - val_loss: 0.2505
Epoch 3/15
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9442 - loss: 0.1964 - val_accuracy: 0.9496 - val_loss: 0.1647
Epoch 4/15
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9461 - loss: 0.1466 - val_accuracy: 0.9608 - val_loss: 0.1334
Epoch 5/15
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9654 - loss: 0.1095 - val_accuracy: 0.9627 - val_loss: 0.1186
Epoch 6/15
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9650 - loss: 0.1025 - val_accuracy: 0.9552 - val_loss: 0.1226
Epoch 7/15
[1m214/214[0m [32m━━━━━━━



Model accuracy on test data: 97.31%

Model trained and saved

