In [1]:
# python3.8 - 3.12 is required to install tensorflow
!pip install pandas numpy scikit-learn tensorflow

Collecting tensorflow
  Downloading tensorflow-2.18.0-cp311-cp311-macosx_12_0_arm64.whl.metadata (4.0 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Using cached absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-1-py2.py3-none-macosx_11_0_arm64.whl.metadata (5.2 kB)
Collecting opt-einsum>=2.3.2 (from tensorflow)
  Using cached opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.

In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Input # type: ignore
from tensorflow.keras.models import Sequential, load_model # type: ignore
from tensorflow.keras.utils import to_categorical # type: ignore

1. **Data Loading and Preprocessing**:
    - The dataset is loaded from a CSV file.
    - The `RoadSurface` column is mapped to numerical values.
    - Features are selected for training the model.
    - Labels are converted to categorical one-hot encoding.
    - The data is split into training and testing sets.
    - Feature scaling is applied to standardize the data.

In [2]:
def get_data(path):
    """Load the road dataset and return scaled train/test splits.

    The CSV at ``path`` is read, the ``RoadSurface`` column is encoded as an
    integer class (Poor=0, Avg=1, Good=2) and one-hot encoded into 3 columns,
    twelve feature columns are selected, the rows are split 80/20 with a fixed
    ``random_state``, and the features are standardized with a scaler fitted on
    the training split only.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file to load.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``: standardized feature arrays and
        one-hot label arrays for the training and test splits.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist. Failures are raised rather than returned as
        message strings, because callers unpack the result as a 4-tuple.
    ValueError
        If ``RoadSurface`` contains missing values or labels other than
        'Poor', 'Avg' or 'Good'.
    """
    try:
        # Load dataset; other read/parse errors propagate with pandas' own message.
        df = pd.read_csv(path)
    except FileNotFoundError as exc:
        raise FileNotFoundError(f"File not found: {path}") from exc

    # Data preprocessing: encode the target without mutating the loaded frame.
    label_codes = {'Poor': 0, 'Avg': 1, 'Good': 2}
    labels = df["RoadSurface"].map(label_codes)

    # Series.map yields NaN for anything outside the mapping; fail loudly here
    # instead of passing NaN on to the one-hot encoder.
    unmapped = labels.isna()
    if unmapped.any():
        bad_values = sorted(df.loc[unmapped, "RoadSurface"].astype(str).unique())
        raise ValueError(
            f"Unexpected or missing RoadSurface values {bad_values}; "
            f"expected one of {list(label_codes)}"
        )

    # Feature selection
    features = ["IDMachines", "PeopleAtwork", "StreetLights", "Accidents", "DamagedMovers", "StRoadLength", "RoadCurvature", "HPBends", "RoadType", "RoadWidth", "AvgSpeed", "AgeOfRoad"]
    X = df[features]

    # Convert labels to categorical one-hot encoding
    y = to_categorical(labels.astype(int), num_classes=len(label_codes))

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

    # Feature scaling: fit on the training split only so test statistics do not leak.
    # NOTE: the fitted scaler is local to this function and is not returned.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test

2. **Neural Network Model**:
    - A neural network model is built using the `Sequential` API from TensorFlow Keras.
    - The model consists of multiple dense layers with ReLU activation functions and a softmax output layer.
    - The model is compiled with categorical cross-entropy loss and the Adam optimizer.
    - The model is trained on the training data with a validation split.

In [3]:
def train_nn(data_path='rmdataset.csv', model_path='models/nn/mlp_model.h5', epochs=15, batch_size=10):
    """Train, evaluate, and save the road-surface classifier.

    Builds a small fully connected network (12 -> 8 -> 5 ReLU units, 3-way
    softmax output), trains it on the training split produced by ``get_data``
    with 20% of that split held out for validation, prints the accuracy on the
    test split, and saves the model to ``model_path``.

    Parameters
    ----------
    data_path : str
        CSV file passed to ``get_data``.
    model_path : str
        Destination file for the trained model; its parent directory is
        created if needed.
    epochs : int
        Number of training epochs.
    batch_size : int
        Mini-batch size used during training.

    Returns
    -------
    None
        Results are reported via ``print`` and the saved model file.

    Raises
    ------
    ValueError
        If ``get_data`` hands back an error-message string instead of data.
    """
    data = get_data(data_path)
    # Defensive: if get_data hands back an error-message string instead of the
    # 4-tuple, surface that message rather than failing on unpacking.
    if isinstance(data, str):
        raise ValueError(data)
    X_train, X_test, y_train, y_test = data

    # Build the neural network model. An explicit Input layer declares the
    # feature width instead of passing `input_dim` to the first Dense layer.
    model = Sequential()
    model.add(Input(shape=(X_train.shape[1],)))
    model.add(Dense(12, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(5, activation='relu'))
    model.add(Dense(3, activation='softmax'))

    # Compile the model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Train the model
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2)

    # Evaluate the model; evaluate() returns [loss, accuracy] for the compiled metrics.
    _, accuracy = model.evaluate(X_test, y_test)
    print(f"Model accuracy on test data: {accuracy * 100:.2f}%\n")

    # Ensure the target directory exists (skip when saving to the working directory)
    model_dir = os.path.dirname(model_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)

    # Save the model. NOTE: only the model is persisted here; the scaler fitted
    # inside get_data is not available to this function.
    model.save(model_path)
    print("Model trained and saved\n")

    return None


3. **Model Evaluation and Saving**:
    - The model is evaluated on the test data to determine its accuracy.
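    - The trained model is saved to disk. Note that the scaler used for feature scaling is fitted inside `get_data` and is not currently returned or saved, so it must be persisted separately (or re-fitted on the training data) before the saved model can be used for inference.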

In [4]:
# Train, evaluate, and save the model. train_nn() reports via print and
# returns None, so `result` holds no data.
result = train_nn()

Epoch 1/15


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.6571 - loss: 0.8391 - val_accuracy: 0.8918 - val_loss: 0.4708
Epoch 2/15
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 842us/step - accuracy: 0.9025 - loss: 0.3990 - val_accuracy: 0.9030 - val_loss: 0.3115
Epoch 3/15
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 830us/step - accuracy: 0.9155 - loss: 0.2623 - val_accuracy: 0.9030 - val_loss: 0.2740
Epoch 4/15
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 832us/step - accuracy: 0.9299 - loss: 0.2064 - val_accuracy: 0.9030 - val_loss: 0.2441
Epoch 5/15
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 879us/step - accuracy: 0.9363 - loss: 0.1672 - val_accuracy: 0.9160 - val_loss: 0.2053
Epoch 6/15
[1m214/214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 874us/step - accuracy: 0.9408 - loss: 0.1480 - val_accuracy: 0.9179 - val_loss: 0.1845
Epoch 7/15
[1m214/214[0m [



Model accuracy on test data: 96.26%

Model trained and saved

