In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.utils import to_categorical

# Read the accident records from the exported spreadsheet CSV and discard
# every row that has at least one missing value.
data = pd.read_csv('/content/testml2 - Sheet1 (1).csv').dropna()

# Feature matrix: every column except the target ('Accident_Severity') and
# 'Date', which is excluded from the model inputs (per the original note it
# holds alphanumeric values).
X = data.drop(columns=['Accident_Severity', 'Date'])

# Integer-encode each categorical feature with its own LabelEncoder. The
# fitted encoders are kept, keyed by column name, so each mapping stays
# available after this step.
categorical_columns = ['Location', 'Weather', 'Light_Condition', 'Road_Type', 'Vehicle_Type', 'Driver_Behavior']
label_encoders = {name: LabelEncoder() for name in categorical_columns}
for name, encoder in label_encoders.items():
    X[name] = encoder.fit_transform(X[name])

# Integer-encode the target column; the fitted encoder is kept so class
# indices can be mapped back to the original severity labels.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(data['Accident_Severity'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features. The scaler is fitted on the training rows only
# and then applied to both subsets, so test-set statistics never influence
# the scaling.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Build a feed-forward classifier: two ReLU hidden layers, each followed by
# dropout, and a softmax output layer.
#
# The output width comes from the fitted target encoder instead of a
# hard-coded 3: sparse_categorical_crossentropy needs one output unit for
# every class index produced by label_encoder, so a fixed width fails as soon
# as 'Accident_Severity' has more than three distinct values (and carries
# dead outputs when it has fewer).
num_classes = len(label_encoder.classes_)

model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.5))  # Dropout layer to reduce overfitting
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))  # Dropout layer
model.add(Dense(num_classes, activation='softmax'))  # One probability per severity class

# The targets are integer class indices (not one-hot vectors), hence the
# sparse variant of categorical cross-entropy.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train with early stopping: 20% of the training data is set aside for
# validation, training halts once the validation loss has not improved for
# 10 consecutive epochs, and the best weights seen are restored.
from keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping],
)

# Score the trained model on the held-out test set.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

# Per-class probabilities for every test row.
y_pred = model.predict(X_test)

# Take the most probable class per row and map its index back to the
# original severity label.
predicted_labels = label_encoder.inverse_transform(y_pred.argmax(axis=1))

# Side-by-side table of actual vs. predicted severity for the test rows.
results = pd.DataFrame(
    {
        'Actual_Accident_Severity': label_encoder.inverse_transform(y_test),
        'Predicted_Accident_Severity': predicted_labels,
    }
)

# Show the comparison table.
print(results)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Test Loss: 0.9203, Test Accuracy: 1.0000
  Actual_Accident_Severity Predicted_Accident_Severity
0                   Severe                      Severe
1                   Severe                      Severe
