<h1 align="center">CAS Analysis</h1>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from patsy import dmatrices
from scipy import stats
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.metrics import multilabel_confusion_matrix
from lime import lime_tabular

import random
sns.set_theme()

# Seed every random number generator the notebook relies on so that weight
# initialisation, dropout and dataset shuffling are repeatable across
# Restart & Run All. 42 matches the random_state used for train_test_split.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

## Importing data

In [None]:
# Read the preprocessed CAS table from the input directory and preview
# its first five rows.
df = pd.read_csv(
    filepath_or_buffer="../input/cas-preprocessed/CAS Preprocessed.csv"
)
df.head(n=5)

In [None]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

In [None]:
# Column dtypes, non-null counts and memory usage of the loaded frame.
df.info()

In [None]:
# Percentage of missing values, restricted to columns that contain at
# least one null.
null_columns = df.columns[df.isna().any()]
df[null_columns].isna().sum().div(len(df)).mul(100)

In [None]:
# Summary statistics of the frame's columns.
df.describe()

In [None]:
# Number of rows per crash-severity label (the prediction target).
df["crashSeverity"].value_counts()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit an encoder on the crashSeverity column; it is used below to turn each
# severity label into an integer code.
le = LabelEncoder()
le.fit(df.crashSeverity)

In [None]:
# Labels learned by the encoder; the position of a label in this array is
# the integer code that le.transform assigns to it.
le.classes_

In [None]:
# Store the integer-encoded severity in a new 'cs' column, which is the
# response variable of the model formula.
df["cs"] = le.transform(df.crashSeverity)

In [None]:
# Predictor terms of the patsy formula, in the order they are joined.
feature_terms = [
    "bicycle", "bridge", "bus", "carStationWagon", "cliffBank", "debris",
    "ditch", "fence", "flatHill", "guardRail", "houseOrBuilding", "kerb",
    "moped", "motorcycle", "NumberOfLanes", "objectThrownOrDropped",
    "otherObject", "otherVehicleType", "overBank", "parkedVehicle",
    "phoneBoxEtc", "postOrPole", "roadworks", "schoolBus", "slipOrFlood",
    "speedLimit", "strayAnimal", "streetLight", "suv", "taxi",
    "trafficIsland", "trafficSign", "train", "tree", "truck",
    "unknownVehicleType", "urban", "vanOrUtility", "vehicle", "waterRiver",
]
formula = "cs ~ " + "+".join(feature_terms)

# Build the response (y) and design (X) matrices as DataFrames, then remove
# the constant "Intercept" column that patsy adds to X.
y, X = dmatrices(formula, df, return_type='dataframe')
X.drop(columns=["Intercept"], inplace=True)

In [None]:
# Class balance of the encoded response before one-hot encoding.
y.value_counts()

In [None]:
# One-hot encode the integer severity codes for the softmax output layer.
# The guard keeps this cell idempotent: y is a DataFrame only on the first
# run, so re-running the cell does not one-hot encode the already encoded
# array a second time.
if isinstance(y, pd.DataFrame):
    y = to_categorical(y)
print(y)

In [None]:
# Hold out 20% of the rows as the test set; the fixed random_state makes the
# split repeatable.
X_train_, X_test_, y_train_, y_test_ = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Shapes of the train/test features and labels after the split.
tuple(part.shape for part in (X_train_, y_train_, X_test_, y_test_))

In [None]:
# Number of training rows (10% of them) to set aside for validation.
val_mark = int(len(X_train_) / 100 * 10)

In [None]:
# Carve the last `val_mark` rows off the training data as the validation set.
# NOTE: this reassigns X_train_/y_train_ from themselves, so re-running the
# cell shrinks the training set again.
X_train_, X_valid_ = X_train_.iloc[:-val_mark], X_train_.iloc[-val_mark:]
y_train_, y_valid_ = y_train_[:-val_mark], y_train_[-val_mark:]

In [None]:
# Shapes of the training and validation features and labels.
tuple(part.shape for part in (X_train_, y_train_, X_valid_, y_valid_))

In [None]:
batch_size = 64

# Wrap the (features, labels) pairs as tf.data datasets.
train_loader = tf.data.Dataset.from_tensor_slices((X_train_, y_train_))
validation_loader = tf.data.Dataset.from_tensor_slices((X_valid_, y_valid_))

# Shuffle with a buffer as large as the dataset, then group into batches.
train_dataset = train_loader.shuffle(len(X_train_)).batch(batch_size)
validation_dataset = validation_loader.shuffle(len(X_valid_)).batch(batch_size)

In [None]:
# Feature-wise standardisation layer. It has to be adapted to the training
# features before use; without adapt() (or explicit mean/variance) the layer
# has no dataset statistics to standardise with.
normalizer = tf.keras.layers.experimental.preprocessing.Normalization(axis=-1)
normalizer.adapt(np.asarray(X_train_, dtype="float32"))

# Take the input width and number of classes from the data instead of
# hard-coding them, so the network always matches the design matrix and the
# one-hot label matrix.
n_features = X_train_.shape[1]
n_classes = y_train_.shape[1]

# Fully connected classifier: five ReLU layers of decreasing width, each
# followed by dropout, then a softmax layer with one unit per class.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=(n_features,)))
model.add(normalizer)
for units, drop_rate in [(512, .3), (256, .3), (128, .3), (64, .25), (32, .2)]:
    model.add(tf.keras.layers.Dense(units, activation='relu'))
    model.add(tf.keras.layers.Dropout(drop_rate))
model.add(tf.keras.layers.Dense(n_classes, activation='softmax'))

model.summary()

In [None]:
# `keras` is already imported in the notebook's import cell, so it is not
# re-imported here.

# Learning-rate schedule: starts at 1e-4 and is multiplied by 0.96 in
# discrete steps (staircase) once every 100,000 optimizer steps.
initial_learning_rate = 0.0001
lr_schedule = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate,
                                                          decay_steps=100000,
                                                          decay_rate=0.96,
                                                          staircase=True)

# Categorical cross-entropy matches the one-hot labels and softmax output.
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
              optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
              metrics=['accuracy',
                       tf.keras.metrics.Precision(name='precision'),
                       tf.keras.metrics.Recall(name='recall')]
              )

# Keep only the best model (lowest validation loss) on disk.
checkpoint_cb = keras.callbacks.ModelCheckpoint("best_model.h5",
                                                monitor="val_loss",
                                                save_best_only=True)

# Stop after 15 epochs without val_loss improvement. restore_best_weights
# rolls the in-memory model back to the best epoch when stopping triggers;
# later cells evaluate `model` directly and never reload best_model.h5.
early_stopping_cb = keras.callbacks.EarlyStopping(monitor="val_loss",
                                                  patience=15,
                                                  restore_best_weights=True)

In [None]:
%%time

epochs = 50
model.fit(train_dataset,
          validation_data=validation_dataset,
          epochs=epochs,
          shuffle=True,
          verbose=1,
          callbacks=[checkpoint_cb, early_stopping_cb])

In [None]:
def plot_loss(loss, val_loss):
  """Draw the training and validation loss curves on the current figure.

  Args:
    loss: sequence of training-loss values, plotted against its index.
    val_loss: sequence of validation-loss values, plotted against its index.
  """
  for curve, curve_label in ((loss, 'loss'), (val_loss, 'val_loss')):
    plt.plot(curve, label=curve_label)
  plt.xlabel('Epoch')
  plt.ylabel('Error')
  plt.legend()
  plt.grid(True)

In [None]:
# Per-epoch metric series recorded during training.
# The `precsion` spelling is kept because the metrics plot below reads it.
training_log = model.history.history
loss, val_loss = training_log['loss'], training_log['val_loss']
precsion, recall = training_log['precision'], training_log['recall']

In [None]:
# Training vs. validation loss per epoch.
plot_loss(loss=loss, val_loss=val_loss)

In [None]:
# Evaluate on the held-out test set and on the training set. The four
# returned values follow the compile order: loss, accuracy, precision, recall.
test_loss, test_accuracy, test_precision, test_recall = model.evaluate(
    x=X_test_, y=y_test_
)
train_loss, train_accuracy, train_precision, train_recall = model.evaluate(
    x=X_train_, y=y_train_
)

In [None]:
# Training loss first, then test loss, one per line.
print(train_loss, test_loss, sep="\n")

In [None]:
# Training loss, precision and recall per epoch on a single set of axes.
for curve, curve_label in ((loss, 'loss'),
                           (precsion, 'Precision'),
                           (recall, 'Recall')):
    plt.plot(curve, label=curve_label)
plt.title('Evaluation metrics', size=20)
plt.xlabel('Epoch', size=14)
plt.legend();

In [None]:
# Class probabilities for the test rows, and the index of the most probable
# class for each row.
test_labels = model.predict(X_test_)
test_class = test_labels.argmax(axis=1)

In [None]:
# One-hot encode the predicted class indices. num_classes is pinned to the
# width of y_test_ so both matrices have the same columns even when some
# class is never predicted.
y_pred = to_categorical(test_class, num_classes=y_test_.shape[1])
# Show the predictions that were just built (the original printed `y`, the
# full label matrix, by mistake).
print(y_pred)

In [None]:
# One confusion matrix per class, computed from the one-hot true and
# predicted labels.
cm = multilabel_confusion_matrix(y_true=y_test_, y_pred=y_pred)
cm

In [None]:
def predict(x):
    """Return the model's class probabilities for `x` as a float array.

    Used as the prediction function handed to LIME.
    """
    return model.predict(x).astype(float)


# Training features as a plain array for LIME. A dedicated name is used so
# the design matrix `X` is not overwritten, which would break a re-run of
# the train/test split cell.
lime_training_data = X_train_.values

# Class names come from the label encoder, so there is exactly one name per
# encoded class and each name is the real severity label. The original list
# had five entries for a model with four outputs.
explainer = lime_tabular.LimeTabularExplainer(
    lime_training_data,
    feature_names=list(X_train_.columns),
    class_names=[str(label) for label in le.classes_],
    kernel_width=5,
)

In [None]:
# Explain the model's prediction for a single test row.
idx = 100
print("Actual :     ", y_test_[idx])
print("predictions :     ", test_class[idx])
# The original call ended in a stray positional `top_f` after a keyword
# argument, which is a SyntaxError. top_labels=1 explains the class the model
# ranks highest for this row.
# NOTE(review): confirm that top_labels was the intended argument.
exp = explainer.explain_instance(X_test_.iloc[idx].values,
                                 predict,
                                 num_features=10,
                                 top_labels=1)

In [None]:
# Render the LIME explanation for the selected row inline in the notebook.
exp.show_in_notebook()