# import modules

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error,accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import pandas as pd 
from sklearn.model_selection import GridSearchCV

# data preprocessing

In [6]:
# Read dataset
df = pd.read_csv('../input/alldriverdataset/25_features_in_one_row.csv')

# Drop rows with NA values
df = df.dropna(axis=0)

# Divide dataset into input features and output labels: the label column and
# the two columns that are not used as features are removed from X.
X = df.drop(['Unnamed: 0', 'DrivingStyle', 'DriverID'], axis=1)
y = df['DrivingStyle']

# One-hot encode any categorical feature columns
X = pd.get_dummies(X)

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# mean/variance of the held-out rows leak into the training features and make
# the test score optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, shuffle=True, random_state=42
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Keep X as a scaled array (as before), now using the train-only statistics.
X = scaler.transform(X)

In [7]:
# Baseline network: three hidden layers of 100 ReLU units trained with plain
# mini-batch SGD (constant learning rate, no momentum, no early stopping).
# random_state pins the weight initialisation so repeated runs of this cell
# give the same result, matching the seed used for the train/test split.
MLP = MLPClassifier(
    hidden_layer_sizes=(100, 100, 100),
    activation='relu',
    solver='sgd',
    alpha=0.1,
    batch_size=128,
    learning_rate='constant',
    learning_rate_init=1e-4,
    max_iter=250,
    shuffle=False,
    momentum=0,
    nesterovs_momentum=False,
    early_stopping=False,
    validation_fraction=0,
    verbose=True,  # prints the training loss after every iteration
    random_state=42,
)
MLP.fit(X_train, y_train)

# Evaluate on the held-out test split
predictions_test = MLP.predict(X_test)
print(confusion_matrix(y_test, predictions_test))
print(classification_report(y_test, predictions_test))

Iteration 1, loss = 1.31294284
Iteration 2, loss = 1.29314188
Iteration 3, loss = 1.27564465
Iteration 4, loss = 1.26011229
Iteration 5, loss = 1.24625037
Iteration 6, loss = 1.23376709
Iteration 7, loss = 1.22245612
Iteration 8, loss = 1.21213651
Iteration 9, loss = 1.20265261
Iteration 10, loss = 1.19388725
Iteration 11, loss = 1.18575704
Iteration 12, loss = 1.17815809
Iteration 13, loss = 1.17101638
Iteration 14, loss = 1.16427961
Iteration 15, loss = 1.15791494
Iteration 16, loss = 1.15188620
Iteration 17, loss = 1.14615232
Iteration 18, loss = 1.14068618
Iteration 19, loss = 1.13548186
Iteration 20, loss = 1.13052092
Iteration 21, loss = 1.12576965
Iteration 22, loss = 1.12121016
Iteration 23, loss = 1.11683755
Iteration 24, loss = 1.11263808
Iteration 25, loss = 1.10859768
Iteration 26, loss = 1.10470938
Iteration 27, loss = 1.10096264
Iteration 28, loss = 1.09734956
Iteration 29, loss = 1.09386120
Iteration 30, loss = 1.09049488
Iteration 31, loss = 1.08724163
Iteration 32, los



In [None]:
# Hidden layer sizes: one tuple per candidate, one entry per hidden layer.
# Single-layer candidates need the trailing comma -- `(100)` is just the
# int 100, not a tuple.
hidden_layer_sizes = [(100,), (100, 100), (100, 100, 100), (50,), (50, 50), (50, 50, 50)]
# Solver (weight optimiser)
solver = ['sgd', 'adam']
# Mini-batch size
batch_size = [128, 256]
# Learning-rate schedule; per the scikit-learn docs this setting is only used
# when solver='sgd'.
learning_rate = ['adaptive']
# Initial learning rate
learning_rate_init = [0.01, 0.001, 0.0001]

# alpha is not searched here; the MLPClassifier default is used.
param_grid = {
    'hidden_layer_sizes': hidden_layer_sizes,
    'solver': solver,
    'batch_size': batch_size,
    'learning_rate': learning_rate,
    'learning_rate_init': learning_rate_init,
}

# Seed the estimator so every candidate starts from reproducible weights and
# the search result does not change between runs.
MLPC_Model = MLPClassifier(random_state=42)

# 6 * 2 * 2 * 1 * 3 = 72 parameter combinations, each evaluated with 5-fold CV
MLPC_Grid = GridSearchCV(estimator=MLPC_Model, param_grid=param_grid, cv=5, verbose=2, n_jobs=4)

MLPC_Grid.fit(X_train, y_train)

MLPC_Grid.best_params_

In [None]:
# Score the fitted grid search on both splits, then report the two numbers
# with three decimals.
train_accuracy = MLPC_Grid.score(X_train, y_train)
test_accuracy = MLPC_Grid.score(X_test, y_test)
print(f'Train Accuracy - : {train_accuracy:.3f}')
print(f'Test Accuracy - : {test_accuracy:.3f}')

In [None]:
# Display the cross-validation results recorded by the grid search
# (as the last expression in the cell, the notebook renders its value).
MLPC_Grid.cv_results_