In [None]:
import numpy as np #handling data
import pandas as pd #handling data

import seaborn as sns #Data Visualisation
import matplotlib.pyplot as plt #Data Visualisation

import warnings # filter warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import RandomizedSearchCV #import GridSearch from sklearn

In [None]:
df_train = pd.read_csv('datasets/train.csv') # load the train dataset
df_train.head() #show first 5 rows


In [None]:
df_test = pd.read_csv('datasets/test.csv') # load the test dataset
df_test.head() #show first 5 rows

In [None]:
df_train.info() #training dataset info

In [None]:
# Correlation heatmap of the raw training features.
# Only numeric columns are correlated: the frame still contains string
# columns at this point, and DataFrame.corr() raises on those in
# pandas >= 2.0 (older versions silently skipped them).
plt.figure(figsize = (20,20))
sns.heatmap(df_train.select_dtypes(include='number').corr(), annot = True)

In [None]:
df_test.info() #testing dataset info

In [None]:
df_train.columns

In [None]:
df_test.columns

In [None]:
# Remove the index/id columns and the service-rating columns that are not used as features (training set).
unused_train_columns = [
    'Unnamed: 0', "id", 'Gate location', 'Food and drink', 'Seat comfort',
    'Inflight entertainment', 'On-board service', 'Leg room service',
    'Checkin service', 'Inflight service', 'Cleanliness',
]
df_train.drop(columns=unused_train_columns, inplace=True)


In [None]:
# Remove the same index/id and service-rating columns from the testing set.
unused_test_columns = [
    'Unnamed: 0', "id", 'Gate location', 'Food and drink', 'Seat comfort',
    'Inflight entertainment', 'On-board service', 'Leg room service',
    'Checkin service', 'Inflight service', 'Cleanliness',
]
df_test.drop(columns=unused_test_columns, inplace=True)


In [None]:
df_train.isnull().sum() #check for null values


In [None]:
df_test.isnull().sum() #check for null values


In [None]:
df_train.describe() #statistics of dataset


In [None]:
# Impute missing arrival delays with the mean of that column, computed from the data.
# The previous hard-coded 15.178678 was copied from describe() output, goes stale
# when the data changes, and was written into the nulls of every column.
# NOTE(review): the column name is taken from the original comment - confirm it is the only column with nulls.
arrival_delay_mean = df_train['Arrival Delay in Minutes'].mean()
df_train['Arrival Delay in Minutes'] = df_train['Arrival Delay in Minutes'].fillna(arrival_delay_mean)

In [None]:
df_train.isnull().sum() #check to find null values

In [None]:
df_train.shape # number of rows and columns training set


In [None]:
# Discard every test row that still has a missing value, then show the remaining (rows, columns).
df_test.dropna(axis=0, how='any', inplace=True)
df_test.shape

In [None]:
df_test.reset_index(drop=True,inplace=True) # Rebuild a gap-free 0..n-1 index after dropna; the later pd.concat(axis=1) with the encoded frame aligns rows on the index
df_test # display the test set after re-indexing

In [None]:
df_train.duplicated().sum() #check for duplicate values in training set 

In [None]:
# drop_duplicates() returns a new frame; the original call discarded it, so no
# rows were ever removed. Assign the result and rebuild a 0..n-1 index so the
# later column-wise concat with the encoded features stays row-aligned.
df_train = df_train.drop_duplicates().reset_index(drop=True)
df_train.shape


In [None]:
df_test.duplicated().sum() #checks for duplicate values in testing set


In [None]:
# drop_duplicates() returns a new frame; the original call discarded it, so no
# rows were ever removed. Assign the result and rebuild a 0..n-1 index so the
# later column-wise concat with the encoded features stays row-aligned.
df_test = df_test.drop_duplicates().reset_index(drop=True)
df_test.shape

EXPLORATORY DATA ANALYSIS:

In [None]:
## Visualisation of the correlation
# Heatmap of the pairwise correlation between the numeric features (training set).
# String columns are excluded explicitly because DataFrame.corr() raises on
# them in pandas >= 2.0.
plt.figure(figsize = (10,10))
sns.heatmap(df_train.select_dtypes(include='number').corr(), annot = True)

In [None]:
## Visualisation of the correlation
# Heatmap of the pairwise correlation between the numeric features (testing set).
# String columns are excluded explicitly because DataFrame.corr() raises on
# them in pandas >= 2.0.
plt.figure(figsize = (10,10))
sns.heatmap(df_test.select_dtypes(include='number').corr(), annot = True)

In [None]:
df_train.shape

In [None]:
df_test.shape

In [None]:
# Class balance of the target in the training set: counts first, then a bar chart.
print(df_train["satisfaction"].value_counts())
plt.figure(figsize=(15, 6))
plt.subplot(1, 2, 1)
plt.title("In train data")
sns.countplot(
    x="satisfaction",
    data=df_train,
    order=["satisfied", "neutral or dissatisfied"],
    palette="rainbow",
)

In [None]:
#Satisfaction Testing
# The chart previously re-plotted df_train under a "train" title, so the test
# distribution was never shown; plot the test frame and label it as such.
print(df_test.satisfaction.value_counts())
plt.figure(figsize = (15, 6))
plt.subplot(121)
plt.title("In test data")
sns.countplot(data = df_test, x = "satisfaction" , palette = "rainbow", order = ["satisfied","neutral or dissatisfied"])

In [None]:
#Gender Training
# The printed counts came from df_train while the chart showed df_test under a
# "test" title; counts, chart and title now all describe the training set.
print(df_train.Gender.value_counts())
plt.figure(figsize = (15, 6))
plt.subplot(121)
plt.title("In train data")
sns.countplot(data = df_train, x = "Gender" , palette = "rainbow", order = ["Male","Female"])

In [None]:
# Gender split in the training set: counts first, then a bar chart.
print(df_train["Gender"].value_counts())
plt.figure(figsize=(15, 6))
plt.subplot(1, 2, 1)
plt.title("In train data")
sns.countplot(x="Gender", data=df_train, order=["Male", "Female"], palette="rainbow")

In [None]:
# Gender split in the testing set: counts first, then a bar chart in the right-hand subplot slot.
print(df_test["Gender"].value_counts())
plt.figure(figsize=(15, 6))
plt.subplot(1, 2, 2)
plt.title("In test data")
sns.countplot(x="Gender", data=df_test, order=["Male", "Female"], palette="rainbow")

In [None]:
# Travel class counts split by customer type, training set (left) next to testing set (right).
# The column is passed as x=...: seaborn >= 0.12 no longer accepts it
# positionally, and the keyword form also works on older releases.
plt.figure(figsize=(15,8))
plt.subplot(1,2,1)
plt.title('In training set')
sns.countplot(x='Class',data=df_train,hue='Customer Type',palette=['Black', 'Blue'])
plt.legend(loc='upper right')
plt.subplot(1,2,2)
plt.title('In testing set')
sns.countplot(x='Class',data=df_test,hue='Customer Type',palette=['Black', 'Blue'])
plt.legend(loc='upper right')


In [None]:
# Passenger counts per age (6-85), coloured by satisfaction, drawn on a FacetGrid (Training).
# "Age" is passed as x=...: seaborn >= 0.12 no longer accepts it positionally,
# and the keyword form also works on older releases.
graph_train = sns.catplot(x="Age", data=df_train, aspect=5.0, kind='count', order=range(6, 86), palette=['Red', 'Black'], hue='satisfaction' )
graph_train.set_ylabels('Satisfaction of Flight Passengers')

# Same plot for the testing set.
# NOTE(review): the palette order is reversed relative to the training plot - confirm this is intended.
graph_test = sns.catplot(x="Age", data=df_test, aspect=5.0, kind='count', order=range(6, 86), palette=['Black', 'Red'], hue='satisfaction')
graph_test.set_ylabels('Satisfaction of Flight Passengers')

In [None]:
# Box plot of the training frame's columns on one shared axis, used to eyeball outliers.
df_train.boxplot()

OneHotEncoder - Converting the categorical feature columns to numeric indicators (training dataset)

In [None]:
# One-hot encode the categorical / ordinal-rating columns of the training set.
from sklearn.preprocessing import OneHotEncoder

# drop='first' keeps n-1 indicator columns per feature.
oh = OneHotEncoder(dtype=np.int64, drop='first')
train_categorical_columns = [
    'Gender', 'Customer Type', 'Type of Travel', 'Class', 'Inflight wifi service',
    'Departure/Arrival time convenient', 'Ease of Online booking', 'Online boarding',
    'Baggage handling',
]
# fit_transform returns a sparse matrix; toarray() densifies it before wrapping in a DataFrame.
dfn = pd.DataFrame(oh.fit_transform(df_train[train_categorical_columns]).toarray())
dfn

In [None]:
# Append the one-hot indicator columns to the training frame (rows are aligned on the index).
df_train = pd.concat([df_train, dfn], axis="columns")
df_train

In [None]:
# The raw categorical columns are now redundant with their one-hot encodings; remove them.
encoded_train_columns = [
    'Gender', 'Customer Type', 'Type of Travel', 'Class', 'Inflight wifi service',
    'Departure/Arrival time convenient', 'Ease of Online booking', 'Online boarding',
    'Baggage handling',
]
df_train.drop(columns=encoded_train_columns, inplace=True)

In [None]:
df_train #current training dataset

OneHotEncoder - Converting the categorical feature columns to numeric indicators (testing dataset)

In [None]:
# One-hot encode the testing set with the encoder `oh` that was fitted on the
# training set in the training-encoding cell above.
# Fitting a second encoder on the test data (as before) lets the test set
# choose its own categories, so the indicator columns can differ in number and
# meaning from the ones the models were trained on.
dfn = df_test[[ 'Gender', 'Customer Type', 'Type of Travel', 'Class', 'Inflight wifi service',
                'Departure/Arrival time convenient', 'Ease of Online booking',  'Online boarding',  'Baggage handling']] #take a subset of df_test
dfn = oh.transform(dfn).toarray() #transform only (no fit); toarray() turns the sparse result into a dense array
dfn = pd.DataFrame(dfn) #converts matrix to dataframe
dfn

In [None]:
# Append the one-hot indicator columns to the testing frame (rows are aligned on the index).
df_test = pd.concat([df_test, dfn], axis="columns")
df_test

In [None]:
# The raw categorical columns are now redundant with their one-hot encodings; remove them.
encoded_test_columns = [
    'Gender', 'Customer Type', 'Type of Travel', 'Class', 'Inflight wifi service',
    'Departure/Arrival time convenient', 'Ease of Online booking', 'Online boarding',
    'Baggage handling',
]
df_test.drop(columns=encoded_test_columns, inplace=True)

In [None]:
df_test #current testing dataset

Get Dummies Training

In [None]:
df_train['satisfaction'].value_counts() #number of values for each catagory before

In [None]:
# Binary-encode the training target; drop_first leaves a single indicator column.
satisfied = pd.get_dummies(df_train.satisfaction, drop_first=True)
satisfied

In [None]:
# Attach the encoded target as the last column of the training frame.
df_train = pd.concat([df_train, satisfied], axis="columns")
df_train

In [None]:
# The textual target is superseded by the encoded column; remove it.
df_train.drop(columns=['satisfaction'], inplace=True)
df_train

In [None]:
df_train['satisfied'].value_counts() #number of values for each catagory after

In [None]:
df_train

GET Dummies - Testing

In [None]:
df_test['satisfaction'].value_counts() #number of values for each catagory before

In [None]:
# Binary-encode the testing target; drop_first leaves a single indicator column.
satisfied = pd.get_dummies(df_test.satisfaction, drop_first=True)
satisfied

In [None]:
# Attach the encoded target as the last column of the testing frame.
df_test = pd.concat([df_test, satisfied], axis="columns")
df_test

In [None]:
# The textual target is superseded by the encoded column; remove it.
df_test.drop(columns=['satisfaction'], inplace=True)
df_test

In [None]:
df_test['satisfied'].value_counts() #number of values for each catagory after

In [None]:
df_test

In [None]:
# Split each frame into features (every column but the last) and label (the last column).
# The one-hot columns carry integer names while the remaining columns have
# string names; scikit-learn >= 1.2 raises a TypeError on such mixed
# feature-name types, so every feature name is cast to str.
#training data
X_train = df_train.iloc[:, :-1].rename(columns=str) #all rows and columns except final column
y_train = df_train.iloc[:, -1] #all rows but only of final column
#testing data
X_test = df_test.iloc[:, :-1].rename(columns=str) #all rows and columns except final column
y_test = df_test.iloc[:, -1] #all rows but only of final column

In [None]:
X_train

In [None]:
y_train

In [None]:
# Correlation heatmap of the fully encoded training frame.
train_corr = df_train.corr()
plt.figure(figsize=(10, 10))
sns.heatmap(train_corr, annot=True)

In [None]:
# Correlation heatmap of the fully encoded testing frame.
test_corr = df_test.corr()
plt.figure(figsize=(10, 10))
sns.heatmap(test_corr, annot=True)

In [None]:
# Shape of each split, in the order X_train, y_train, X_test, y_test.
for split in (X_train, y_train, X_test, y_test):
    print(split.shape)

### FEATURE SCALING METHOD:

FEATURE SCALING ROBUST SCALER

In [None]:
from sklearn.preprocessing import RobustScaler

# Learn the scaling statistics from the training features only, then apply
# the same fitted scaler to both splits. Labels are never scaled.
sc = RobustScaler()
sc.fit(X_train)
X_train_Rscaled = sc.transform(X_train)
X_test_Rscaled = sc.transform(X_test)

FEATURE SCALING MINMAX SCALER

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn the min/max from the training features only, then apply the same
# fitted scaler to both splits. Labels are never scaled.
sc = MinMaxScaler()
sc.fit(X_train)
X_train_MMscaled = sc.transform(X_train)
X_test_MMscaled = sc.transform(X_test)

### XGBOOST Algorithm (1):

In [None]:
from xgboost import XGBClassifier
def fitAndTest(X_train, X_test, y_train, y_test):
    """Fit a default XGBClassifier on the training split and print its accuracy on both splits.

    Parameters
    ----------
    X_train, y_train : training features and labels; the model is fitted on these only.
    X_test, y_test : held-out features and labels; used for scoring only.

    Prints "Training Score" and "Testing Score" (accuracy, three decimals). Returns None.
    """
    model = XGBClassifier()

    model.fit(X_train,y_train)

    accuracy = model.score(X_train, y_train)
    print("Training Score: {:.3f}".format(accuracy))

    # Score the held-out split with the model trained above. The previous
    # version refitted the model on (X_test, y_test) first, so the reported
    # "Testing Score" was really a training score on the test data.
    accuracy = model.score(X_test, y_test)
    print("Testing Score: {:.3f}".format(accuracy))


#### TRAINING AND TESTING ACCURACY:

In [None]:
#Accuracy before scaling - XGBoost
print("Training and Testing Score without feature scaling: ")
fitAndTest(X_train, X_test, y_train, y_test)

### Evaluation of XGBoost:

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
model = XGBClassifier()
model.fit(X_train,y_train)

#### Classification Report

In [None]:
y_test_pred = model.predict(X_test)
print(classification_report(y_test, y_test_pred))

#### Confusion Matrix

In [None]:
def cfm_matrix(label,feature):
    """Draw the confusion matrix of true labels against predictions as an annotated heatmap.

    Parameters
    ----------
    label : true class labels (passed to ``confusion_matrix`` as ``y_true``).
    feature : predicted class labels (passed as ``y_pred``).

    ``confusion_matrix`` puts the true classes on the rows and the predicted
    classes on the columns, with classes in sorted order. The notebook passes
    the binary ``satisfied`` column, so class 0 ("not satisfied") comes first.
    The previous version had the axis labels swapped and the tick labels reversed.
    """
    class_names = ['not satisfied', 'satisfied']  # sorted class order: 0, then 1

    plt.figure(figsize = (10, 5))
    sns.heatmap(confusion_matrix(label,feature), annot = True, cmap = "Greens", fmt = ".0f",
                 xticklabels = class_names, yticklabels = class_names)
    # heatmap columns run along x (predictions); rows run along y (ground truth)
    plt.xlabel("Predicted values")
    plt.ylabel("Actual values")
    plt.show()

In [None]:
cfm_matrix(y_test, y_test_pred)

#### Cross Validation

In [None]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validated accuracy of the current model on the training split.
scores = cross_val_score(estimator=model, X=X_train, y=y_train, scoring="accuracy", cv=5)
print("Cross validaion scores: ")
print(scores)

In [None]:
XGB_average = scores.mean()
print("average model accuracy: {:.3f}".format(XGB_average))

### Model Tuning

In [None]:
model = XGBClassifier() #instance for algorithm
print(model.get_params().keys()) #print parameters in the algorithm

In [None]:
# Hyperparameter search space for XGBoost (used by both the grid and the random search).
params_xgboost = dict(
    n_estimators=[5, 10, 15, 20, 25],
    eval_metric=['logloss'],
    learning_rate=[0.2, 0.4, 0.6, 0.8, 1],
    max_depth=[2, 4, 6, 8, 10],
)

print(params_xgboost)

#### GRIDSEARCH for XGBoost

In [None]:
from sklearn.model_selection import GridSearchCV #import GridSearch from sklearn
# Exhaustive 5-fold search over params_xgboost, scored by accuracy.
xgboost_grid = GridSearchCV(model,params_xgboost, cv = 5, scoring = 'accuracy',return_train_score=False)

xgboost_grid.fit(X_train,y_train) #training the algorithm
# "{:.3f}" gives three decimals; the former "{:3f}" only set a minimum width of 3.
print("Best model accuracy: {:.3f}".format(xgboost_grid.best_score_)) #best model accuracy
print("Best HyperParameter Values: ", xgboost_grid.best_params_) #best Hyperparameters values
# The per-candidate table must be the last expression of the cell to be rendered;
# in the middle of the cell it was built and silently discarded.
pd.DataFrame(xgboost_grid.cv_results_)[['mean_test_score', 'params']]

#### RANDOMSEARCH for XGBoost

In [None]:
from sklearn.model_selection import RandomizedSearchCV #import RandomizedSearch from sklearn
# Seeded random search: 10 sampled candidates from params_xgboost, 5-fold CV, scored by accuracy.
xgboost_rand = RandomizedSearchCV(model, params_xgboost, cv=5, scoring='accuracy', n_iter=10, random_state=0, return_train_score=False)
xgboost_rand.fit(X_train, y_train) #training the algorithm
# "{:.3f}" gives three decimals; the former "{:3f}" only set a minimum width of 3.
print("Best model accuracy: {:.3f}".format(xgboost_rand.best_score_)) #best model accuracy
print("Best HyperParameter Values: ", xgboost_rand.best_params_) #best Hyperparameters values
# The per-candidate table must be the last expression of the cell to be rendered;
# in the middle of the cell it was built and silently discarded.
pd.DataFrame(xgboost_rand.cv_results_)[['mean_test_score', 'params']]

### Evaluation with Hyperparameters XGBoost
 

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
model = XGBClassifier(eval_metric= 'logloss', learning_rate = 0.4, max_depth = 8, n_estimators = 25)
model.fit(X_train,y_train)

#### Classification Report

In [None]:
y_test_pred = model.predict(X_test)
print(classification_report(y_test, y_test_pred))

#### Confusion Matrix

In [None]:
def cfm_matrix(label,feature):
    """Draw the confusion matrix of true labels against predictions as an annotated heatmap.

    Parameters
    ----------
    label : true class labels (passed to ``confusion_matrix`` as ``y_true``).
    feature : predicted class labels (passed as ``y_pred``).

    ``confusion_matrix`` puts the true classes on the rows and the predicted
    classes on the columns, with classes in sorted order. The notebook passes
    the binary ``satisfied`` column, so class 0 ("not satisfied") comes first.
    The previous version had the axis labels swapped and the tick labels reversed.
    """
    class_names = ['not satisfied', 'satisfied']  # sorted class order: 0, then 1

    plt.figure(figsize = (10, 5))
    sns.heatmap(confusion_matrix(label,feature), annot = True, cmap = "Greens", fmt = ".0f",
                 xticklabels = class_names, yticklabels = class_names)
    # heatmap columns run along x (predictions); rows run along y (ground truth)
    plt.xlabel("Predicted values")
    plt.ylabel("Actual values")
    plt.show()

In [None]:
cfm_matrix(y_test, y_test_pred)


#### Cross Validation

In [None]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validated accuracy of the current model on the training split.
scores = cross_val_score(estimator=model, X=X_train, y=y_train, scoring="accuracy", cv=5)
print(scores)

In [None]:
XGB_average = scores.mean()
print("average model accuracy: {:.3f}".format(XGB_average))

### LOGISTIC REGRESSION Algorithm (2)

In [None]:
from sklearn.linear_model import LogisticRegression

def fitAndTest(X_train, X_test, y_train, y_test):
    """Fit a default LogisticRegression on the training split and print its accuracy on both splits.

    Parameters
    ----------
    X_train, y_train : training features and labels; the model is fitted on these only.
    X_test, y_test : held-out features and labels; used for prediction and scoring only.

    Prints the test predictions, the true test labels, then "Training Score" and
    "Testing Score" (accuracy, three decimals). Returns None.
    """
    model = LogisticRegression()

    model.fit(X_train,y_train)

    y_test_pred = model.predict(X_test) #Prediction calculation

    print("Predicted values: ",y_test_pred)
    print("Actual values: ",y_test)

    accuracy = model.score(X_train, y_train)
    print("Training Score: {:.3f}".format(accuracy))

    # Score the held-out split with the model trained above. The previous
    # version refitted the model on (X_test, y_test) first, so the reported
    # "Testing Score" was really a training score on the test data.
    accuracy = model.score(X_test, y_test)
    print("Testing Score: {:.3f}".format(accuracy))


### TRAINING AND TESTING ACCURACY:

In [None]:
#Accuracy before scaling - LogisticRegression 
print("Before Feature Scaling")
fitAndTest(X_train, X_test, y_train, y_test)

In [None]:
#Accuracy after scaling - LogisticRegression
print("After Feature Scaling")
fitAndTest(X_train_MMscaled, X_test_MMscaled, y_train, y_test)

### Evaluation of Logistic Regression: 

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
model = LogisticRegression() # baseline model with default hyperparameters (no tuned values are passed here)
model.fit(X_train,y_train)


#### Classification Report

In [None]:
y_test_pred = model.predict(X_test)
print(classification_report(y_test, y_test_pred)) #prints the classification report

#### Confusion Matrix

In [None]:
def cfm_matrix(label,feature):
    """Draw the confusion matrix of true labels against predictions as an annotated heatmap.

    Parameters
    ----------
    label : true class labels (passed to ``confusion_matrix`` as ``y_true``).
    feature : predicted class labels (passed as ``y_pred``).

    ``confusion_matrix`` puts the true classes on the rows and the predicted
    classes on the columns, with classes in sorted order. The notebook passes
    the binary ``satisfied`` column, so class 0 ("not satisfied") comes first.
    The previous version had the axis labels swapped and the tick labels reversed.
    """
    class_names = ['not satisfied', 'satisfied']  # sorted class order: 0, then 1

    plt.figure(figsize = (10, 5))
    sns.heatmap(confusion_matrix(label,feature), annot = True, cmap = "Greens", fmt = ".0f",
                 xticklabels = class_names, yticklabels = class_names)
    # heatmap columns run along x (predictions); rows run along y (ground truth)
    plt.xlabel("Predicted values")
    plt.ylabel("Actual values")
    plt.show()

In [None]:
cfm_matrix(y_test, y_test_pred)

#### Cross Validation

In [None]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validated accuracy of a fresh default LogisticRegression on the training split.
model = LogisticRegression()
scores = cross_val_score(estimator=model, X=X_train, y=y_train, scoring="accuracy", cv=5)
print(scores)

In [None]:
lr_average = scores.mean()
print("average model accuracy: {:.3f}".format(lr_average))

### Model Tuning

In [None]:
model = LogisticRegression()
print(model.get_params().keys())

In [None]:
# Hyperparameter search space for LogisticRegression (used by both the grid and the random search).
# Only 'liblinear' is listed as solver: 'newton-cg' and 'lbfgs' do not support
# the 'l1' penalty, so two thirds of the former grid consisted of candidates
# whose fits always failed ('saga' would be the other l1-capable solver).
params_logistic = {
    'solver' : ['liblinear'],
    'penalty' : ['l1'],
    'max_iter': list(range(1,31)),
    'C' : [ 0.01, 0.1, 1.0, 10, 100 ]
}

print(params_logistic)

#### GRIDSEARCH for Logistic Regression

In [None]:
from sklearn.model_selection import GridSearchCV
# Exhaustive 5-fold search over params_logistic, scored by accuracy.
grid = GridSearchCV(model,params_logistic,cv=5,scoring="accuracy",return_train_score=False)
grid.fit(X_train,y_train)
# "{:.3f}" gives three decimals; the former "{:3f}" only set a minimum width of 3.
print("Best model accuracy: {:.3f}".format(grid.best_score_))
print("Best HyperParameter Values: ", grid.best_params_)
# The per-candidate table must be the last expression of the cell to be rendered;
# in the middle of the cell it was built and silently discarded.
pd.DataFrame(grid.cv_results_)[['mean_test_score', 'params']]

#### RANDOMSEARCH for Logistic Regression

In [None]:
from sklearn.model_selection import RandomizedSearchCV
# Seeded random search: 10 sampled candidates from params_logistic, 5-fold CV, scored by accuracy.
rand = RandomizedSearchCV(model,params_logistic,cv=5,random_state=0, n_iter=10, scoring="accuracy",return_train_score=False)
rand.fit(X_train,y_train)
# "{:.3f}" gives three decimals; the former "{:3f}" only set a minimum width of 3.
print("Best model accuracy: {:.3f}".format(rand.best_score_))
print("Best HyperParameter Values: ", rand.best_params_)
# The per-candidate table must be the last expression of the cell to be rendered;
# in the middle of the cell it was built and silently discarded.
pd.DataFrame(rand.cv_results_)[['mean_test_score', 'params']]

### Evaluation with Hyperparameters

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
model = LogisticRegression(C= 0.1, max_iter= 30, penalty= 'l1', solver= 'liblinear') #Using the hyperparameters suggest by 'GridSearch'
model.fit(X_train,y_train)

#### Classification Report

In [None]:
y_test_pred = model.predict(X_test)
print(classification_report(y_test, y_test_pred)) #prints the classification report

#### Confusion Matrix

In [None]:
def cfm_matrix(label,feature):
    """Draw the confusion matrix of true labels against predictions as an annotated heatmap.

    Parameters
    ----------
    label : true class labels (passed to ``confusion_matrix`` as ``y_true``).
    feature : predicted class labels (passed as ``y_pred``).

    ``confusion_matrix`` puts the true classes on the rows and the predicted
    classes on the columns, with classes in sorted order. The notebook passes
    the binary ``satisfied`` column, so class 0 ("not satisfied") comes first.
    The previous version had the axis labels swapped and the tick labels reversed.
    """
    class_names = ['not satisfied', 'satisfied']  # sorted class order: 0, then 1

    plt.figure(figsize = (10, 5))
    sns.heatmap(confusion_matrix(label,feature), annot = True, cmap = "Greens", fmt = ".0f",
                 xticklabels = class_names, yticklabels = class_names)
    # heatmap columns run along x (predictions); rows run along y (ground truth)
    plt.xlabel("Predicted values")
    plt.ylabel("Actual values")
    plt.show()

In [None]:
cfm_matrix(y_test, y_test_pred)

#### Cross Validation

In [None]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validated accuracy of the current model on the training split.
scores = cross_val_score(estimator=model, X=X_train, y=y_train, scoring="accuracy", cv=5)
print(scores)

In [None]:
lr_average = scores.mean()
print("average model accuracy: {:.3f}".format(lr_average))

### MLP CLASSIFIER - Algorithm 3

In [None]:
from sklearn.neural_network import MLPClassifier

def fitAndTest(X_train, X_test, y_train, y_test):
    """Train a seeded MLPClassifier on the training split and print its accuracy on both splits.

    The model is fitted once on (X_train, y_train); the test split is used for
    scoring only. Prints "Training Accuracy" and "Testing Accuracy" with three
    decimals and returns None.
    """
    classifier = MLPClassifier(random_state = 0)
    classifier.fit(X_train, y_train)

    train_score = classifier.score(X_train, y_train)
    test_score = classifier.score(X_test, y_test)

    print("Training Accuracy: {:.3f}".format(train_score))
    print("Testing Accuracy: {:.3f}".format(test_score))


### TRAINING AND TESTING ACCURACY:

In [None]:
#Accuracy before scaling - MLP
print("Before Feature Scaling")
fitAndTest(X_train, X_test, y_train, y_test)

In [None]:
#Accuracy after scaling - MLP
print("After Feature Scaling")
fitAndTest(X_train_Rscaled, X_test_Rscaled, y_train, y_test)

### Evaluation for MLPClassifier

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
model = MLPClassifier()
model.fit(X_train,y_train)


#### Classification Report

In [None]:
y_test_pred = model.predict(X_test)
print(classification_report(y_test, y_test_pred))

#### Confusion Matrix

In [None]:
def cfm_matrix(label,feature):
    """Draw the confusion matrix of true labels against predictions as an annotated heatmap.

    Parameters
    ----------
    label : true class labels (passed to ``confusion_matrix`` as ``y_true``).
    feature : predicted class labels (passed as ``y_pred``).

    ``confusion_matrix`` puts the true classes on the rows and the predicted
    classes on the columns, with classes in sorted order. The notebook passes
    the binary ``satisfied`` column, so class 0 ("not satisfied") comes first.
    The previous version had the axis labels swapped and the tick labels reversed.
    """
    class_names = ['not satisfied', 'satisfied']  # sorted class order: 0, then 1

    plt.figure(figsize = (10, 5))
    sns.heatmap(confusion_matrix(label,feature), annot = True, cmap = "Reds", fmt = ".0f", xticklabels = class_names, yticklabels = class_names)
    # heatmap columns run along x (predictions); rows run along y (ground truth)
    plt.xlabel("Predicted values")
    plt.ylabel("Actual values")
    plt.show()

In [None]:
cfm_matrix(y_test, y_test_pred)

#### Cross Validation

In [None]:
# Cross validation for MLP: 5-fold accuracy of a default MLPClassifier on the
# training split, followed by the mean over the folds.

from sklearn.model_selection import cross_val_score
model = MLPClassifier()
scores = cross_val_score(estimator=model, X=X_train, y=y_train, scoring='accuracy', cv=5)  # one score per fold
print(scores)
scores_train = scores.mean()
print("{:.3f}".format(scores_train))

In [None]:
average_scores_train = scores.mean()
print("Average model accuracy {:.3f}".format(average_scores_train))

### MODEL Tuning

In [None]:
model = MLPClassifier(max_iter=30)
print(model.get_params().keys())

In [None]:
# Hyperparameter search space for the MLP (used by both the grid and the random search).
parameters = dict(
    learning_rate_init=[10, 1, 0.1, 0.01, 0.001],
    activation=['identity', 'logistic', 'tanh', 'relu'],
    hidden_layer_sizes=[(1,), (10,), (15,), (20,), (30,)],
)
params_grid = parameters  # alias consumed by the search cells
print(params_grid)

#### GRID SEARCH FOR MLPClassifier

In [None]:
from sklearn.model_selection import GridSearchCV
# Exhaustive 5-fold search over params_grid for the MLP, scored by accuracy.
MLP_grid = GridSearchCV(
    estimator=model,
    param_grid=params_grid,
    scoring="accuracy",
    cv=5,
    return_train_score=False,
)
MLP_grid.fit(X_train, y_train)

In [None]:
pd.DataFrame(MLP_grid.cv_results_)[["mean_test_score","params"]]

In [None]:
print("The best model's accuracy: {:.3f}".format(MLP_grid.best_score_))
print("Used values:",MLP_grid.best_params_)

#### RANDOM SEARCH FOR MLPClassifier

In [None]:
from sklearn.model_selection import RandomizedSearchCV
#MLP - RANDOM Search CV
# Seeded random search: 10 sampled candidates from params_grid, 5-fold CV, scored by accuracy.
rand = RandomizedSearchCV(model,params_grid,cv=5,scoring="accuracy",return_train_score=False, n_iter=10,random_state=0)
rand.fit(X_train,y_train)
print("The best model's accuracy: {:.3f}".format(rand.best_score_))
print("Used values:",rand.best_params_)
# The per-candidate table must be the last expression of the cell to be rendered;
# in the middle of the cell it was built and silently discarded.
pd.DataFrame(rand.cv_results_)[["mean_test_score","params"]]

In [None]:
# Correlation heatmap of the encoded training frame, rounded to two decimals.
sns.set(rc={'figure.figsize': (15,10)})
matrix = df_train.corr().round(2)
sns.heatmap(data=matrix, annot=True)

### Evaluation with hyperparameters

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
model = MLPClassifier(activation = 'logistic', hidden_layer_sizes = (30,), learning_rate_init = 0.001) # hand-copied hyperparameters; presumably the best values from one of the searches above - TODO confirm which
model.fit(X_train,y_train)

#### Classification Report

In [None]:
y_test_pred = model.predict(X_test)
print(classification_report(y_test, y_test_pred))

#### Confusion Matrix

In [None]:
def cfm_matrix(label,feature):
    """Draw the confusion matrix of true labels against predictions as an annotated heatmap.

    Parameters
    ----------
    label : true class labels (passed to ``confusion_matrix`` as ``y_true``).
    feature : predicted class labels (passed as ``y_pred``).

    ``confusion_matrix`` puts the true classes on the rows and the predicted
    classes on the columns, with classes in sorted order. The notebook passes
    the binary ``satisfied`` column, so class 0 ("not satisfied") comes first.
    The previous version had the axis labels swapped and the tick labels reversed.
    """
    class_names = ['not satisfied', 'satisfied']  # sorted class order: 0, then 1

    plt.figure(figsize = (10, 5))
    sns.heatmap(confusion_matrix(label,feature), annot = True, cmap = "Reds", fmt = ".0f", xticklabels = class_names, yticklabels = class_names)
    # heatmap columns run along x (predictions); rows run along y (ground truth)
    plt.xlabel("Predicted values")
    plt.ylabel("Actual values")
    plt.show()

In [None]:
cfm_matrix(y_test, y_test_pred)

#### Cross Validation

In [None]:
#Cross validation for MLP:
#to calculate average model accuracy of the tuned model

from sklearn.model_selection import cross_val_score
# This cell belongs to the "Evaluation with hyperparameters" section, but it
# used to re-create a default MLPClassifier() and so repeated the baseline
# cross-validation. Use the same tuned hyperparameters as the evaluation model.
model = MLPClassifier(activation = 'logistic', hidden_layer_sizes = (30,), learning_rate_init = 0.001)
scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy') #to store the value of each fold
print(scores)
scores_train = scores.mean()
print("{:.3f}".format(scores_train))

In [None]:
average_scores_train = scores.mean()
print("Average model accuracy {:.3f}".format(average_scores_train))