# Artificial Neural Network

### Importing the libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

## Part 1 - Data Preprocessing

### Importing the dataset

In [2]:
# Load the dataset from the CSV file in the working directory.
dataset = pd.read_csv('interviews_episodes.csv')
# Features: every column except the first and the last.
# `to_numpy()` always yields a NumPy ndarray, whereas `Series.values` can hand
# back a pandas extension array (which has no `reshape`) for extension dtypes.
X = dataset.iloc[:, 1:-1].to_numpy()
# Target: the last column.
y = dataset.iloc[:, -1].to_numpy()

In [3]:
# Inspect the feature matrix extracted from the dataset.
print(X)

[[   0    1    1 ...    0   90 1470]
 [  -1   -1    2 ...   24  120 1790]
 [   0   -1    2 ...   24   90 1545]
 ...
 [   0    0    1 ...   17   90  300]
 [  -1   -1    2 ...   26  120 1710]
 [   0   -1    1 ...   25   90 1500]]


In [4]:
# Inspect the raw target labels.
print(y)

['N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'M' 'N' 'N' 'N' 'N' 'N' 'N'
 'N' 'N' 'M' 'M' 'N' 'M' 'N' 'N' 'M' 'M' 'N' 'N' 'N' 'M' 'N' 'N' 'N' 'N'
 'N' 'M' 'M' 'N' 'M' 'N' 'N' 'N' 'N' 'N' 'M' 'N' 'N' 'N' 'M' 'N' 'N' 'M'
 'M' 'N' 'N' 'N' 'M' 'N' 'N' 'N' 'M' 'M' 'N' 'N' 'M' 'N' 'N' 'N' 'M' 'N'
 'N' 'M' 'N' 'N' 'N' 'M' 'M' 'N' 'N' 'M' 'N' 'N' 'N' 'N' 'M' 'D' 'N' 'D'
 'N' 'D' 'D' 'D' 'N' 'N' 'D' 'D' 'N' 'D' 'D' 'N' 'N' 'D' 'D' 'D' 'N' 'N'
 'D' 'D' 'D' 'D' 'N' 'N' 'D' 'D' 'N' 'N' 'D' 'N' 'D' 'D' 'N' 'N' 'N' 'N'
 'N' 'N' 'D' 'N' 'N' 'N' 'D' 'D' 'N' 'D' 'D' 'D' 'N' 'N' 'N' 'D' 'N' 'D'
 'D' 'D' 'D' 'N' 'D' 'N' 'D' 'N' 'D' 'D' 'N' 'D' 'D' 'D' 'N' 'N' 'N' 'D'
 'D' 'D' 'D' 'N' 'D' 'D' 'D' 'D' 'D' 'N' 'N' 'D' 'N' 'D' 'N' 'D' 'N' 'N'
 'N' 'D' 'N' 'D' 'N' 'N' 'D' 'N' 'N' 'N' 'D' 'N' 'D' 'N' 'D' 'N' 'N' 'N'
 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N'
 'N' 'N' 'N' 'D' 'D' 'N' 'N' 'D' 'D' 'N' 'N' 'D' 'N' 'D' 'N' 'N' 'D' 'D'
 'N' 'N' 'N' 'D' 'N' 'N' 'D' 'D' 'N' 'N' 'N' 'N' 'D

### Encoding categorical data

Reshaping the target `y` into a single-column 2-D array (required by the encoder in the next step)

In [5]:
# Turn the 1-D label vector into a 2-D array with exactly one column
# (one label per row).
y = np.array(y.reshape((len(y), 1)))

In [6]:
# Confirm the labels are now laid out as a single column (one label per row).
print(y)

[['N']
 ['N']
 ['N']
 ['N']
 ['N']
 ['N']
 ['N']
 ['N']
 ['N']
 ['N']
 ['N']
 ['M']
 ['N']
 ['N']
 ['N']
 ['N']
 ['N']
 ['N']
 ['N']
 ['N']
 ['M']
 ['M']
 ['N']
 ['M']
 ['N']
 ['N']
 ['M']
 ['M']
 ['N']
 ['N']
 ['N']
 ['M']
 ['N']
 ['N']
 ['N']
 ['N']
 ['N']
 ['M']
 ['M']
 ['N']
 ['M']
 ['N']
 ['N']
 ['N']
 ['N']
 ['N']
 ['M']
 ['N']
 ['N']
 ['N']
 ['M']
 ['N']
 ['N']
 ['M']
 ['M']
 ['N']
 ['N']
 ['N']
 ['M']
 ['N']
 ['N']
 ['N']
 ['M']
 ['M']
 ['N']
 ['N']
 ['M']
 ['N']
 ['N']
 ['N']
 ['M']
 ['N']
 ['N']
 ['M']
 ['N']
 ['N']
 ['N']
 ['M']
 ['M']
 ['N']
 ['N']
 ['M']
 ['N']
 ['N']
 ['N']
 ['N']
 ['M']
 ['D']
 ['N']
 ['D']
 ['N']
 ['D']
 ['D']
 ['D']
 ['N']
 ['N']
 ['D']
 ['D']
 ['N']
 ['D']
 ['D']
 ['N']
 ['N']
 ['D']
 ['D']
 ['D']
 ['N']
 ['N']
 ['D']
 ['D']
 ['D']
 ['D']
 ['N']
 ['N']
 ['D']
 ['D']
 ['N']
 ['N']
 ['D']
 ['N']
 ['D']
 ['D']
 ['N']
 ['N']
 ['N']
 ['N']
 ['N']
 ['N']
 ['D']
 ['N']
 ['N']
 ['N']
 ['D']
 ['D']
 ['N']
 ['D']
 ['D']
 ['D']
 ['N']
 ['N']
 ['N']
 ['D']
 ['N']

One-hot encoding the target column `y`

In [7]:
# One-hot encode column 0 of `y` (the only column), producing one indicator
# column per distinct label.
# `sparse_threshold=0` forces a dense result: with the default threshold (0.3)
# the transformer switches to a SciPy sparse matrix as soon as the output
# density drops below 0.3 (i.e. four or more classes), and wrapping a sparse
# matrix in `np.array` yields a 0-d object array instead of a 2-D matrix.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [0])],
    sparse_threshold=0,
)
# NOTE(review): a 2-D indicator target makes scikit-learn treat the task as
# multilabel, so a sample may be predicted with zero or several classes --
# confirm this is intended rather than fitting on the 1-D labels.
y = np.array(ct.fit_transform(y))

In [8]:
# Display the one-hot encoded target (one indicator column per class).
y

array([[0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       ...,
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.]])

### Splitting the dataset into the Training set and Test set

In [9]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [10]:
# Display the training features before scaling.
X_train

array([[  -1,    0,    2, ...,   34,   90, 1500],
       [  -1,   -1,    2, ...,   26,  120, 1515],
       [  -2,   -2,    1, ...,    0,   90, 1400],
       ...,
       [  -1,   -1,    3, ...,    0,   90, 1400],
       [   0,    0,    2, ...,   27,  150, 1870],
       [  -1,   -1,    1, ...,    0,   90, 1670]])

### Feature Scaling

In [11]:
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so no test-set statistics are used.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [12]:
# Inspect the standardized training features.
print(X_train)

[[-0.35725437  0.80699977  0.44180447 ...  1.21397781 -0.15111299
   0.2553038 ]
 [-0.35725437 -0.18874935  0.44180447 ...  0.59248849  0.44617612
   0.29329223]
 [-1.33970389 -1.18449847 -0.97690927 ... -1.42735181 -0.15111299
   0.0020476 ]
 ...
 [-0.35725437 -0.18874935  1.86051822 ... -1.42735181 -0.15111299
   0.0020476 ]
 [ 0.62519515  0.80699977  0.44180447 ...  0.67017466  1.04346522
   1.19235175]
 [-0.35725437 -0.18874935 -0.97690927 ... -1.42735181 -0.15111299
   0.68583935]]


In [13]:
# Display the training targets (one-hot rows).
y_train

array([[1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       ...,
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.]])

## Part 2 - Building the MLP

### Initializing the MLP

In [14]:
# Multi-layer perceptron with four hidden layers (81, 27, 9 and 3 units),
# ReLU activations and the Adam solver; seeded so training is repeatable.
clf = MLPClassifier(
    hidden_layer_sizes=(81, 27, 9, 3),
    activation="relu",
    solver='adam',
    random_state=1,
)

## Part 3 - Training the MLP

### Training the MLP on the Training set

In [15]:
# Train the network on the scaled training features and encoded targets.
clf.fit(X=X_train, y=y_train)



## Part 4 - Making the predictions and evaluating the model

### Predicting the Test set results

In [16]:
# Predict the encoded targets for the held-out test features.
ytest_prediction = clf.predict(X=X_test)

In [17]:
# Mean accuracy of the fitted classifier on the test split.
clf.score(X=X_test, y=y_test)

0.7769230769230769

In [18]:
# Accuracy computed from the stored predictions; with indicator-matrix targets
# scikit-learn reports subset accuracy (a row counts only if all of its
# columns match).
accuracy_score(y_true=y_test, y_pred=ytest_prediction)

0.7769230769230769

In [19]:
# Micro-averaged F1 over all classes: true/false positives and false negatives
# are pooled across the indicator columns before computing the score.
# (`f1_score` is imported with the other dependencies in the notebook's
# import cell rather than here, so all dependencies are visible in one place.)
f1_score(y_true=y_test, y_pred=ytest_prediction, average='micro')

0.7769230769230768