# Artificial Neural Network

### Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

In [2]:
# Show the installed TensorFlow version so the run can be reproduced later.
tf.__version__

'2.12.0'

## Part 1 - Data Preprocessing

### Importing the dataset

In [3]:
# Read the CSV, then split it column-wise: column 0 is skipped, the last
# column is the target and every column in between is a feature.
dataset = pd.read_csv('interviews_episodes.csv')
X, y = dataset.iloc[:, 1:-1].values, dataset.iloc[:, -1].values

In [4]:
# Print the feature matrix (NumPy abbreviates the middle rows/columns with "...").
print(X)

[[   0    1    1 ...    0   90 1470]
 [  -1   -1    2 ...   24  120 1790]
 [   0   -1    2 ...   24   90 1545]
 ...
 [   0    0    1 ...   17   90  300]
 [  -1   -1    2 ...   26  120 1710]
 [   0   -1    1 ...   25   90 1500]]


In [5]:
# Print the raw target labels; the saved output shows the string classes 'N', 'M' and 'D'.
print(y)

['N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'M' 'N' 'N' 'N' 'N' 'N' 'N'
 'N' 'N' 'M' 'M' 'N' 'M' 'N' 'N' 'M' 'M' 'N' 'N' 'N' 'M' 'N' 'N' 'N' 'N'
 'N' 'M' 'M' 'N' 'M' 'N' 'N' 'N' 'N' 'N' 'M' 'N' 'N' 'N' 'M' 'N' 'N' 'M'
 'M' 'N' 'N' 'N' 'M' 'N' 'N' 'N' 'M' 'M' 'N' 'N' 'M' 'N' 'N' 'N' 'M' 'N'
 'N' 'M' 'N' 'N' 'N' 'M' 'M' 'N' 'N' 'M' 'N' 'N' 'N' 'N' 'M' 'D' 'N' 'D'
 'N' 'D' 'D' 'D' 'N' 'N' 'D' 'D' 'N' 'D' 'D' 'N' 'N' 'D' 'D' 'D' 'N' 'N'
 'D' 'D' 'D' 'D' 'N' 'N' 'D' 'D' 'N' 'N' 'D' 'N' 'D' 'D' 'N' 'N' 'N' 'N'
 'N' 'N' 'D' 'N' 'N' 'N' 'D' 'D' 'N' 'D' 'D' 'D' 'N' 'N' 'N' 'D' 'N' 'D'
 'D' 'D' 'D' 'N' 'D' 'N' 'D' 'N' 'D' 'D' 'N' 'D' 'D' 'D' 'N' 'N' 'N' 'D'
 'D' 'D' 'D' 'N' 'D' 'D' 'D' 'D' 'D' 'N' 'N' 'D' 'N' 'D' 'N' 'D' 'N' 'N'
 'N' 'D' 'N' 'D' 'N' 'N' 'D' 'N' 'N' 'N' 'D' 'N' 'D' 'N' 'D' 'N' 'N' 'N'
 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N'
 'N' 'N' 'N' 'D' 'D' 'N' 'N' 'D' 'D' 'N' 'N' 'D' 'N' 'D' 'N' 'N' 'D' 'D'
 'N' 'N' 'N' 'D' 'N' 'N' 'D' 'D' 'N' 'N' 'N' 'N' 'D

### Encoding categorical data

Reshaping the target `y` into a single-column 2-D array

In [6]:
# Turn the 1-D label vector into an (n_samples, 1) column so that column
# index 0 can be selected by the ColumnTransformer in the next step.
y = np.array(y.reshape((len(y), 1)))

In [7]:
#print(y)

One Hot Encoding the Y column

In [8]:
# One-hot encode the single label column (column 0 of y).
# sparse_threshold=0 makes ColumnTransformer always return a dense array.
# With the default threshold (0.3) the result silently becomes a SciPy sparse
# matrix once the one-hot output is sparse enough (more than three classes),
# and np.array() would then wrap it in a useless 0-d object array.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [0])],
    sparse_threshold=0,
)
y = np.array(ct.fit_transform(y))

In [9]:
# Display the one-hot encoded targets: one row per sample, one column per class.
y

array([[0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       ...,
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.]])

### Splitting the dataset into the Training set and Test set

In [10]:
# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [11]:
# Display the (still unscaled) training features.
X_train

array([[  -1,    0,    2, ...,   34,   90, 1500],
       [  -1,   -1,    2, ...,   26,  120, 1515],
       [  -2,   -2,    1, ...,    0,   90, 1400],
       ...,
       [  -1,   -1,    3, ...,    0,   90, 1400],
       [   0,    0,    2, ...,   27,  150, 1870],
       [  -1,   -1,    1, ...,    0,   90, 1670]])

In [12]:

# Show the shape and the values of the first training row, one per line.
print(X_train[0].shape, X_train[0], sep='\n')

(9,)
[  -1    0    2    1    2    1   34   90 1500]


### Feature Scaling

In [13]:
# Learn the per-feature mean and standard deviation from the training rows
# only, then apply that same transform to both splits so that no test-set
# statistics leak into training.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [14]:
# Shape of the scaled training matrix as (samples, features).
print(X_train.shape)

(517, 9)


In [15]:
# Add a trailing axis of length 1 so the 2-D feature matrix becomes the 3-D
# (samples, steps, channels) tensor fed to the Conv1D layers; each feature
# column is treated as one step.
sample_size, time_steps = X_train.shape  # samples in train set, features in train set
input_dimension = 1                      # each feature is represented by 1 number
X_train_reshaped = X_train.reshape((sample_size, time_steps, input_dimension))

In [16]:
# Confirm the training tensor is now 3-D: (samples, steps, channels).
print(X_train_reshaped.shape)

(517, 9, 1)


In [17]:
# Same trailing single-channel axis as the training tensor, applied to the test split.
X_test_reshaped = X_test.reshape(X_test.shape + (1,))

## Part 2 - Building the 1D-CNN

### Building and compiling the 1D-CNN

In [18]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation (and the shuffling done later by fit) is repeatable from one
# run to the next. Some GPU kernels may still add small run-to-run noise.
tf.keras.utils.set_random_seed(0)

# The network input is (steps, channels), taken from the reshaped training tensor.
n_timesteps = X_train_reshaped.shape[1]
n_features  = X_train_reshaped.shape[2]

model = tf.keras.Sequential()
model.add(tf.keras.layers.Input(shape=(n_timesteps,n_features)))

# Convolutional feature extractor: three stacked Conv1D layers with a
# decreasing number of filters, followed by max-pooling.
model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(tf.keras.layers.Conv1D(filters=32, kernel_size=2, activation='relu'))
model.add(tf.keras.layers.Conv1D(filters=16, kernel_size=2, activation='relu'))
model.add(tf.keras.layers.MaxPooling1D(pool_size=2))

# Classifier head: flatten, two hidden dense layers, then a 3-way softmax
# (one output per one-hot label column).
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(96, activation='relu'))
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(3, activation='softmax'))

# categorical_crossentropy is used because the targets are one-hot encoded.
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

## Part 3 - Training the ANN

### Training the 1D-CNN on the Training set

In [19]:
# Train for 200 epochs in mini-batches of 5 samples. The History object
# returned by fit() is the value displayed by this cell.
model.fit(
    X_train_reshaped,
    y_train,
    batch_size=5,
    epochs=200,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.callbacks.History at 0x7a011410dfc0>

### Evaluating the 1D-CNN on the Test set

In [20]:
# Score the trained model on the held-out test tensor. The displayed list is
# [loss, accuracy], i.e. the loss followed by the metrics given to compile().
model.evaluate(
    X_test_reshaped,
    y_test,
    batch_size=5,
)



[1.863640546798706, 0.7076923251152039]

## Part 4 - Making the predictions and evaluating the model

### Predicting the Test set results

In [21]:
# Run the trained network on the reshaped test features; each output row
# holds the softmax outputs of the final 3-unit layer for one test sample.
y_pred = model.predict(X_test_reshaped)



In [22]:
# Display the predicted probabilities (one row per test sample, one column per class).
y_pred

array([[5.79334330e-04, 7.62766123e-01, 2.36654520e-01],
       [2.27581893e-07, 6.65589823e-08, 9.99999702e-01],
       [2.61679292e-01, 2.63337314e-01, 4.74983335e-01],
       [4.62476737e-06, 2.93786627e-07, 9.99995053e-01],
       [9.83197212e-01, 8.91554446e-05, 1.67134814e-02],
       [8.37182552e-02, 4.44232821e-01, 4.72048879e-01],
       [8.37182552e-02, 4.44232821e-01, 4.72048879e-01],
       [4.49000581e-05, 1.62196258e-07, 9.99954879e-01],
       [3.51735540e-02, 3.50222141e-02, 9.29804265e-01],
       [2.10798770e-25, 1.84956685e-16, 9.99999940e-01],
       [7.66250014e-01, 7.17733842e-14, 2.33749986e-01],
       [7.12620258e-01, 3.81000355e-05, 2.87341565e-01],
       [2.29715843e-05, 5.36702913e-08, 9.99976933e-01],
       [1.68941606e-05, 1.43901346e-07, 9.99982893e-01],
       [2.89135824e-06, 1.44073978e-12, 9.99997079e-01],
       [2.47077423e-06, 1.17976753e-10, 9.99997437e-01],
       [9.99944866e-01, 3.72121415e-13, 5.51282137e-05],
       [1.23062127e-04, 2.74816

In [23]:
# Output buffers filled in place by round() and encode(): one-hot rows and
# single-label rows respectively. Two distinct, initially empty lists.
ytest_predictionRounded, y_predEncoded = [], []


In [24]:
def round(predicted,rounded):
  """Append a one-hot row to `rounded` for every row of `predicted`.

  For each row the column holding the largest value becomes 1.0 and every
  other column becomes 0.0. When several columns share the maximum, the
  lowest column index wins, so the chosen class is always one of the
  most probable ones.

  NOTE: this function shadows Python's built-in round() for the rest of the
  notebook; the name is kept so that existing calls keep working.

  Args:
    predicted: 2-D array-like of shape (n_samples, n_classes) holding class
      scores or probabilities. Any number of classes is accepted.
    rounded: list that receives one list of floats per sample. It is
      modified in place and is not cleared first.

  Returns:
    None. The results are delivered through `rounded`.

  Raises:
    ValueError: if `predicted` is non-empty but not 2-D.
  """
  probs = np.asarray(predicted)
  if len(probs) == 0:
    # Nothing to convert; leave the output list untouched.
    return
  if probs.ndim != 2:
    raise ValueError("predicted must be 2-D: (n_samples, n_classes)")

  n_classes = probs.shape[1]
  # argmax returns the first index of the maximum in each row.
  for winner in probs.argmax(axis=1):
    one_hot = [0.] * n_classes
    one_hot[winner] = 1.
    rounded.append(one_hot)

def encode(predicted,encoded,labels=('D','M','N')):
  """Append the winning class label to `encoded` for every row of `predicted`.

  For each row the column holding the largest value is looked up in `labels`
  and appended as a one-element list, e.g. ['D']. When several columns share
  the maximum, the lowest column index wins, so the chosen label always
  belongs to one of the most probable classes.

  Args:
    predicted: 2-D array-like of shape (n_samples, n_classes) holding class
      scores or probabilities.
    encoded: list that receives one single-element list per sample. It is
      modified in place and is not cleared first.
    labels: label for each column of `predicted`, in column order. The
      default is presumably the column order produced when y was one-hot
      encoded (OneHotEncoder sorts its categories) -- verify if the label
      set changes.

  Returns:
    None. The results are delivered through `encoded`.

  Raises:
    ValueError: if `predicted` is non-empty but not 2-D, or if its number of
      columns differs from len(labels).
  """
  probs = np.asarray(predicted)
  if len(probs) == 0:
    # Nothing to convert; leave the output list untouched.
    return
  if probs.ndim != 2:
    raise ValueError("predicted must be 2-D: (n_samples, n_classes)")
  if probs.shape[1] != len(labels):
    raise ValueError("predicted has %d columns but %d labels were given"
                     % (probs.shape[1], len(labels)))

  # argmax returns the first index of the maximum in each row.
  for winner in probs.argmax(axis=1):
    encoded.append([labels[winner]])


In [25]:
# round() appends to the list it is given, so empty the list first. Without
# this, re-running the cell would add a second copy of the predictions and
# the list would no longer have one row per test sample.
ytest_predictionRounded.clear()
round(y_pred,ytest_predictionRounded)

In [26]:
# Fraction of test samples whose one-hot prediction matches the one-hot label
# exactly. accuracy_score is imported in the notebook's import cell at the top.
accuracy_score(y_test, ytest_predictionRounded)

0.7076923076923077

In [27]:
# Micro-averaged F1 over the one-hot label columns. Every sample has exactly
# one true and one predicted class, so this equals the accuracy reported
# above. f1_score is imported in the notebook's import cell at the top.
f1_score(y_test, ytest_predictionRounded, average='micro')

0.7076923076923077