## Data management

In [163]:
# Importing libraries 
import pandas as pd 
import numpy as np
import tensorflow as tf
from pathlib import Path
import os

In [164]:
tf.__version__

'2.7.0'

In [165]:
place = str (Path(os.getcwd())) + '\\data\\data.csv' # finding database
data = pd.read_csv(place, sep='\t')                  # opening database

In [166]:
df = data.copy()                                    # copying database

In [167]:
df.drop(['dateload'], axis=1, inplace=True)       # dropping dateload column

In [168]:
filt = ~( (df['IE'] == 0 ) | (df['IE'] == 3) | (df['gender'] == 0) )        # Filter for remove invalid rows in column IE and gender

In [169]:
df = df[filt]                           # Applying filter 

In [170]:
# Optimizing DataFrame

intcolumns = df.columns.drop('country')
df[intcolumns] = df[intcolumns].astype(np.int8)
df['country'] = df['country'].astype('category')

In [171]:
# Label Encoding in country column

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['country'] = le.fit_transform(df['country'].values)


In [172]:
# Cutting data in parameters and target

X = df.iloc[:, 0:280].values        # all columns except the last one
y = df.iloc[:,280].values           # getting the last column(target)

In [173]:
print("X >>\n",X)
print("y >>\n",y)

X >>
 [[   5   39  -46 ...    1    2   25]
 [   3   17   21 ...    1    2   19]
 [   5   41  -17 ...    1    1   23]
 ...
 [   3   29  -38 ...    2    2   28]
 [   4   15 -127 ...    2    1   19]
 [   5   57 -122 ...    2    1   25]]
y >>
 [2 1 1 ... 1 1 1]


In [174]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [175]:
# Splitting the data into training and test sets

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [176]:
# Feature Scaling\

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [177]:
print(np.shape(X),'\n',np.shape(X_train),'\n',np.shape(X_test))

(5323, 370) 
 (4258, 370) 
 (1065, 370)


## Neural networks

In [178]:
ann = tf.keras.models.Sequential()   # Initializing the ANN

In [179]:
# Adding the input layer and the hiddens layers
ann.add(tf.keras.layers.Dense(units=3, activation='relu'))
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))
ann.add(tf.keras.layers.Dense(units=3, activation='relu'))
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

In [180]:
ann.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) # compiling the ANN

In [189]:
ann.fit(X_train, y_train, batch_size = 16, epochs = 50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x18c60a34460>

In [190]:
y_pred = ann.predict(X_test)

In [194]:
y_pred  = (y_pred > 1)
if y_pred > 1:
    print (y_pred)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [192]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)

[[  0   0   0]
 [877   0   0]
 [188   0   0]]
