# Personality prediction model 
This notebook is used to train a model to predict the personality of a user. For see the results of each line, see the archive network on the folder draft.

## Data management

In [28]:
# Importing libraries 

import pandas as pd 
import numpy as np
import tensorflow as tf
from pathlib import Path
import os


In [29]:
# Path.__version__

In [30]:
# Locate the CSV relative to the current working directory. Joining the parts with `/`
# lets pathlib choose the separator, so the path also resolves on non-Windows systems
# (hard-coded '\\' separators only work on Windows).
place = str(Path.cwd() / 'data' / 'data.csv')        # finding database
data = pd.read_csv(place, sep='\t')                  # opening database (tab-separated file)

In [31]:
df = data.copy(deep=True)   # work on an independent copy so the loaded `data` frame stays untouched

In [32]:
# Remove the 'dateload' column. Reassigning instead of mutating in place, together with
# errors='ignore', keeps this cell safe to re-run (a second in-place drop would raise KeyError).
df = df.drop(columns=['dateload'], errors='ignore')

In [33]:
# Boolean mask of the rows to keep: a row is rejected when IE is 0 or 3, or when gender is 0
filt = ~(df['IE'].isin([0, 3]) | df['gender'].eq(0))

In [34]:
# Applying filter. The explicit .copy() makes df an independent frame, so the column
# assignments performed further down modify it directly instead of triggering
# SettingWithCopyWarning on a slice of the unfiltered frame.
df = df[filt].copy()

In [35]:
# Optimizing DataFrame
#
# Shrink every column except 'country' to the smallest integer dtype that can hold its
# values. pd.to_numeric(downcast='integer') checks the value range first, whereas a blind
# astype(np.int8) silently wraps anything outside [-128, 127].
# NOTE(review): whether any column actually exceeds the int8 range is not visible here —
# confirm against the dataset.

intcolumns = df.columns.drop('country')
df[intcolumns] = df[intcolumns].apply(pd.to_numeric, downcast='integer')
df['country'] = df['country'].astype('category')   # categorical storage for the country column

## Data preprocessing

In [36]:
# Label Encoding the "country" and "IE" columns

from sklearn.preprocessing import LabelEncoder

# One encoder per column: refitting a single encoder on "IE" would overwrite the country
# mapping stored in classes_, so country codes could no longer be decoded afterwards.
country_le = LabelEncoder()
df['country'] = country_le.fit_transform(df['country'].values)

# `le` keeps the meaning it had after this cell originally ran: the encoder fitted on "IE".
le = LabelEncoder()
df['IE'] = le.fit_transform(df['IE'].values)


In [37]:
# Separate the model inputs from the value to predict.
# NOTE(review): the split position is hard-coded — this assumes the target is the column
# at index 280 (the last one); confirm against the dataset schema.

X = df.iloc[:, :280].values   # columns 0..279 -> feature matrix
y = df.iloc[:, 280].values    # column 280     -> target vector

In [38]:
# Hold out 20% of the samples as a test set; random_state pins the shuffle so the
# same split is produced on every run.

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [39]:
# Standardise the features. The scaler statistics are learned from the training set only
# and then reused unchanged for the test set.

from sklearn.preprocessing import StandardScaler
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [40]:
# Show both scaled matrices (NumPy abbreviates large arrays with "...").
print(
    "X_train >>\n", X_train,
    "\nX_test >>\n", X_test,
)

X_train >>
 [[-1.90397584  0.83617006  0.01460497 ... -1.19829693  1.47572684
  -0.51535851]
 [ 0.81699383 -1.60304267 -0.10802687 ...  0.66796069 -0.66739897
  -0.76694216]
 [ 0.81699383  1.21729705  1.41805823 ... -1.19829693 -0.66739897
   0.23939247]
 ...
 [-1.22373342 -1.06946489 -1.63411196 ...  0.66796069  1.47572684
   0.32325368]
 [-0.54349101  0.72183196 -0.74843758 ...  0.66796069  1.47572684
  -0.85080338]
 [-1.22373342  0.34070497  0.91390512 ...  0.66796069 -0.66739897
  -0.76694216]] 
X_test >>
 [[-0.54349101 -0.11664741  1.49981279 ...  0.66796069 -0.66739897
  -0.51535851]
 [ 0.81699383 -0.15476011 -0.72118606 ...  0.66796069 -0.66739897
  -0.68308094]
 [ 0.81699383  1.44597324  0.6141384  ...  0.66796069 -0.66739897
   0.74255978]
 ...
 [ 0.13675141 -0.26909821 -0.42141934 ... -1.19829693 -0.66739897
  -0.51535851]
 [ 0.81699383  0.41693037 -0.20340719 ... -1.19829693 -0.66739897
  -0.51535851]
 [ 0.81699383  1.25540975  1.45893551 ... -1.19829693 -0.66739897
   3.258

## Neural network

In [41]:
# Seed TensorFlow's global random generator before building the model so that its random
# operations (such as the layers' initial weights) are repeatable between runs, matching
# the fixed random_state already used for the train/test split.
tf.random.set_seed(0)
ann = tf.keras.models.Sequential()   # Initializing the ANN

In [42]:
# ann.add(tf.keras.layers.Dense(units=70, activation='relu')) # Adding the first hidden layer

In [43]:
# ann.add(tf.keras.layers.Dense(units=35, activation='relu')) # Adding the second hidden layer

In [44]:
# ann.add(tf.keras.layers.Dense(units=15, activation='relu')) # adding the third hidden layer

In [45]:
# The only active hidden layer: 3 ReLU units (the larger layers above are commented out)
ann.add(tf.keras.layers.Dense(3, activation='relu'))

In [46]:
# Output layer: a single sigmoid unit, i.e. one value between 0 and 1 per sample
ann.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [47]:
# Compiling the ANN: Adam optimizer, binary cross-entropy loss, accuracy reported while training
ann.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [48]:
# Fitting the ANN to the training set: 8 passes over the data in mini-batches of 32 samples
ann.fit(x=X_train, y=y_train, batch_size=32, epochs=8)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<tensorflow.python.keras.callbacks.History at 0x1d819666d00>

## Prediction test

In [49]:
# Raw network outputs for the held-out samples (one sigmoid value per row)
y_pred = ann.predict(x=X_test)
y_pred

array([[0.80550265],
       [0.29992992],
       [0.29992992],
       ...,
       [0.3069834 ],
       [0.30590743],
       [0.29992992]], dtype=float32)

In [50]:
# Turn the network outputs into boolean class decisions using a 0.5 cut-off
y_pred = np.greater(y_pred, 0.5)
y_pred

array([[ True],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

In [54]:
# Confusion matrix of the test-set predictions (rows: true class, columns: predicted class)
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[834  43]
 [ 34 154]]


In [None]:
# Number of evaluated samples = sum of every cell of the confusion matrix
total = cm.sum()
total

1065

In [None]:
# Misclassified samples: the two off-diagonal cells of the 2x2 confusion matrix
erro = cm[0][1] + cm[1][0]
erro

96

In [None]:
# Accuracy in percent: 100 minus the percentage of misclassified samples
accuracy = 100 - (100 * erro) / total
accuracy

90.98591549295774