In [None]:
# --- DATA ANALYSIS AND VISUALIZATION ---
import pandas                as pd
import numpy                 as np
import seaborn               as sns
import matplotlib.pyplot     as plt
import plotly.express        as px
import plotly.graph_objs     as go
import plotly.offline        as pyo

# ---  DATA TREATMENT  ---
from sklearn.preprocessing   import LabelEncoder

# --- MACHINE LEARNING ---
from sklearn                 import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics         import confusion_matrix, accuracy_score
#   - Naive Bayes
from sklearn.naive_bayes import GaussianNB
#   - K-means
from sklearn.cluster import KMeans
#   - Decision Tree and Random Forest
from sklearn.tree            import DecisionTreeClassifier
from sklearn.ensemble        import RandomForestClassifier
#   - Neural Network
from keras.models            import Sequential
from keras.layers            import Dense
from keras.utils             import np_utils

## ***Random Forest***
### Predicting if a client is **GOOD** or **BAD**

In [None]:
# Read the credit dataset from the mounted Drive folder and report its
# dimensions as (rows, columns).
CREDIT_CSV = '/content/drive/MyDrive/Datasets/Credit.csv'
credit = pd.read_csv(CREDIT_CSV)
credit.shape

(1000, 21)

In [None]:
# Preview the first five rows to inspect the column names and raw values
credit.head()

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,class
0,<0,6,'critical/other existing credit',radio/tv,1169,'no known savings',>=7,4,'male single',none,...,'real estate',67,none,own,2,skilled,1,yes,yes,good
1,0<=X<200,48,'existing paid',radio/tv,5951,<100,1<=X<4,2,'female div/dep/mar',none,...,'real estate',22,none,own,1,skilled,1,none,yes,bad
2,'no checking',12,'critical/other existing credit',education,2096,<100,4<=X<7,2,'male single',none,...,'real estate',49,none,own,1,'unskilled resident',2,none,yes,good
3,<0,42,'existing paid',furniture/equipment,7882,<100,4<=X<7,2,'male single',guarantor,...,'life insurance',45,none,'for free',1,skilled,2,none,yes,good
4,<0,24,'delayed previously','new car',4870,<100,1<=X<4,3,'male single',none,...,'no known property',53,none,'for free',2,skilled,2,none,yes,bad


> ### Obs: several columns of the dataset hold `object` (text) data. They must be encoded as `numeric` data before the model can be trained.

In [None]:
# Split the frame by column position: the first 20 columns are the
# predictors, and the column at position 20 is the class to predict.
LABEL_POSITION = 20
data = credit.iloc[:, :LABEL_POSITION].values
target = credit.iloc[:, LABEL_POSITION].values


In [None]:
# Transforming into numeric data with LabelEncoder
# Positions (within `data`) of the text columns that need encoding; the
# remaining positions are left untouched.
CATEGORICAL_COLUMNS = (0, 2, 3, 5, 6, 8, 9, 11, 13, 14, 16, 18, 19)

# One fitted encoder is kept per column so that the integer codes can be
# mapped back to the original categories later (encoder.inverse_transform).
encoders = {}

for column in CATEGORICAL_COLUMNS:
    labelencoder = LabelEncoder()
    data[:, column] = labelencoder.fit_transform(data[:, column])
    encoders[column] = labelencoder

In [None]:
# Column 0 is checking_status according to the head() preview (credit_history
# is column 2). Its text categories are now integer codes; LabelEncoder
# numbers the distinct categories starting at 0.
data[:, 0]

In [None]:
# Hold out 30% of the rows for testing and train on the remaining 70%.
# The fixed random_state keeps the split identical between runs.
credit_split = train_test_split(data, target, test_size=0.3, random_state=0)
X_train, X_test, y_train, y_test = credit_split

In [None]:
# CREATING THE MODEL
# A forest of 100 trees. random_state pins the randomness used while building
# the trees, so re-running the notebook reproduces the same model and the
# same metrics (the train/test split above is already seeded the same way).
forest = RandomForestClassifier(n_estimators=100, random_state=0)
forest.fit(X_train, y_train)

RandomForestClassifier()

In [None]:
# TESTING
# Predict the class of every held-out sample
predictions = forest.predict(X_test)

# Confusion Matrix
# Following scikit-learn's convention, rows are the actual classes and
# columns the predicted ones, with labels in sorted order.
conf_matrix = confusion_matrix(y_test, predictions)

# Model Accuracy
# hit_rate is the fraction of correct predictions; missing_rate is the
# complementary fraction of wrong ones.
hit_rate = accuracy_score(y_test, predictions)
missing_rate = 1 - hit_rate

print(f'Confusion Matrix:\n{conf_matrix}\n')
print(f'HIT RATE: {hit_rate:.2f}\nMISSING RATE:{missing_rate:.2f}')

Confusion Matrix:
[[ 39  47]
 [ 17 197]]

HIT RATE: 0.79
MISSING RATE:0.21


In [None]:
# Inspect the label predicted for the first test sample
predictions[0]

'bad'

## ***Neural Network***

In [None]:
# Fetch the Iris dataset bundled with scikit-learn's `datasets` module
base = datasets.load_iris()

# Features and labels come ready to use, so no cleaning step is needed
data, target = base.data, base.target

target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [None]:
# One-hot encoding the target: each label becomes a row with a 1 in the
# column of its class and 0 in the others.
# Columns follow the label values 0, 1, 2: Setosa, Versicolor, Virginica
# (verify the order with base.target_names)
target_dummy = np_utils.to_categorical(target)
target_dummy

In [None]:
# Reserve 30% of the samples for testing and keep 70% for training.
# A fixed random_state makes the split repeatable.
iris_split = train_test_split(data, target_dummy, test_size=0.3, random_state=0)
X_train, X_test, y_train, y_test = iris_split

In [None]:
# CREATING THE NEURAL NETWORK
neural_network = Sequential()

# Connecting layers with Dense (fully connected layers).
# The hidden layers use ReLU: a Dense layer applies no activation unless one
# is given, and a stack of purely linear layers collapses into a single
# linear map, which would make the extra layers pointless.

# First layer with 5 neurons and 4 entries (one per input feature)
neural_network.add(Dense(units = 5, input_dim = 4, activation = 'relu'))

# Second layer
neural_network.add(Dense(units = 4, activation = 'relu'))

# Third layer: one output per class; softmax turns the outputs into
# probabilities that sum to 1
neural_network.add(Dense(units = 3, activation = 'softmax'))

In [None]:
# Visualizing the Neural Network: prints every layer with its output shape
# and number of parameters, followed by the totals
neural_network.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 5)                 25        
                                                                 
 dense_1 (Dense)             (None, 4)                 24        
                                                                 
 dense_2 (Dense)             (None, 3)                 15        
                                                                 
Total params: 64
Trainable params: 64
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Compiling and training the neural network
# categorical_crossentropy matches the one-hot encoded targets.
neural_network.compile(optimizer = 'adam', loss = 'categorical_crossentropy',
                       metrics = ['accuracy'])

# verbose = 0 silences the per-epoch log, which would otherwise add 1000
# progress entries to the notebook. The returned History object is kept so
# the per-epoch metrics stay available for inspection or plotting.
# The test split is passed as validation_data only to monitor the metrics on
# unseen samples after each epoch.
history = neural_network.fit(X_train, y_train,
                             epochs = 1000,
                             validation_data = (X_test, y_test),
                             verbose = 0)

# Metrics reached in the last epoch (training and validation)
{metric: values[-1] for metric, values in history.history.items()}

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7f22c23ca690>

In [None]:
# Class probabilities: one row per test sample, one column per class
probabilities = neural_network.predict(X_test)

# Flag the most probable class of each row as True. A fixed 0.5 cut-off can
# leave a row with no True at all when no class reaches 0.5, so the argmax is
# used instead; the result is still a boolean matrix of the same shape.
n_classes = probabilities.shape[1]
predictions = np.eye(n_classes, dtype = bool)[probabilities.argmax(axis = 1)]

predictions

array([[False, False,  True],
       [False,  True, False],
       [ True, False, False],
       [False, False,  True],
       [ True, False, False],
       [False, False,  True],
       [ True, False, False],
       [False,  True, False],
       [False,  True, False],
       [False,  True, False],
       [False, False,  True],
       [False,  True, False],
       [False,  True, False],
       [False,  True, False],
       [False,  True, False],
       [ True, False, False],
       [False,  True, False],
       [False,  True, False],
       [ True, False, False],
       [ True, False, False],
       [False, False,  True],
       [False,  True, False],
       [ True, False, False],
       [ True, False, False],
       [False, False,  True],
       [ True, False, False],
       [ True, False, False],
       [False,  True, False],
       [False,  True, False],
       [ True, False, False],
       [False, False,  True],
       [False,  True, False],
       [ True, False, False],
       [Fa