Implementando Deep Learning

In [1]:
# Importação das bibliotecas
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

In [2]:
# Importando a base de dados
df = pd.read_csv('./data/survey_lung_cancer.csv')
df.head()

Unnamed: 0,GENDER,AGE,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN,LUNG_CANCER
0,M,69,1,2,2,1,1,2,1,2,2,2,2,2,2,YES
1,M,74,2,1,1,1,2,2,2,1,1,1,2,2,2,YES
2,F,59,1,1,1,2,1,2,1,2,1,2,2,1,2,NO
3,M,63,2,2,2,1,1,1,1,1,2,1,1,2,2,NO
4,F,63,1,2,1,1,1,1,1,2,1,2,2,1,1,NO


In [3]:
# Visualizando  o tamanho da base de dados
df.shape

(309, 16)

In [4]:
# Verificando se á dados duplicados
df.duplicated().sum()

33

In [5]:
# Excluído e verificando se á dados duplicados
df.drop_duplicates(keep='first', inplace=True)
df.duplicated().sum()

0

In [6]:
# Visualizando  o tamanho da base de dados e as informação
print(df.shape)
df.info()

(276, 16)
<class 'pandas.core.frame.DataFrame'>
Int64Index: 276 entries, 0 to 283
Data columns (total 16 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   GENDER                 276 non-null    object
 1   AGE                    276 non-null    int64 
 2   SMOKING                276 non-null    int64 
 3   YELLOW_FINGERS         276 non-null    int64 
 4   ANXIETY                276 non-null    int64 
 5   PEER_PRESSURE          276 non-null    int64 
 6   CHRONIC DISEASE        276 non-null    int64 
 7   FATIGUE                276 non-null    int64 
 8   ALLERGY                276 non-null    int64 
 9   WHEEZING               276 non-null    int64 
 10  ALCOHOL CONSUMING      276 non-null    int64 
 11  COUGHING               276 non-null    int64 
 12  SHORTNESS OF BREATH    276 non-null    int64 
 13  SWALLOWING DIFFICULTY  276 non-null    int64 
 14  CHEST PAIN             276 non-null    int64 
 15  LUNG_CANCER  

In [7]:
#separação dos variáveis
X = df.iloc[:,0:15].values
y = df.iloc[:, 15].values
X

array([['M', 69, 1, ..., 2, 2, 2],
       ['M', 74, 2, ..., 2, 2, 2],
       ['F', 59, 1, ..., 2, 1, 2],
       ...,
       ['M', 55, 2, ..., 2, 1, 2],
       ['M', 46, 1, ..., 1, 2, 2],
       ['M', 60, 1, ..., 2, 2, 2]], dtype=object)

In [8]:
#atribui valores
labelencoder = LabelEncoder()
X[:,0] = labelencoder.fit_transform(X[:,0])
X

array([[1, 69, 1, ..., 2, 2, 2],
       [1, 74, 2, ..., 2, 2, 2],
       [0, 59, 1, ..., 2, 1, 2],
       ...,
       [1, 55, 2, ..., 2, 1, 2],
       [1, 46, 1, ..., 1, 2, 2],
       [1, 60, 1, ..., 2, 2, 2]], dtype=object)

In [9]:
#Label encoder com a classe
labelencoder_Y = LabelEncoder()
y = labelencoder_Y.fit_transform(y)
y

array([1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0,
       0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1])

In [None]:
import numpy as np

np.reshape(X, (-1, 72, 72, 3)) 

In [10]:
# separação em treino e teste
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.2, random_state = 0
)

In [11]:
#Feature Scalling, Padronização z-score
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)
X_test

array([[ 1.19782411, -0.66770971,  0.93094934, -1.03637545, -1.03637545,
        -1.15470054, -1.24316312,  0.63245553, -1.        , -0.93094934,
        -0.96490128, -0.93094934, -1.29099445, -1.07417231, -1.03637545],
       [-0.83484711, -1.36943144, -1.07417231,  0.96490128,  0.96490128,
         0.8660254 ,  0.80439967,  0.63245553,  1.        ,  1.07417231,
        -0.96490128,  1.07417231,  0.77459667,  0.93094934, -1.03637545],
       [ 1.19782411, -0.9684476 , -1.07417231,  0.96490128,  0.96490128,
        -1.15470054,  0.80439967, -1.58113883,  1.        ,  1.07417231,
         1.03637545,  1.07417231, -1.29099445,  0.93094934, -1.03637545],
       [ 1.19782411, -0.06623394,  0.93094934, -1.03637545, -1.03637545,
        -1.15470054, -1.24316312,  0.63245553,  1.        ,  1.07417231,
         1.03637545,  1.07417231,  0.77459667, -1.07417231,  0.96490128],
       [-0.83484711, -0.66770971,  0.93094934,  0.96490128,  0.96490128,
         0.8660254 ,  0.80439967,  0.63245553, 

In [12]:
# Classificando
classifier = Sequential()
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=15))
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
classifier.fit(X_train, y_train, batch_size=10, epochs=1024)

Epoch 1/1024
Epoch 2/1024
Epoch 3/1024
Epoch 4/1024
Epoch 5/1024
Epoch 6/1024
Epoch 7/1024
Epoch 8/1024
Epoch 9/1024
Epoch 10/1024
Epoch 11/1024
Epoch 12/1024
Epoch 13/1024
Epoch 14/1024
Epoch 15/1024
Epoch 16/1024
Epoch 17/1024
Epoch 18/1024
Epoch 19/1024
Epoch 20/1024
Epoch 21/1024
Epoch 22/1024
Epoch 23/1024
Epoch 24/1024
Epoch 25/1024
Epoch 26/1024
Epoch 27/1024
Epoch 28/1024
Epoch 29/1024
Epoch 30/1024
Epoch 31/1024
Epoch 32/1024
Epoch 33/1024
Epoch 34/1024
Epoch 35/1024
Epoch 36/1024
Epoch 37/1024
Epoch 38/1024
Epoch 39/1024
Epoch 40/1024
Epoch 41/1024
Epoch 42/1024
Epoch 43/1024
Epoch 44/1024
Epoch 45/1024
Epoch 46/1024
Epoch 47/1024
Epoch 48/1024
Epoch 49/1024
Epoch 50/1024
Epoch 51/1024
Epoch 52/1024
Epoch 53/1024
Epoch 54/1024
Epoch 55/1024
Epoch 56/1024
Epoch 57/1024
Epoch 58/1024
Epoch 59/1024
Epoch 60/1024
Epoch 61/1024
Epoch 62/1024
Epoch 63/1024
Epoch 64/1024
Epoch 65/1024
Epoch 66/1024
Epoch 67/1024
Epoch 68/1024
Epoch 69/1024
Epoch 70/1024
Epoch 71/1024
Epoch 72/1024
E

<keras.callbacks.History at 0x18fa091c1f0>

In [13]:
# Criando uma previsão
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)
y_pred



array([[False],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [False]])

In [14]:
# matriz de confusão
cm = confusion_matrix(y_test, y_pred)
cm

array([[ 5,  4],
       [ 3, 44]], dtype=int64)