In [1]:
import pandas as pd 
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score,log_loss
from imblearn.under_sampling import NearMiss
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn

In [2]:
train = pd.read_csv('train.csv')

In [3]:
X = train.drop(columns=['Response','id'])
Y = train['Response']
X = pd.get_dummies(X)

NM = NearMiss()
X,Y = NM.fit_sample(X,Y)

MMS = MinMaxScaler()
X[X.columns] = MMS.fit_transform(X[X.columns])
X = X.rename(columns={'Vehicle_Age_1-2 Year':'Veiculo_1_e_2','Vehicle_Age_< 1 Year':'Veiculo_Menor_1',
                     'Vehicle_Age_> 2 Years':'Veiculo_Maior_2'})

In [4]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.5)

In [5]:
torch.manual_seed(1)
np.random.seed(1)

In [6]:
X_train = torch.tensor(np.array(X_train), dtype=torch.float)
X_test = torch.tensor(np.array(X_test), dtype=torch.float)
Y_train = torch.tensor(np.array(Y_train), dtype=torch.float).view(-1,1)
Y_test = torch.tensor(np.array(Y_test), dtype=torch.float).view(-1,1)

In [7]:
dataset = torch.utils.data.TensorDataset(X_train,Y_train)

In [8]:
train_loader = torch.utils.data.DataLoader(dataset,batch_size=16)

In [9]:
classificador = nn.Sequential(
    nn.Linear(in_features=14,out_features=40),
    nn.Sigmoid(),
    nn.Linear(40,40),
    nn.Sigmoid(),
    nn.Linear(40,1),
    nn.Sigmoid(),
)

In [10]:
critetion = nn.BCELoss()

In [11]:
optimizer = torch.optim.Adam(classificador.parameters(), lr=0.001, weight_decay=0.0001)

In [12]:
for epoch in range(20):
    running_loss = 0.
    for data in train_loader:
        #print(data)
        inputs, labels = data
        optimizer.zero_grad()
        
        outputs = classificador(inputs)
        loss = critetion(outputs,labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    print('Época %3d: perda %.5f' % (epoch+1, running_loss/len(train_loader)))

Época   1: perda 0.44257
Época   2: perda 0.24742
Época   3: perda 0.22313
Época   4: perda 0.22081
Época   5: perda 0.21980
Época   6: perda 0.21925
Época   7: perda 0.21892
Época   8: perda 0.21870
Época   9: perda 0.21855
Época  10: perda 0.21844
Época  11: perda 0.21836
Época  12: perda 0.21830
Época  13: perda 0.21825
Época  14: perda 0.21821
Época  15: perda 0.21818
Época  16: perda 0.21815
Época  17: perda 0.21812
Época  18: perda 0.21810
Época  19: perda 0.21807
Época  20: perda 0.21805


In [13]:
classificador.eval()

Sequential(
  (0): Linear(in_features=14, out_features=40, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=40, out_features=40, bias=True)
  (3): Sigmoid()
  (4): Linear(in_features=40, out_features=1, bias=True)
  (5): Sigmoid()
)

In [14]:
previsoes = classificador.forward(X_test)

In [24]:
previsoes_ = np.array(previsoes > 0.4)

In [25]:
accuracy_score(Y_test, previsoes_)

0.9076429030186256

In [26]:
confusion_matrix(Y_test, previsoes_)

array([[23303,     6],
       [ 4308, 19093]], dtype=int64)