In [105]:
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,accuracy_score,classification_report
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import pandas

In [106]:
# CHOOSE DATASET

# Regression dataset
#data = datasets.load_diabetes(as_frame=True)

# Classification dataset
data = datasets.fetch_openml("diabetes",version=1, as_frame=True) 

X = data.data.values
y = data.target.values 

# Converter labels em binário (0 = negativo, 1 = positivo) (só usado em classification)
y= np.array([1 if val == "tested_positive" else 0 for val in y])

# Converter para tensor PyTorch (coluna) ( só usado em classification)
y = torch.tensor(y, dtype=torch.float32).reshape(-1, 1)


X.shape

(768, 8)

In [107]:
#train test spliting
test_size=0.2
Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=test_size, random_state=42)

In [108]:
# Standardize features
scaler=StandardScaler()
Xtr= scaler.fit_transform(Xtr)
Xte= scaler.transform(Xte)

In [109]:
class MLP(nn.Module):
    def __init__(self, input_size, output_size=1, dropout_prob=0.5):
        super(MLP, self).__init__()
        
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, 64)
        self.out = nn.Linear(64, output_size)
        
        self.dropout = nn.Dropout(p=dropout_prob)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        
        x = F.relu(self.fc3(x))
        x = self.dropout(x)
        
        x = F.relu(self.fc4(x))
        x = self.dropout(x)
        
        x = self.out(x)
        return x

In [110]:
num_epochs=100
lr=0.00025
dropout=0.1
batch_size=64

In [111]:
Xtr = torch.tensor(Xtr, dtype=torch.float32)
ytr = torch.tensor(ytr, dtype=torch.float32)
Xte = torch.tensor(Xte, dtype=torch.float32)
yte = torch.tensor(yte, dtype=torch.float32)

# Wrap Xtr and ytr into a dataset
train_dataset = TensorDataset(Xtr, ytr)

# Create DataLoader
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

  ytr = torch.tensor(ytr, dtype=torch.float32)
  yte = torch.tensor(yte, dtype=torch.float32)


In [112]:
# Model, Loss, Optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = MLP(input_size=Xtr.shape[1], dropout_prob=dropout).to(device)
criterion = nn.BCEWithLogitsLoss()  # for binary classification
criterion = nn.MSELoss() #for regression
optimizer = optim.Adam(model.parameters(), lr=lr)

In [113]:
# Training loop
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0

    for batch_x, batch_y in train_dataloader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        logits = model(batch_x)
        loss = criterion(logits, batch_y.view(-1, 1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(train_dataloader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

Epoch [1/100], Loss: 0.2716
Epoch [2/100], Loss: 0.2480
Epoch [3/100], Loss: 0.2309
Epoch [4/100], Loss: 0.2135
Epoch [5/100], Loss: 0.2027
Epoch [6/100], Loss: 0.1948
Epoch [7/100], Loss: 0.1877
Epoch [8/100], Loss: 0.1765
Epoch [9/100], Loss: 0.1717
Epoch [10/100], Loss: 0.1659
Epoch [11/100], Loss: 0.1676
Epoch [12/100], Loss: 0.1665
Epoch [13/100], Loss: 0.1620
Epoch [14/100], Loss: 0.1599
Epoch [15/100], Loss: 0.1559
Epoch [16/100], Loss: 0.1575
Epoch [17/100], Loss: 0.1509
Epoch [18/100], Loss: 0.1586
Epoch [19/100], Loss: 0.1562
Epoch [20/100], Loss: 0.1522
Epoch [21/100], Loss: 0.1603
Epoch [22/100], Loss: 0.1532
Epoch [23/100], Loss: 0.1576
Epoch [24/100], Loss: 0.1461
Epoch [25/100], Loss: 0.1510
Epoch [26/100], Loss: 0.1503
Epoch [27/100], Loss: 0.1491
Epoch [28/100], Loss: 0.1480
Epoch [29/100], Loss: 0.1485
Epoch [30/100], Loss: 0.1470
Epoch [31/100], Loss: 0.1454
Epoch [32/100], Loss: 0.1447
Epoch [33/100], Loss: 0.1517
Epoch [34/100], Loss: 0.1474
Epoch [35/100], Loss: 0

In [114]:
y_pred=model(Xte)
print(f'ACC:{accuracy_score(yte.detach().numpy(),y_pred.detach().numpy()>0.5)}') #classification
#print(f'MSE:{mean_squared_error(yte.detach().numpy(),y_pred.detach().numpy())}') #regression

ACC:0.7337662337662337
