## 1. Load the data

In [2]:
import pandas as pd
import numpy as np

# Read the already-standardized diabetes dataset from its CSV file
data_path = '../Database/standardized_diabetes_data.csv'
standardized_data = pd.read_csv(data_path)

# The 'Outcome' column is the target; every remaining column is a feature
y = standardized_data['Outcome']
X = standardized_data.drop(columns=['Outcome'])

# Preview the first rows of the features and the target (shown as a tuple)
X.head(), y.head()


(   Pregnancies   Glucose  BloodPressure  SkinThickness   Insulin       BMI  \
 0     0.639530  0.865481      -0.031969       0.670206 -0.181423  0.166511   
 1    -0.844335 -1.204281      -0.527975      -0.012293 -0.181423 -0.851645   
 2     1.233077  2.015348      -0.693310      -0.012293 -0.181423 -1.331632   
 3    -0.844335 -1.072868      -0.527975      -0.694792 -0.540290 -0.633469   
 4    -1.141108  0.504094      -2.677331       0.670206  0.316360  1.548294   
 
    DiabetesPedigreeFunction       Age  
 0                  0.468187  1.425067  
 1                 -0.364823 -0.190548  
 2                  0.604004 -0.105515  
 3                 -0.920163 -1.040871  
 4                  5.481337 -0.020483  ,
 0    1
 1    0
 2    1
 3    0
 4    1
 Name: Outcome, dtype: int64)

## 2. Partition data

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The split is stratified on y and
# uses a fixed random_state so that re-running the cell gives the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Show the shape of each of the four parts to confirm the split
tuple(part.shape for part in (X_train, X_test, y_train, y_test))


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


((614, 8), (154, 8), (614,), (154,))

## 3. Build the model

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset

# Define the Perceptron model
class Perceptron(nn.Module):
    """Single-layer perceptron: one linear unit followed by a sigmoid.

    Args:
        input_dim: Number of input features fed to the linear layer.
    """

    def __init__(self, input_dim):
        super().__init__()
        # The only trainable layer: input_dim features -> 1 output
        self.fc1 = nn.Linear(in_features=input_dim, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and squash the result with a sigmoid."""
        linear_out = self.fc1(x)
        return torch.sigmoid(linear_out)


  from .autonotebook import tqdm as notebook_tqdm


## 4. Build the method to train and evaluate the model

In [2]:
class Trainer:
    """Train a binary classifier with Adam + BCE loss and track its metrics.

    The trainer builds the model itself from a model *class*, wraps the
    training split in a shuffled ``DataLoader`` and keeps the test split as
    plain tensors for periodic evaluation.

    Args:
        model: A callable (typically an ``nn.Module`` subclass) invoked as
            ``model(n_features, **model_p)`` to build the network. Its output
            is fed to ``nn.BCELoss`` and thresholded at 0.5, so it is expected
            to produce one probability per sample.
        data: A 4-tuple ``(X_train, X_test, y_train, y_test)``. Each element
            must expose ``.values``; ``X_train`` must also expose ``.shape``
            (e.g. pandas DataFrames / Series).
        learning_rate: Learning rate passed to ``optim.Adam``.
        batch_size: Mini-batch size of the training ``DataLoader``.
        model_p: Optional dict of extra keyword arguments for ``model``.
            ``None`` (the default) means no extra arguments.

    Attributes:
        training_loss: Average training loss, one entry per evaluation.
        training_accuracy: Training accuracy, one entry per evaluation.
        testing_accuracy: Test accuracy, one entry per evaluation.
    """

    def __init__(
        self,
        model,
        data,
        learning_rate=0.001,
        batch_size=32,
        model_p=None
        ):

        # Use None as the sentinel instead of a mutable ``dict()`` default,
        # which would be created once and shared by every Trainer instance.
        if model_p is None:
            model_p = {}

        self.training_loss = []
        self.training_accuracy = []
        self.testing_accuracy = []

        X_train, X_test, y_train, y_test = data
        # Convert the splits to float32 tensors (``Variable`` is a deprecated
        # no-op wrapper, so plain tensors are used directly).
        X_train_tensor = torch.as_tensor(X_train.values, dtype=torch.float32)
        y_train_tensor = torch.as_tensor(y_train.values, dtype=torch.float32)
        X_test_tensor = torch.as_tensor(X_test.values, dtype=torch.float32)
        y_test_tensor = torch.as_tensor(y_test.values, dtype=torch.float32)
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)

        self.train_loader = DataLoader(
            dataset=train_dataset,
            batch_size=batch_size,
            shuffle=True)
        self.test_data = X_test_tensor, y_test_tensor

        self.model = model(X_train.shape[1], **model_p)
        self.criterion = nn.BCELoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)


    def train(self):
        """Run one epoch over the training loader.

        Returns:
            A tuple ``(total_loss, total_train_samples, correct_train_preds)``:
            the sample-weighted sum of batch losses, the number of samples
            seen, and the number of correct predictions (threshold 0.5).
        """
        # Make sure layers such as dropout/batch-norm are in training mode,
        # even if evaluate() was called directly beforehand.
        self.model.train()

        total_loss = 0
        correct_train_preds = 0
        total_train_samples = 0

        for inputs, targets in self.train_loader:
            # Zero the parameter gradients
            self.optimizer.zero_grad()
            # Forward pass
            outputs = self.model(inputs)
            # Compute the loss; targets are reshaped to a column to match
            # the (batch, 1) shape passed to the criterion
            loss = self.criterion(outputs, targets.view(-1, 1))
            # BCELoss averages over the batch, so weight by batch size to
            # obtain a per-sample sum that evaluate() can average correctly
            total_loss += loss.item() * len(targets)
            # Backward pass and optimization
            loss.backward()
            self.optimizer.step()
            # Count correct predictions (probability > 0.5 -> class 1)
            batch_preds = (outputs.detach() > 0.5).view(-1).float()
            correct_train_preds += (batch_preds == targets).sum().item()
            total_train_samples += len(targets)

        return total_loss, total_train_samples, correct_train_preds

    def evaluate(self, total_loss, total_train_samples, correct_train_preds, epoch, num_epochs):
        """Record training metrics, score the test split and print a summary.

        Args:
            total_loss: Sample-weighted loss sum returned by ``train``.
            total_train_samples: Number of training samples seen in the epoch.
            correct_train_preds: Number of correct training predictions.
            epoch: Zero-based index of the epoch being reported.
            num_epochs: Total number of epochs (used only in the message).

        The model is left in evaluation mode when this method returns.
        """
        # Compute training loss and accuracy
        avg_train_loss = total_loss / total_train_samples
        train_acc = correct_train_preds / total_train_samples
        self.training_loss.append(avg_train_loss)
        self.training_accuracy.append(train_acc)

        # Compute testing accuracy in evaluation mode and without gradients
        X_test, y_test = self.test_data
        self.model.eval()
        with torch.no_grad():
            test_outputs = self.model(X_test)
        # Flatten the predictions so they line up with the 1-D labels, and
        # score them the same way the training accuracy is computed
        test_preds = (test_outputs > 0.5).view(-1).float()
        test_acc = (test_preds == y_test.view(-1)).sum().item() / len(y_test)
        self.testing_accuracy.append(test_acc)

        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_train_loss:.4f}, Training Accuracy: {train_acc * 100:.2f}%, Testing Accuracy: {test_acc * 100:.2f}%')

    def run(self, num_epochs, evaluation_interval):
        """Train for ``num_epochs`` epochs, evaluating every ``evaluation_interval`` epochs."""
        for epoch in range(num_epochs):
            total_loss, total_train_samples, correct_train_preds = self.train()
            # Evaluate the model every evaluation_interval epochs
            if (epoch + 1) % evaluation_interval == 0:
                self.evaluate(total_loss, total_train_samples, correct_train_preds, epoch, num_epochs)
                self.model.train()  # Switch back to training mode

# Usage:
# trainer = Trainer(Perceptron, (X_train, X_test, y_train, y_test))
# trainer.run(num_epochs=1000, evaluation_interval=100)


In [22]:
# Train the single-layer Perceptron using the Trainer's default
# learning rate and batch size
trainer = Trainer(model=Perceptron, data=(X_train, X_test, y_train, y_test))

# 200 epochs, printing metrics after every 10th epoch
trainer.run(200, 10)

Epoch [10/200], Loss: 0.6237, Training Accuracy: 66.78%, Testing Accuracy: 58.44%
Epoch [20/200], Loss: 0.5508, Training Accuracy: 74.10%, Testing Accuracy: 68.18%
Epoch [30/200], Loss: 0.5139, Training Accuracy: 75.73%, Testing Accuracy: 70.13%
Epoch [40/200], Loss: 0.4936, Training Accuracy: 76.71%, Testing Accuracy: 70.78%
Epoch [50/200], Loss: 0.4811, Training Accuracy: 77.20%, Testing Accuracy: 70.78%
Epoch [60/200], Loss: 0.4728, Training Accuracy: 77.36%, Testing Accuracy: 70.13%
Epoch [70/200], Loss: 0.4673, Training Accuracy: 77.85%, Testing Accuracy: 70.78%
Epoch [80/200], Loss: 0.4634, Training Accuracy: 77.85%, Testing Accuracy: 70.78%
Epoch [90/200], Loss: 0.4606, Training Accuracy: 78.18%, Testing Accuracy: 70.13%
Epoch [100/200], Loss: 0.4587, Training Accuracy: 78.50%, Testing Accuracy: 70.78%
Epoch [110/200], Loss: 0.4571, Training Accuracy: 78.66%, Testing Accuracy: 70.78%
Epoch [120/200], Loss: 0.4561, Training Accuracy: 78.99%, Testing Accuracy: 70.13%
Epoch [130/20

## 5. Build an MLP for testing

In [28]:
import torch
import torch.nn as nn

class MLP(nn.Module):
    """Multi-layer perceptron with three ReLU hidden layers and a sigmoid output.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width shared by all three hidden layers (default 16).
    """

    def __init__(self, input_dim, hidden_dim = 16):
        super().__init__()
        # Three hidden layers; only the first one sees the raw input width
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        # Output layer producing a single value per sample
        self.fc_last = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Run ``x`` through the hidden stack, then the sigmoid output layer."""
        hidden = x
        # Each hidden layer is followed by a ReLU activation
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = torch.relu(layer(hidden))
        # The final linear layer is squashed with a sigmoid
        return torch.sigmoid(self.fc_last(hidden))

# Example sizes: input_dim must equal the number of features in the input
# data (replace 8 if yours differs); hidden_dim is a free choice.
input_dim, hidden_dim = 8, 16

# Instantiate the MLP with those sizes
mlp = MLP(input_dim=input_dim, hidden_dim=hidden_dim)


In [None]:
# Train the three-hidden-layer MLP, widening each hidden layer to 32 units
trainer = Trainer(
    model=MLP,
    data=(X_train, X_test, y_train, y_test),
    model_p=dict(hidden_dim=32),
)

# 200 epochs, printing metrics after every 10th epoch
trainer.run(200, 10)

### 6. Test a more flexible MLP

In [33]:
import torch
import torch.nn as nn

class MLP_fix(nn.Module):
    """MLP with a configurable number of equally wide ReLU hidden layers.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of every hidden layer.
        num_layers: Number of hidden layers. The first hidden layer is always
            created; ``num_layers - 1`` further layers are added after it.
    """

    def __init__(self, input_dim, hidden_dim, num_layers):
        super().__init__()
        # First hidden layer (input width -> hidden width), followed by the
        # remaining hidden layers (hidden width -> hidden width)
        self.layers = nn.ModuleList(
            [nn.Linear(input_dim, hidden_dim)]
            + [nn.Linear(hidden_dim, hidden_dim) for _ in range(num_layers - 1)]
        )
        # Final linear layer (hidden width -> single output)
        self.fc_last = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Apply every hidden layer with ReLU, then the sigmoid output layer."""
        hidden = x
        for hidden_layer in self.layers:
            # Linear transform followed by the ReLU activation
            hidden = torch.relu(hidden_layer(hidden))
        # Last linear layer followed by the sigmoid activation
        return torch.sigmoid(self.fc_last(hidden))




In [37]:
# Train the configurable MLP with a single hidden layer of 128 units
trainer = Trainer(
    model=MLP_fix,
    data=(X_train, X_test, y_train, y_test),
    model_p=dict(hidden_dim=128, num_layers=1),
)

# 200 epochs, printing metrics after every 10th epoch
trainer.run(200, 10)

Epoch [10/200], Loss: 0.4385, Training Accuracy: 78.99%, Testing Accuracy: 70.13%
Epoch [20/200], Loss: 0.4173, Training Accuracy: 79.80%, Testing Accuracy: 70.13%
Epoch [30/200], Loss: 0.4038, Training Accuracy: 80.78%, Testing Accuracy: 71.43%
Epoch [40/200], Loss: 0.3929, Training Accuracy: 81.92%, Testing Accuracy: 72.08%
Epoch [50/200], Loss: 0.3836, Training Accuracy: 81.60%, Testing Accuracy: 73.38%
Epoch [60/200], Loss: 0.3750, Training Accuracy: 82.25%, Testing Accuracy: 74.03%
Epoch [70/200], Loss: 0.3672, Training Accuracy: 82.90%, Testing Accuracy: 73.38%
Epoch [80/200], Loss: 0.3604, Training Accuracy: 83.88%, Testing Accuracy: 72.73%
Epoch [90/200], Loss: 0.3546, Training Accuracy: 84.04%, Testing Accuracy: 75.32%
Epoch [100/200], Loss: 0.3479, Training Accuracy: 84.20%, Testing Accuracy: 74.68%
Epoch [110/200], Loss: 0.3418, Training Accuracy: 84.04%, Testing Accuracy: 73.38%
Epoch [120/200], Loss: 0.3364, Training Accuracy: 84.85%, Testing Accuracy: 73.38%
Epoch [130/20