In [9]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

# Read the CSV into one 2-D array, then separate it column-wise:
# columns 0..7 become the inputs (X), column 8 becomes the output (y)
dataset = np.loadtxt('pima-indians-diabetes.csv', delimiter=',')
X, y = dataset[:, :8], dataset[:, 8]

# Define a function to train and evaluate the model
def train_and_evaluate(X, y, train_size, n_epochs=100, batch_size=10, lr=0.001,
                       test_size=0.2, random_state=42, seed=None):
    """Train a fresh PimaClassifier on part of the training split and score it on the test split.

    The data is split with ``train_test_split``; only the leading
    ``train_size`` fraction of the resulting training rows is used for
    fitting, while the whole test split is used for scoring.

    Args:
        X: Input features, passed straight to ``train_test_split`` and then
            converted to a float32 tensor.
        y: Targets, one per row of ``X``; converted to a float32 column tensor.
        train_size: Fraction of the training split to train on. Must be
            positive; values above 1 simply use the entire training split
            (the slice is clamped).
        n_epochs: Number of passes over the selected training rows.
        batch_size: Number of rows per optimisation step.
        lr: Learning rate handed to the Adam optimiser.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split`` so the split is
            repeatable.
        seed: Optional seed for ``torch.manual_seed``; when given, weight
            initialisation (and therefore the result) is reproducible.
            ``None`` leaves the global torch RNG untouched.

    Returns:
        float: Fraction of test rows whose rounded prediction equals the label.

    Raises:
        ValueError: If ``train_size`` is not positive, or if it selects zero
            training rows (training would silently be skipped otherwise).
    """
    # Validate before doing any work so a bad fraction fails fast.
    if not train_size > 0:
        raise ValueError(f"train_size must be positive, got {train_size!r}")

    if seed is not None:
        torch.manual_seed(seed)

    # Split dataset into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Use a portion of the training data as specified by train_size
    train_index = int(len(X_train) * train_size)
    if train_index == 0:
        raise ValueError(
            f"train_size={train_size!r} selects no rows from a training split of {len(X_train)}"
        )

    # Separate names for the tensors so the X / y arguments are not shadowed.
    X_train_t = torch.tensor(X_train[:train_index], dtype=torch.float32)
    y_train_t = torch.tensor(y_train[:train_index], dtype=torch.float32).reshape(-1, 1)

    # Define the model
    model = PimaClassifier()

    # Train the model
    loss_fn = nn.BCELoss()  # Binary Cross Entropy
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(n_epochs):
        # Sequential mini-batches in row order (no shuffling between epochs).
        for i in range(0, len(X_train_t), batch_size):
            Xbatch = X_train_t[i:i + batch_size]
            ybatch = y_train_t[i:i + batch_size]
            y_pred = model(Xbatch)
            loss = loss_fn(y_pred, ybatch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Compute accuracy on the test set; no gradients are needed for scoring,
    # so skip building the autograd graph.
    X_test_t = torch.tensor(X_test, dtype=torch.float32)
    y_test_t = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)
    model.eval()
    with torch.no_grad():
        y_pred_test = model(X_test_t)
        accuracy = (y_pred_test.round() == y_test_t).float().mean()
    return accuracy.item()

# Define the model
class PimaClassifier(nn.Module):
    """Small feed-forward binary classifier: 8 inputs -> 12 -> 8 -> 1 output.

    The two hidden layers use ReLU; the output layer uses a sigmoid, so the
    forward pass yields one value in [0, 1] per input row.
    """

    def __init__(self):
        super().__init__()
        # Each line pairs a linear layer with the activation applied after it.
        self.hidden1, self.act1 = nn.Linear(8, 12), nn.ReLU()
        self.hidden2, self.act2 = nn.Linear(12, 8), nn.ReLU()
        self.output, self.act_output = nn.Linear(8, 1), nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through both hidden layers and the sigmoid output layer."""
        first = self.act1(self.hidden1(x))
        second = self.act2(self.hidden2(first))
        return self.act_output(self.output(second))

# Sweep over several training-set fractions and report the test accuracy of each
train_sizes = [0.6, 0.7, 0.8]

for size in train_sizes:
    accuracy = train_and_evaluate(X, y, size)
    # One call emits the two report lines plus the blank separator line.
    print(f"Training Size: {size * 100}%", f"Accuracy: {accuracy}", "", sep="\n")

Training Size: 60.0%
Accuracy: 0.7077922224998474

Training Size: 70.0%
Accuracy: 0.7142857313156128

Training Size: 80.0%
Accuracy: 0.7402597665786743



In [4]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

# Read the CSV into one 2-D array, then separate it column-wise:
# columns 0..7 become the inputs (X), column 8 becomes the output (y)
dataset = np.loadtxt('pima-indians-diabetes.csv', delimiter=',')
X, y = dataset[:, :8], dataset[:, 8]

# Define the model
class PimaClassifier(nn.Module):
    """Feed-forward binary classifier with layer widths 8 -> 12 -> 8 -> 1.

    ReLU follows each hidden layer and a sigmoid follows the output layer,
    so every input row maps to a single value in [0, 1].
    """

    def __init__(self):
        super().__init__()
        # First hidden stage
        self.hidden1 = nn.Linear(8, 12)
        self.act1 = nn.ReLU()
        # Second hidden stage
        self.hidden2 = nn.Linear(12, 8)
        self.act2 = nn.ReLU()
        # Output stage
        self.output = nn.Linear(8, 1)
        self.act_output = nn.Sigmoid()

    def forward(self, x):
        """Apply each (linear layer, activation) stage in order and return the result."""
        stages = (
            (self.hidden1, self.act1),
            (self.hidden2, self.act2),
            (self.output, self.act_output),
        )
        for layer, activation in stages:
            x = activation(layer(x))
        return x

# Train and evaluate the model for different training data sizes.
#
# The split uses a fixed random_state, so it is the same for every size:
# compute it (and the test tensors) once instead of once per iteration.
X_train_full, X_test_raw, y_train_full, y_test_raw = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_test = torch.tensor(X_test_raw, dtype=torch.float32)
y_test = torch.tensor(y_test_raw, dtype=torch.float32).reshape(-1, 1)

# Settings shared by every run
loss_fn = nn.BCELoss()
n_epochs = 100
batch_size = 10

for size in [0.6, 0.7, 0.8]:
    # Use only the leading `size` fraction of the training split
    train_index = int(len(X_train_full) * size)
    X_train = torch.tensor(X_train_full[:train_index], dtype=torch.float32)
    y_train = torch.tensor(y_train_full[:train_index], dtype=torch.float32).reshape(-1, 1)

    # A new model and optimiser per size so runs do not share weights
    model = PimaClassifier()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(n_epochs):
        # Sequential mini-batches in row order (no shuffling between epochs)
        for i in range(0, len(X_train), batch_size):
            Xbatch = X_train[i:i + batch_size]
            ybatch = y_train[i:i + batch_size]
            y_pred = model(Xbatch)
            loss = loss_fn(y_pred, ybatch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Compute accuracy on the test set; scoring needs no gradients, so the
    # forward pass runs without building an autograd graph.
    model.eval()
    with torch.no_grad():
        y_pred_test = model(X_test)
        accuracy = (y_pred_test.round() == y_test).float().mean()

    print(f"Training Size: {size * 100}%")
    print(f"Accuracy: {accuracy.item()}")
    print()


Training Size: 60.0%
Accuracy: 0.7337662577629089

Training Size: 70.0%
Accuracy: 0.7402597665786743

Training Size: 80.0%
Accuracy: 0.7272727489471436

