In [211]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib as plt
from torch.utils.data import DataLoader
from sklearn.model_selection import StratifiedKFold
import numpy as np

In [212]:

class BinaryClassifier(nn.Module):
    """Fully connected binary classifier that outputs one probability per sample.

    The network is ``hidden_layers`` pairs of ``Linear`` + ``ReLU`` whose width
    is halved (integer division, never below 1) at every layer, followed by a
    final ``Linear(width, 1)`` and a ``Sigmoid``.

    Because the last module is a sigmoid, the forward pass already returns
    values in [0, 1]: pair this model with ``nn.BCELoss`` and threshold the
    output directly. Applying a second sigmoid (or ``nn.BCEWithLogitsLoss``)
    would squash every output above 0.5.

    Args:
        input_dim: number of input features per sample.
        hidden_layers: number of hidden ``Linear`` + ``ReLU`` pairs; 0 yields a
            single ``Linear`` + ``Sigmoid`` (logistic regression).
    """

    def __init__(self, input_dim, hidden_layers):
        super().__init__()
        layers = []
        in_features = input_dim
        for _ in range(hidden_layers):
            # Halve the width at every hidden layer. The previous schedule
            # divided by 2*i, which shrank the stack to width 1 far earlier
            # than the documented "divide by two each time" intent.
            out_features = max(in_features // 2, 1)
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.ReLU())
            in_features = out_features

        # Output head: one unit squashed to a probability.
        layers.append(nn.Linear(in_features, 1))
        layers.append(nn.Sigmoid())

        # nn.Sequential adds no layers; it just runs the list in order.
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the predicted probabilities for ``x``, one per input row."""
        return self.network(x)

In [213]:
# Model configuration.
# NOTE(review): an earlier note here said the input shape was "not correct";
# confirm that 30 matches the number of feature columns in the loaded data.
input_shape, hidden_layers = 30, 5
model_1 = BinaryClassifier(input_dim=input_shape, hidden_layers=hidden_layers)

In [214]:
# Binary cross-entropy on probabilities, and an Adam optimiser over model_1's parameters.
loss_fn = nn.BCELoss()
optimizer = optim.Adam(params=model_1.parameters(), lr=0.001)

In [215]:
from sklearn.model_selection import train_test_split
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset

In [216]:

def dataset_to_numpy(dataset: TensorDataset):
    """Convert a two-tensor ``TensorDataset`` into a pair of NumPy arrays.

    Args:
        dataset: a ``TensorDataset`` holding exactly two tensors, unpacked as
            ``(X, y)``.

    Returns:
        ``(X_np, y_np)``: the two tensors as NumPy arrays.

    Raises:
        ValueError: if ``dataset.tensors`` does not contain exactly two tensors
            (raised by the tuple unpacking).
    """
    X, y = dataset.tensors
    # detach() drops the autograd graph; cpu() is required because
    # Tensor.numpy() only works on CPU tensors (it is a no-op if already there).
    X_np = X.detach().cpu().numpy()
    y_np = y.detach().cpu().numpy()
    return X_np, y_np


In [231]:
def load_data(path='br_cancer_normalized.csv', label_column='Diagnosis'):
    """Read a CSV file and split it into a feature matrix and a label vector.

    Args:
        path: CSV file to read (decoded as latin-1). Defaults to the file this
            notebook was written against.
        label_column: name of the column holding the labels; every other
            column is treated as a feature.

    Returns:
        ``(X, y)``: ``X`` is a NumPy array with one row per CSV row and one
        column per non-label column; ``y`` is the label column as a 1-D array.

    Raises:
        KeyError: if ``label_column`` is not a column of the CSV.
    """
    df = pd.read_csv(path, encoding='latin-1')

    if label_column not in df.columns:
        raise KeyError(f"label column {label_column!r} not found in {path!r}")

    # Separate features and labels
    X = df.drop(columns=[label_column]).to_numpy()
    y = df[label_column].to_numpy()
    return X, y

In [233]:
# Load the full dataset as NumPy arrays: X_np holds the features, y_np the labels.
# The printed shapes are a quick sanity check on the number of rows and feature columns.
X_np, y_np = load_data()
print(X_np.shape, y_np.shape)

(569, 30) (569,)


In [235]:
#turn train data into epochs

In [237]:
# Training loop
# def train(loss_fn,optimizer, data, model):
#     num_epochs = 10
#     epochs_loss = []
#     epochs_acr = []
#     for epoch in range(num_epochs):
#         model.train()
#         epoch_loss = 0.0
#         correct = 0
#         total = 0
        
#         for batch_X, batch_y in data:
#             optimizer.zero_grad()
            
#             outputs = model(batch_X)            # forward
#             loss = loss_fn(outputs, batch_y)  # compute loss
#             loss.backward()                     # backprop
#             optimizer.step()                    # update weights
            
#             epoch_loss += loss.item() * batch_X.size(0)
            
#             # accuracy calculation
#             preds = (outputs > 0.5).float()
#             correct += (preds == batch_y).sum().item()
#             total += batch_y.size(0)
        
#         avg_loss = epoch_loss / total
#         accuracy = correct / total
#         epochs_loss.append(avg_loss)
#         epochs_acr.append(accuracy)
#         print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {avg_loss:.4f} Acc: {accuracy:.4f}")
#     return model, epochs_loss, epochs_acr

In [240]:
from statistics import mean, stdev
from sklearn import preprocessing
from sklearn.model_selection import StratifiedKFold
from sklearn import linear_model
from sklearn import datasets

In [254]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, Subset
from sklearn.model_selection import StratifiedKFold
import copy

def test(model, data, criterion):
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for batch_X, batch_y in data:
            batch_y = batch_y.unsqueeze(1)
            logits = model(batch_X)
            loss = criterion(logits, batch_y)
            test_loss += loss.item() * batch_X.size(0)

            preds = (torch.sigmoid(logits) >= 0.5).float()
            correct += (preds == batch_y).sum().item()
            total += batch_y.size(0)
    
    avg_loss = test_loss / total
    accuracy = correct / total
    print(f"Test Loss: {avg_loss:.4f} | Test Acc: {accuracy:.4f}")
    return avg_loss, accuracy
    

def train_with_stratified_kfold(X, y, model_fn, input_shape, hidden_layers, batch_size=32, lr=1e-3, k_folds=7, num_epochs=300):
    # Wrap into dataset
    dataset = TensorDataset(torch.tensor(X, dtype=torch.float32),
                            torch.tensor(y, dtype=torch.float32))
    
    skf = StratifiedKFold(n_splits=k_folds, shuffle=True)
    all_fold_results = []

    for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
        print(f"\n--- Fold {fold+1} ---")

        train_subset = Subset(dataset, train_idx)
        val_subset = Subset(dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)
     # fresh model for each fold
        cur_model = BinaryClassifier(30,5)
        criterion = nn.BCEWithLogitsLoss()
        optimizer = optim.Adam(cur_model.parameters(), lr=lr)

        for epoch in range(num_epochs):
            cur_model.train()
            epoch_loss = 0.0
            correct = 0
            total = 0

            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = cur_model(batch_X).squeeze()
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()

                epoch_loss += loss.item() * batch_X.size(0)
                preds = (torch.sigmoid(outputs) >= 0.5).float()
                correct += (preds == batch_y).sum().item()
                total += batch_y.size(0)

            train_acc = correct / total
            train_loss = epoch_loss / total
            print(f"Epoch {epoch+1}: Train Loss {train_loss:.4f} | Train Acc {train_acc:.4f}")

        avg_loss, accuracy = test(cur_model, val_loader, criterion)
        all_fold_results.append((avg_loss, accuracy))
    
    return all_fold_results


In [256]:
# def test(model, data, criterion):
#     model.eval()  # evaluation mode
#     test_loss = 0.0
#     correct = 0
#     total = 0
    
#     with torch.no_grad():
#         for batch_X, batch_y in data:
            
#             logits = model(batch_X)                # raw outputs
#             loss = criterion(logits, batch_y)
            
#             test_loss += loss.item() * batch_X.size(0)
            
#             # convert logits -> probabilities -> predictions
#             preds = torch.sigmoid(logits) > 0.5
#             correct += (preds.float() == batch_y).sum().item()
#             total += batch_y.size(0)
    
#     avg_loss = test_loss / total
#     accuracy = correct / total
#     print(f"Test Loss: {avg_loss:.4f} | Test Acc: {accuracy:.4f}")
#     return avg_loss, accuracy

In [258]:
#training model
# trained_model_1,epochs_loss,epochs_arc = train(loss_fn,optimizer, train_epoch_data, model_1)

In [260]:
#testing model
# avg_loss, accuracy = test(trained_model, train_data, loss_fn)
# print(f'testing loss {avg_loss}, model accuracy {accuracy}')

In [262]:
# Train and evaluate with stratified k-fold cross-validation;
# fold_results collects what the call returns for later inspection.
fold_results = train_with_stratified_kfold(
    X=X_np,
    y=y_np,
    model_fn=model_1,
    input_shape=30,
    hidden_layers=5,
)


--- Fold 1 ---
Epoch 1: Train Loss 0.8226 | Train Acc 0.3717
Epoch 2: Train Loss 0.8214 | Train Acc 0.3717
Epoch 3: Train Loss 0.8202 | Train Acc 0.3717
Epoch 4: Train Loss 0.8189 | Train Acc 0.3717
Epoch 5: Train Loss 0.8176 | Train Acc 0.3717
Epoch 6: Train Loss 0.8162 | Train Acc 0.3717
Epoch 7: Train Loss 0.8146 | Train Acc 0.3717
Epoch 8: Train Loss 0.8128 | Train Acc 0.3717
Epoch 9: Train Loss 0.8106 | Train Acc 0.3717
Epoch 10: Train Loss 0.8080 | Train Acc 0.3717
Epoch 11: Train Loss 0.8048 | Train Acc 0.3717
Epoch 12: Train Loss 0.8010 | Train Acc 0.3717
Epoch 13: Train Loss 0.7967 | Train Acc 0.3717
Epoch 14: Train Loss 0.7920 | Train Acc 0.3717
Epoch 15: Train Loss 0.7873 | Train Acc 0.3717
Epoch 16: Train Loss 0.7832 | Train Acc 0.3717
Epoch 17: Train Loss 0.7789 | Train Acc 0.3717
Epoch 18: Train Loss 0.7754 | Train Acc 0.3717
Epoch 19: Train Loss 0.7720 | Train Acc 0.3717
Epoch 20: Train Loss 0.7694 | Train Acc 0.3717
Epoch 21: Train Loss 0.7664 | Train Acc 0.3717
Epoch 

In [251]:
# Show the per-fold results: one (validation loss, validation accuracy) tuple per fold.
print(fold_results)

[(0.7252551288139529, 0.3780487804878049), (0.7309585533490995, 0.3780487804878049), (0.7765065560370316, 0.37037037037037035), (0.8181366453200211, 0.37037037037037035), (0.7937315099033309, 0.37037037037037035), (0.7308317474377009, 0.37037037037037035), (0.7735817494951649, 0.37037037037037035)]


In [None]:
# plt.plot(epochs_loss)
# plt.ylabel('training loss')
# plt.xlabel('epochs')
# plt.show()

In [None]:
# # Compatibility check
# print("=== COMPATIBILITY CHECK ===")
# print(f"Input shape: {input_shape}")
# print(f"Hidden layers: {hidden_layers}")
# print(f"Train data type: {type(train_data)}")

# # Test model creation
# test_model = BinaryClassifier(input_shape, hidden_layers)
# print(f"Model created successfully: {test_model}")

# # Test with one batch
# for batch_X, batch_y in train_data:
#     print(f"Batch X shape: {batch_X.shape}")
#     print(f"Batch y shape: {batch_y.shape}")
#     print(f"Batch y values: {batch_y.unique()}")
    
#     # Test forward pass
#     test_output = test_model(batch_X)
#     print(f"Model output shape: {test_output.shape}")
#     print(f"Model output range: {test_output.min().item():.4f} to {test_output.max().item():.4f}")
#     break

# print("✅ All compatibility checks passed!")