Programmer: Jacob Maurer
date: 9/21/2024
Description: This file is meant to make models that are trained on data other than the MNIST data set.

In [245]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification, make_regression
import copy
device = "cuda"

For these experiments, we are going to assume about half of the features are informative, with no redundant or repeated features. The samples are going to be (5*number of features) * 10. This is going to be constant across regression and classification. The final sets will be balanced, seperate experiments will be run with unbalanced sets. The batch size will be 10.The split between train and test sets will be 80% training and 20% testing

In [228]:
small_classification_problem = make_classification(n_samples = 5000, n_features=100, n_informative=50)
medium_classification_problem = make_classification(n_samples = 15000, n_features=300, n_informative=150)
large_classification_problem = make_classification(n_samples = 25000, n_features=500, n_informative = 250)
small_regression_problem = make_regression(n_samples = 1000, n_features=20, n_informative=10)
medium_regression_problem = make_regression(n_samples = 2500, n_features=50, n_informative=25)
large_regression_problem = make_regression(n_samples = 5000, n_features=100, n_informative = 50)

In [229]:
class GeneratedDataset(Dataset):
    def __init__(self, x_data, y_data):
        self.x = copy.deepcopy(x_data)
        self.y = copy.deepcopy(y_data)
    def __len__(self):
        return len(self.x)
    def __getitem__(self, index):
        return self.x[index], np.float32(self.y[index])

small_classification_train_data = GeneratedDataset(small_classification_problem[0][int(len(small_classification_problem[0])*.2):], small_classification_problem[1][int(len(small_classification_problem[1])*.2):])
small_classification_test_data = GeneratedDataset(small_classification_problem[0][:int(len(small_classification_problem[0])*.2)], small_classification_problem[1][:int(len(small_classification_problem[1])*.2)])
medium_classification_train_data = GeneratedDataset(medium_classification_problem[0][int(len(medium_classification_problem[0])*.2):], medium_classification_problem[1][int(len(medium_classification_problem[1])*.2):])
medium_classification_test_data = GeneratedDataset(medium_classification_problem[0][:int(len(medium_classification_problem[0])*.2)], medium_classification_problem[1][:int(len(medium_classification_problem[1])*.2)])
large_classification_train_data = GeneratedDataset(large_classification_problem[0][int(len(large_classification_problem[0])*.2):], large_classification_problem[1][int(len(large_classification_problem[1])*.2):])
large_classification_test_data = GeneratedDataset(large_classification_problem[0][:int(len(large_classification_problem[0])*.2)], large_classification_problem[1][:int(len(large_classification_problem[1])*.2)])
small_train_loader = DataLoader(small_classification_train_data, batch_size=10, shuffle=True)
small_test_loader = DataLoader(small_classification_test_data, batch_size=10, shuffle=True)
medium_train_loader = DataLoader(medium_classification_train_data, batch_size=10)
medium_test_loader = DataLoader(medium_classification_test_data, batch_size=10)
large_train_loader = DataLoader(large_classification_train_data, batch_size=10)
large_test_loader = DataLoader(large_classification_test_data, batch_size=10)

In [230]:
class SmallClassifyNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(100, 128),
            nn.Sigmoid(),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x.type(torch.float))
        return logits

class MediumClassifyNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(300, 128),
            nn.Sigmoid(),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        logits = self.linear_relu_stack(x.type(torch.float))
        return logits

class LargeClassifyNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(500, 256),
            nn.Sigmoid(),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x.type(torch.float))
        return logits

In [246]:
small_model = SmallClassifyNetwork().to(device)
medium_model = MediumClassifyNetwork().to(device)
large_model = LargeClassifyNetwork().to(device)
loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(large_model.parameters(), lr=1e-2)

def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y.unsqueeze(1))

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        """
        if batch % 10 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
        """

def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y.unsqueeze(1)).item()
            pred = (pred > 0.5).type(torch.float)
            correct += (pred == y.unsqueeze(1)).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [247]:
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(large_train_loader, large_model, loss_fn, optimizer)
    test(large_test_loader, large_model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
Test Error: 
 Accuracy: 81.9%, Avg loss: 0.401364 

Epoch 2
-------------------------------
Test Error: 
 Accuracy: 82.5%, Avg loss: 0.389523 

Epoch 3
-------------------------------
Test Error: 
 Accuracy: 83.2%, Avg loss: 0.382163 

Epoch 4
-------------------------------
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.375246 

Epoch 5
-------------------------------
Test Error: 
 Accuracy: 83.7%, Avg loss: 0.367895 

Epoch 6
-------------------------------
Test Error: 
 Accuracy: 84.4%, Avg loss: 0.358071 

Epoch 7
-------------------------------
Test Error: 
 Accuracy: 84.8%, Avg loss: 0.352950 

Epoch 8
-------------------------------
Test Error: 
 Accuracy: 85.2%, Avg loss: 0.341708 

Epoch 9
-------------------------------
Test Error: 
 Accuracy: 86.1%, Avg loss: 0.339110 

Epoch 10
-------------------------------
Test Error: 
 Accuracy: 87.0%, Avg loss: 0.326802 

Done!


In [239]:
large_model_pre = copy.deepcopy(large_model.state_dict())

In [241]:
large_model_post = copy.deepcopy(large_model.state_dict())

In [242]:
large_model_pre

OrderedDict([('linear_relu_stack.0.weight',
              tensor([[ 0.0379, -0.0064, -0.0266,  ..., -0.0334,  0.0407,  0.0027],
                      [ 0.0253,  0.0159, -0.0326,  ...,  0.0147,  0.0414, -0.0191],
                      [ 0.0030, -0.0338, -0.0050,  ..., -0.0092, -0.0066, -0.0229],
                      ...,
                      [-0.0281,  0.0161,  0.0443,  ...,  0.0396,  0.0011,  0.0139],
                      [ 0.0197,  0.0180, -0.0068,  ..., -0.0189, -0.0062, -0.0197],
                      [-0.0294,  0.0242, -0.0334,  ...,  0.0082, -0.0394,  0.0273]])),
             ('linear_relu_stack.0.bias',
              tensor([ 0.0158, -0.0231,  0.0004, -0.0303, -0.0023,  0.0165,  0.0293, -0.0235,
                       0.0132,  0.0101, -0.0253, -0.0303, -0.0115,  0.0231,  0.0201, -0.0059,
                      -0.0267, -0.0158, -0.0380, -0.0224,  0.0093,  0.0437,  0.0047,  0.0325,
                       0.0248,  0.0388,  0.0434, -0.0063, -0.0243,  0.0328,  0.0254,  0.0311,
    

In [243]:
large_model_post

OrderedDict([('linear_relu_stack.0.weight',
              tensor([[ 6.2502e-02, -1.1271e-02, -3.2048e-02,  ..., -3.0521e-02,
                        5.2069e-02,  3.7101e-03],
                      [ 1.7841e-02,  1.6986e-02, -4.0750e-02,  ...,  2.2018e-02,
                        4.7628e-02, -1.3162e-02],
                      [-1.2945e-02, -2.6445e-02, -6.4124e-03,  ..., -1.7109e-02,
                       -8.6872e-03, -1.5072e-02],
                      ...,
                      [-4.8798e-02,  1.7136e-02,  3.7464e-02,  ...,  5.0496e-02,
                       -3.5864e-02,  1.4923e-02],
                      [ 2.6517e-02,  2.0910e-02, -8.5393e-04,  ..., -1.4728e-02,
                        1.5843e-02, -1.4095e-02],
                      [-2.4148e-02,  2.0616e-02, -3.3068e-02,  ...,  7.4630e-05,
                       -7.4698e-02,  2.4198e-02]])),
             ('linear_relu_stack.0.bias',
              tensor([ 2.4380e-02, -1.7776e-02, -1.5122e-02, -3.0732e-02, -2.1950e-03,
           

In [244]:
mean_pre_0 = np.mean(large_model_pre["linear_relu_stack.0.weight"].numpy())
mean_post_0 = np.mean(large_model_post["linear_relu_stack.0.weight"].numpy())
std_pre_0 = np.std(large_model_pre["linear_relu_stack.0.weight"].numpy())
std_post_0 = np.std(large_model_post["linear_relu_stack.0.weight"].numpy())
mean_pre_1 = np.mean(large_model_pre["linear_relu_stack.2.weight"].numpy())
mean_post_1 = np.mean(large_model_post["linear_relu_stack.2.weight"].numpy())
std_pre_1 = np.std(large_model_pre["linear_relu_stack.2.weight"].numpy())
std_post_1 = np.std(large_model_post["linear_relu_stack.2.weight"].numpy())
print("Pre: ")
print(" Layer 1: " + str(mean_pre_0) + ", " + str(std_pre_0))
print(" Layer 2: " + str(mean_pre_1) + ", " + str(std_pre_1))
print("Post: ")
print(" Layer 1: " + str(mean_post_0) + ", " + str(std_post_0))
print(" Layer 2: " + str(mean_post_1) + ", " + str(std_post_1))

Pre: 
 Layer 1: -3.1782398e-05, 0.025849877
 Layer 2: -0.0011988243, 0.03633489
Post: 
 Layer 1: -2.958553e-05, 0.032174457
 Layer 2: -0.0013781609, 0.33564273
