Programmer: Jacob Maurer
Date: 9/24/2024
Description: Tests whether a network initialized with the opposite (negated) weights of another network can give insight into finding a better set of weights.

In [2]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification, make_regression, make_multilabel_classification
import copy
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook does not fail on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [19]:
# Fixed seed so that both generated problems are identical across kernel restarts.
DATA_SEED = 42

# Each call returns an (X, y) pair with 25000 samples and 500 features, 250 of them informative.
large_classification_problem = make_classification(
    n_samples=25000, n_features=500, n_informative=250, random_state=DATA_SEED
)
large_regression_problem = make_regression(
    n_samples=25000, n_features=500, n_informative=250, random_state=DATA_SEED
)

In [20]:
class GeneratedDataset(Dataset):
    """Map-style dataset over a pair of indexable feature / target containers.

    Both inputs are deep-copied on construction, so later changes to the
    caller's objects do not affect the dataset.
    """

    def __init__(self, x_data, y_data):
        # Keep private copies rather than references to the caller's data.
        self.x, self.y = copy.deepcopy(x_data), copy.deepcopy(y_data)

    def __len__(self):
        """Return the number of entries in the feature container."""
        n_rows = len(self.x)
        return n_rows

    def __getitem__(self, index):
        """Return ``(features, target)`` at ``index``; the target is cast to ``np.float32``."""
        features = self.x[index]
        target = np.float32(self.y[index])
        return features, target

# Classification problem: the leading 20% of the samples is held out for testing,
# the remainder is used for training.
class_features, class_targets = large_classification_problem[0], large_classification_problem[1]
class_x_cut = int(len(class_features) * .2)
class_y_cut = int(len(class_targets) * .2)
large_classification_train_data = GeneratedDataset(class_features[class_x_cut:], class_targets[class_y_cut:])
large_classification_test_data = GeneratedDataset(class_features[:class_x_cut], class_targets[:class_y_cut])
large_train_loader_class = DataLoader(large_classification_train_data, batch_size=100, shuffle=True)
large_test_loader_class = DataLoader(large_classification_test_data, batch_size=100, shuffle=True)

# Regression problem: same 20% / 80% split and the same loader settings.
regress_features, regress_targets = large_regression_problem[0], large_regression_problem[1]
regress_x_cut = int(len(regress_features) * .2)
regress_y_cut = int(len(regress_targets) * .2)
large_regression_train_data = GeneratedDataset(regress_features[regress_x_cut:], regress_targets[regress_y_cut:])
large_regression_test_data = GeneratedDataset(regress_features[:regress_x_cut], regress_targets[:regress_y_cut])
large_train_loader_regress = DataLoader(large_regression_train_data, batch_size=100, shuffle=True)
large_test_loader_regress = DataLoader(large_regression_test_data, batch_size=100, shuffle=True)

In [62]:
class LargeClassifyNetwork(nn.Module):
    """Classifier stack: Linear(500, 256) -> Sigmoid -> Linear(256, 1) -> Sigmoid."""

    def __init__(self):
        super().__init__()
        # Layers are created in forward order. The attribute name is kept as-is
        # (although the stack contains no ReLU) because it prefixes every key of
        # the state dict.
        hidden = nn.Linear(500, 256)
        hidden_activation = nn.Sigmoid()
        head = nn.Linear(256, 1)
        output_activation = nn.Sigmoid()
        self.linear_relu_stack = nn.Sequential(hidden, hidden_activation, head, output_activation)

    def forward(self, x):
        """Cast ``x`` to ``torch.float`` and return the output of the layer stack."""
        as_float = x.type(torch.float)
        return self.linear_relu_stack(as_float)
class LargeRegressNetwork(nn.Module):
    """Regressor stack: Linear(500, 128) -> Sigmoid -> Linear(128, 1), with no output activation."""

    def __init__(self):
        super().__init__()
        # Layers are created in forward order. The attribute name is kept as-is
        # (although the stack contains no ReLU) because it prefixes every key of
        # the state dict.
        hidden = nn.Linear(500, 128)
        hidden_activation = nn.Sigmoid()
        head = nn.Linear(128, 1)
        self.linear_relu_stack = nn.Sequential(hidden, hidden_activation, head)

    def forward(self, x):
        """Cast ``x`` to ``torch.float`` and return the output of the layer stack."""
        as_float = x.type(torch.float)
        return self.linear_relu_stack(as_float)
# Create one network per task (classifier first, then regressor) and place each on `device`.
large_model_class, large_model_regress = (
    LargeClassifyNetwork().to(device),
    LargeRegressNetwork().to(device),
)

In [47]:
# Build the "opposite" classifier. Every tensor in a CPU copy of the original
# state dict is scaled in place by (-1 + .2), i.e. by -0.8: the sum is evaluated
# first and the in-place multiply is applied afterwards.
opposite_dict = copy.deepcopy(large_model_class.cpu().state_dict())
for tensor in opposite_dict.values():
    tensor.mul_(-1 + .2)
# .cpu() above moved the original model itself, so put it back on `device`.
large_model_class.to(device)
# A fresh network of the same architecture receives the scaled weights.
large_model_class2 = LargeClassifyNetwork().to(device)
large_model_class2.load_state_dict(opposite_dict)

<All keys matched successfully>

In [59]:
# Build the "opposite" regressor: every tensor in a CPU copy of the original
# state dict is multiplied in place by -1.
opposite_dict = copy.deepcopy(large_model_regress.cpu().state_dict())
for tensor in opposite_dict.values():
    tensor.mul_(-1)
# .cpu() above moved the original model itself, so put it back on `device`.
large_model_regress.to(device)
# A fresh network of the same architecture receives the negated weights.
large_model_regress2 = LargeRegressNetwork().to(device)
large_model_regress2.load_state_dict(opposite_dict)

<All keys matched successfully>

In [63]:
# One loss / optimizer pair per network. All four optimizers are plain SGD with
# the same learning rate.
LEARNING_RATE = 1e-2

# Classification pair: binary cross-entropy on the networks' sigmoid outputs.
loss_fn_class, loss_fn_class2 = nn.BCELoss(), nn.BCELoss()
optimizer_class = torch.optim.SGD(large_model_class.parameters(), lr=LEARNING_RATE)
optimizer_class2 = torch.optim.SGD(large_model_class2.parameters(), lr=LEARNING_RATE)

# Regression pair: mean squared error.
loss_fn_regress, loss_fn_regress2 = nn.MSELoss(), nn.MSELoss()
optimizer_regress = torch.optim.SGD(large_model_regress.parameters(), lr=LEARNING_RATE)
optimizer_regress2 = torch.optim.SGD(large_model_regress2.parameters(), lr=LEARNING_RATE)
def train(dataloader, model, loss_fn, optimizer, log_every=None):
    """Run one pass over ``dataloader``, taking one optimizer step per batch.

    Args:
        dataloader: Iterable yielding ``(X, y)`` tensor batches. ``y`` gets a
            trailing dimension added (``y.unsqueeze(1)``) before the loss is
            computed.
        model: Module to train. It is switched to training mode.
        loss_fn: Callable invoked as ``loss_fn(pred, target)``.
        optimizer: Optimizer that updates the model's parameters.
        log_every: When set to a positive integer, print the running loss every
            ``log_every`` batches. The default ``None`` prints nothing.

    Returns:
        None.
    """
    # Batches follow the model's own device instead of a notebook-level global,
    # so the function works wherever the model currently lives.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    model.train()
    for batch, (X, y) in enumerate(dataloader):
        if target_device is not None:
            X, y = X.to(target_device), y.to(target_device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y.unsqueeze(1))

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if log_every and batch % log_every == 0:
            size = len(dataloader.dataset)
            current = (batch + 1) * len(X)
            print(f"loss: {loss.item():>7f}  [{current:>5d}/{size:>5d}]")

def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y.unsqueeze(1)).item()
            # pred = (pred > 0.5).type(torch.float)
            # correct += (pred == y.unsqueeze(1)).type(torch.float).sum().item()
    test_loss /= num_batches
    # correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [18]:
# Evaluate both classifiers before any training done in this cell.
test(large_test_loader_class, large_model_class, loss_fn_class)
test(large_test_loader_class, large_model_class2, loss_fn_class2)

# Per-tensor mean / standard deviation of every state-dict entry. The first
# snapshot is appended here and a second one after training, into the same lists.
means = {"net1": [], "net2": []}
stds = {"net1": [], "net2": []}
large_class = large_model_class.cpu().state_dict()
opposite_class = large_model_class2.cpu().state_dict()
for key in large_class:
    for label, state in (("net1", large_class), ("net2", opposite_class)):
        values = state[key].numpy()
        means[label].append(np.mean(values))
        stds[label].append(np.std(values))
# .cpu() moved the models themselves; return them to `device` before training.
large_model_class.to(device)
large_model_class2.to(device)

# Two rounds of: one training pass per network, then one evaluation per network.
for _round in range(2):
    train(large_train_loader_class, large_model_class, loss_fn_class, optimizer_class)
    train(large_train_loader_class, large_model_class2, loss_fn_class2, optimizer_class2)
    test(large_test_loader_class, large_model_class, loss_fn_class)
    test(large_test_loader_class, large_model_class2, loss_fn_class2)

# Second snapshot of the same statistics, taken after training.
large_class = large_model_class.cpu().state_dict()
opposite_class = large_model_class2.cpu().state_dict()
for key in large_class:
    for label, state in (("net1", large_class), ("net2", opposite_class)):
        values = state[key].numpy()
        means[label].append(np.mean(values))
        stds[label].append(np.std(values))
large_model_class.to(device)
large_model_class2.to(device)
print(means)
print(stds)


Test Error: 
 Accuracy: 49.1%, Avg loss: 0.736283 

Test Error: 
 Accuracy: 50.8%, Avg loss: 0.714588 

Test Error: 
 Accuracy: 73.7%, Avg loss: 0.587229 

Test Error: 
 Accuracy: 74.8%, Avg loss: 0.582829 

Test Error: 
 Accuracy: 79.8%, Avg loss: 0.491286 

Test Error: 
 Accuracy: 80.7%, Avg loss: 0.473744 

{'net1': [-6.7121124e-05, 0.0009538211, 0.0036603732, 0.027246706, -6.880879e-05, 0.0009498017, -4.316261e-05, 0.022705873], 'net2': [5.369689e-05, -0.000763057, -0.0029282987, -0.021797365, 5.458728e-05, -0.0007665455, 0.00066451915, -0.017559199]}
{'net1': [0.02578504, 0.026284326, 0.037934456, 0.0, 0.025800886, 0.026292682, 0.060963903, 0.0], 'net2': [0.020628033, 0.02102746, 0.030347565, 0.0, 0.020657178, 0.021030901, 0.057816267, 0.0]}


In [28]:
# Evaluate both regressors before any training done in this cell.
test(large_test_loader_regress, large_model_regress, loss_fn_regress)
test(large_test_loader_regress, large_model_regress2, loss_fn_regress2)

# Per-tensor mean / standard deviation of every state-dict entry. The first
# snapshot is appended here and a second one after training, into the same lists.
means = {"net1": [], "net2": []}
stds = {"net1": [], "net2": []}
large_regress = large_model_regress.cpu().state_dict()
opposite_regress = large_model_regress2.cpu().state_dict()
for key in large_regress:
    for label, state in (("net1", large_regress), ("net2", opposite_regress)):
        values = state[key].numpy()
        means[label].append(np.mean(values))
        stds[label].append(np.std(values))
# .cpu() moved the models themselves; return them to `device` before training.
large_model_regress.to(device)
large_model_regress2.to(device)

# Two rounds of: one training pass per network, then one evaluation per network.
for _round in range(2):
    train(large_train_loader_regress, large_model_regress, loss_fn_regress, optimizer_regress)
    train(large_train_loader_regress, large_model_regress2, loss_fn_regress2, optimizer_regress2)
    test(large_test_loader_regress, large_model_regress, loss_fn_regress)
    test(large_test_loader_regress, large_model_regress2, loss_fn_regress2)

# Second snapshot of the same statistics, taken after training.
large_regress = large_model_regress.cpu().state_dict()
opposite_regress = large_model_regress2.cpu().state_dict()
for key in large_regress:
    for label, state in (("net1", large_regress), ("net2", opposite_regress)):
        values = state[key].numpy()
        means[label].append(np.mean(values))
        stds[label].append(np.std(values))
large_model_regress.to(device)
large_model_regress2.to(device)
print(means)
print(stds)

Test Error: 
 Accuracy: 0.0%, Avg loss: 850028.702500 

Test Error: 
 Accuracy: 0.0%, Avg loss: 850034.068750 

Test Error: 
 Accuracy: 0.0%, Avg loss: 36654.905156 

Test Error: 
 Accuracy: 0.0%, Avg loss: 31475.019961 

Test Error: 
 Accuracy: 0.0%, Avg loss: 36135.055469 

Test Error: 
 Accuracy: 0.0%, Avg loss: 33468.321484 

{'net1': [3.850696e-05, 0.001461811, -0.004751329, -0.024107516, -0.54672396, -3.6230526, -3.9212875, 116.00389], 'net2': [-3.850696e-05, -0.001461811, 0.004751329, 0.024107516, 0.14124398, -0.0054369867, 0.7171431, -20.542202]}
{'net1': [0.02578406, 0.025510116, 0.055035055, 0.0, 7.6437316, 12.305326, 48.278015, 0.0], 'net2': [0.02578406, 0.025510116, 0.055035055, 0.0, 7.3358784, 7.71518, 46.25749, 0.0]}


In [20]:
# Per-tensor mean / standard deviation of every state-dict entry of the two classifiers.
means = {"net1": [], "net2": []}
stds = {"net1": [], "net2": []}
# .cpu() moves the models themselves so that .numpy() can be called on their tensors.
large_class = large_model_class.cpu().state_dict()
opposite_class = large_model_class2.cpu().state_dict()
for key in large_class:
    for label, state in (("net1", large_class), ("net2", opposite_class)):
        values = state[key].numpy()
        means[label].append(np.mean(values))
        stds[label].append(np.std(values))
# Return both models to `device`, as the other statistics cells do. Leaving them
# on the CPU would make later train/test calls, which move batches to `device`,
# fail when `device` is not the CPU.
large_model_class.to(device)
large_model_class2.to(device)
print(means)
print(stds)

{'net1': [-2.0919164e-05, -0.0015514004, 0.00018152373, 0.04110895], 'net2': [2.1551123e-05, 0.0015482809, 0.00029282202, -0.040358137]}
{'net1': [0.025817605, 0.025753267, 0.05653238, 0.0], 'net2': [0.025817648, 0.025751727, 0.05649176, 0.0]}


In [49]:
# Evaluate both classifiers, then run two rounds of one training pass and one
# evaluation per network.
test(large_test_loader_class, large_model_class, loss_fn_class)
test(large_test_loader_class, large_model_class2, loss_fn_class2)
for _round in range(2):
    train(large_train_loader_class, large_model_class, loss_fn_class, optimizer_class)
    train(large_train_loader_class, large_model_class2, loss_fn_class2, optimizer_class2)
    test(large_test_loader_class, large_model_class, loss_fn_class)
    test(large_test_loader_class, large_model_class2, loss_fn_class2)

# CPU snapshots of both trained state dicts.
new_state = copy.deepcopy(large_model_class2.cpu().state_dict())
old_state = copy.deepcopy(large_model_class.cpu().state_dict())
old_state2 = copy.deepcopy(new_state)
# .cpu() moved the second model itself. Return it to `device`; otherwise
# re-running this cell fails at the second test() call above with a device
# mismatch whenever `device` is not the CPU.
large_model_class2.to(device)

# Merge the two networks by elementwise product of matching tensors.
# NOTE(review): the regression version of this experiment averages the two state
# dicts instead of multiplying them -- confirm the product is intended here.
for key in new_state:
    new_state[key] = (old_state[key] * old_state2[key])
large_model_class.load_state_dict(new_state)
large_model_class.to(device)

# Evaluate the merged network, train it for one pass, and evaluate again.
test(large_test_loader_class, large_model_class, loss_fn_class)
train(large_train_loader_class, large_model_class, loss_fn_class, optimizer_class)
test(large_test_loader_class, large_model_class, loss_fn_class)


Test Error: 
 Accuracy: 0.0%, Avg loss: 0.703579 

Test Error: 
 Accuracy: 0.0%, Avg loss: 0.699592 

Test Error: 
 Accuracy: 0.0%, Avg loss: 0.597723 

Test Error: 
 Accuracy: 0.0%, Avg loss: 0.592114 

Test Error: 
 Accuracy: 0.0%, Avg loss: 0.489344 

Test Error: 
 Accuracy: 0.0%, Avg loss: 0.466844 

Test Error: 
 Accuracy: 0.0%, Avg loss: 0.714294 

Test Error: 
 Accuracy: 0.0%, Avg loss: 0.629636 



In [65]:
# Evaluate both regressors, then run two rounds of one training pass and one
# evaluation per network.
test(large_test_loader_regress, large_model_regress, loss_fn_regress)
test(large_test_loader_regress, large_model_regress2, loss_fn_regress2)
for _round in range(2):
    train(large_train_loader_regress, large_model_regress, loss_fn_regress, optimizer_regress)
    train(large_train_loader_regress, large_model_regress2, loss_fn_regress2, optimizer_regress2)
    test(large_test_loader_regress, large_model_regress, loss_fn_regress)
    test(large_test_loader_regress, large_model_regress2, loss_fn_regress2)

# CPU snapshots of both trained state dicts.
new_state = copy.deepcopy(large_model_regress2.cpu().state_dict())
old_state = copy.deepcopy(large_model_regress.cpu().state_dict())
old_state2 = copy.deepcopy(new_state)
# .cpu() moved the second model itself. Return it to `device`; otherwise
# re-running this cell fails at the second test() call above with a device
# mismatch whenever `device` is not the CPU.
large_model_regress2.to(device)

# Merge the two networks by averaging matching tensors elementwise.
for key in new_state:
    new_state[key] = (old_state[key] + old_state2[key]) / 2.0
large_model_regress.load_state_dict(new_state)
large_model_regress.to(device)

# Evaluate the merged network, then run two further train / evaluate rounds on it.
test(large_test_loader_regress, large_model_regress, loss_fn_regress)
for _round in range(2):
    train(large_train_loader_regress, large_model_regress, loss_fn_regress, optimizer_regress)
    test(large_test_loader_regress, large_model_regress, loss_fn_regress)

Test Error: 
 Accuracy: 0.0%, Avg loss: 29253.105234 

Test Error: 
 Accuracy: 0.0%, Avg loss: 850023.845000 

Test Error: 
 Accuracy: 0.0%, Avg loss: 30040.762773 

Test Error: 
 Accuracy: 0.0%, Avg loss: 31703.303672 

Test Error: 
 Accuracy: 0.0%, Avg loss: 28897.320234 

Test Error: 
 Accuracy: 0.0%, Avg loss: 30902.173125 

Test Error: 
 Accuracy: 0.0%, Avg loss: 146203.059688 

Test Error: 
 Accuracy: 0.0%, Avg loss: 36925.466445 

Test Error: 
 Accuracy: 0.0%, Avg loss: 37468.502539 



In [64]:
# Train the regression network for `epochs` further passes, evaluating on the
# test loader after each pass.
epochs = 10
for _epoch in range(epochs):
    train(large_train_loader_regress, large_model_regress, loss_fn_regress, optimizer_regress)
    test(large_test_loader_regress, large_model_regress, loss_fn_regress)


Test Error: 
 Accuracy: 0.0%, Avg loss: 31693.382930 

Test Error: 
 Accuracy: 0.0%, Avg loss: 31921.102305 

Test Error: 
 Accuracy: 0.0%, Avg loss: 31845.476602 

Test Error: 
 Accuracy: 0.0%, Avg loss: 32323.142109 

Test Error: 
 Accuracy: 0.0%, Avg loss: 31885.955430 

Test Error: 
 Accuracy: 0.0%, Avg loss: 31332.490156 

Test Error: 
 Accuracy: 0.0%, Avg loss: 30379.478828 

Test Error: 
 Accuracy: 0.0%, Avg loss: 30295.531094 

Test Error: 
 Accuracy: 0.0%, Avg loss: 30584.742852 

Test Error: 
 Accuracy: 0.0%, Avg loss: 29253.105273 

