In [1]:
from weight_avg_trees import LinearModel, weight_avg_2, weight_avg_4, weight_avg_8, regular
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader

device_in_use = 'cuda'

In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing

#reading in dataset
ch = fetch_california_housing()
df = pd.DataFrame(data=ch.data, columns=ch.feature_names)

df_train, df_temp = train_test_split(df, train_size=.70, random_state=42)
df_val, df_test = train_test_split(df_temp, train_size=.5, random_state=42)

print(df_train.shape[0])
print(df_val.shape[0])
print(df_test.shape[0])

class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.features = dataframe.drop('MedInc', axis=1).values
        self.labels = dataframe['MedInc'].values

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return torch.tensor(self.features[idx], dtype=torch.float), torch.tensor(self.labels[idx], dtype=torch.float)

train_dataset = CustomDataset(df_train)
val_dataset = CustomDataset(df_val)
test_dataset = CustomDataset(df_test)

train_loader = DataLoader(train_dataset, batch_size=len(train_dataset), shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=len(val_dataset), shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False)

14447
3096
3097


In [3]:
# Define a function to move an entire dataset to the device in advance
def preload_dataset_to_device(loader, batch_size, device):
    preloaded_data = [(inputs.to(device), targets.to(device)) for inputs, targets in loader]
    return DataLoader(preloaded_data, batch_size=batch_size)

# Preload datasets to device (if they fit into your device memory)
train_loader = preload_dataset_to_device(train_loader, len(train_dataset), device_in_use)
val_loader = preload_dataset_to_device(val_loader, len(val_dataset), device_in_use)

In [7]:
input_size = 7  
output_size = 1  

train_loss_traditional = []
test_loss_traditional = []

train_loss_2 = []
test_loss_2 = []

train_loss_4 = []
test_loss_4 = []

train_loss_8 = []
test_loss_8 = []

#105 is the lcm(3,7,15)

from tqdm import tqdm
for i in tqdm(range(10000)):

    testloss, trainloss = regular(105, train_loader, val_loader, input_size, output_size, device_in_use) 

    train_loss_traditional.append(trainloss) 
    test_loss_traditional.append(testloss)

    testloss, trainloss = weight_avg_2(105, train_loader, val_loader, input_size, output_size, device_in_use) 

    train_loss_2.append(trainloss) 
    test_loss_2.append(testloss)

    testloss, trainloss = weight_avg_4(105, train_loader, val_loader, input_size, output_size, device_in_use) 

    train_loss_4.append(trainloss) 
    test_loss_4.append(testloss)

    testloss, trainloss = weight_avg_8(105, train_loader, val_loader, input_size, output_size, device_in_use) 

    train_loss_8.append(trainloss) 
    test_loss_8.append(testloss)


print("Traditional:",np.mean(test_loss_traditional),np.std(test_loss_traditional))
print("Weight Avg 2 Base Models:",np.mean(test_loss_2),np.std(train_loss_2))
print("Weight Avg 4 Base Models:",np.mean(test_loss_4),np.std(train_loss_4))
print("Weight Avg 8 Base Models:",np.mean(test_loss_8),np.std(train_loss_8))

100%|██████████| 10000/10000 [2:05:02<00:00,  1.33it/s] 

Traditional: 196.44563788416386 199.17977393808368
Weight Avg 2 Base Models: 118.64311773900985 139.12516133189243
Weight Avg 4 Base Models: 71.95023518414497 87.7739766108237
Weight Avg 8 Base Models: 43.446090750408175 53.67336261702516



