In [1]:
from importlib import reload

import numpy as np
import pandas as pd
import torch
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import weight_avg_trees_best
reload(weight_avg_trees_best)
from weight_avg_trees_best import weight_avg_2_classification, regular_classification, weight_avg_8_classification

# Run on the GPU when PyTorch reports one as available, otherwise on the CPU.
if torch.cuda.is_available():
    device_in_use = 'cuda'
else:
    device_in_use = 'cpu'
print(device_in_use)

cpu


In [2]:
# Load the breast-cancer data into a single frame: feature columns plus a 'Target' column.
ch = load_breast_cancer()
df = pd.DataFrame(data=ch.data, columns=ch.feature_names)
df['Target'] = ch.target

# 70 % of the rows go to training; the remainder is halved into validation and test.
df_train, df_temp = train_test_split(df, train_size=0.70, random_state=42)
df_val, df_test = train_test_split(df_temp, train_size=0.5, random_state=42)

# Keep the labels aside so they are never passed through the scaler.
y_train, y_val, y_test = (
    part['Target'].values for part in (df_train, df_val, df_test)
)

# Standardise the features. The scaler is fitted on the training split only and
# then applied unchanged to validation and test, so no statistics leak from them.
scaler = StandardScaler()
scaled_train_features = scaler.fit_transform(df_train.drop(columns=['Target']))
scaled_val_features, scaled_test_features = (
    scaler.transform(part.drop(columns=['Target'])) for part in (df_val, df_test)
)

# Rebuild one frame per split: scaled features under the original feature
# names ('Target' is the last column, hence [:-1]) with the labels appended.
feature_columns = df_train.columns[:-1]
df_scaled_train = pd.DataFrame(scaled_train_features, columns=feature_columns).assign(Target=y_train)
df_scaled_val = pd.DataFrame(scaled_val_features, columns=feature_columns).assign(Target=y_val)
df_scaled_test = pd.DataFrame(scaled_test_features, columns=feature_columns).assign(Target=y_test)

class CustomDataset(Dataset):
    """Map-style dataset over a DataFrame that has a 'Target' column.

    Every column except 'Target' is treated as a feature. Items are returned
    as a ``(features, label)`` pair of tensors: float features, long label.
    """

    def __init__(self, dataframe):
        # Store plain arrays; tensors are created per item in __getitem__.
        feature_frame = dataframe.drop(columns='Target')
        self.features = feature_frame.values
        self.labels = dataframe['Target'].values

    def __len__(self):
        # One item per label.
        return len(self.labels)

    def __getitem__(self, idx):
        row = self.features[idx]
        label = self.labels[idx]
        x = torch.tensor(row, dtype=torch.float)
        y = torch.tensor(label, dtype=torch.long)
        return x, y

# One dataset per scaled split.
train_dataset, val_dataset, test_dataset = (
    CustomDataset(frame) for frame in (df_scaled_train, df_scaled_val, df_scaled_test)
)

# Full-batch loaders: each loader yields its whole split as a single,
# unshuffled batch (batch_size equals the dataset length).
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=len(split), shuffle=False)
    for split in (train_dataset, val_dataset, test_dataset)
)

In [3]:
# Define a function to move an entire dataset to the device in advance
def preload_dataset_to_device(loader, batch_size, device):
    """Copy every batch of ``loader`` to ``device`` and wrap them in a new DataLoader.

    Args:
        loader: iterable yielding ``(inputs, targets)`` tensor pairs.
        batch_size: batch size of the returned DataLoader.
        device: target passed to ``Tensor.to`` for both tensors.

    Returns:
        A DataLoader over the list of device-resident ``(inputs, targets)`` pairs.

    Each element of that list is one *whole batch* from ``loader``, so the
    returned loader stacks batches and its outputs carry one extra leading
    dimension compared with what ``loader`` yielded.
    NOTE(review): confirm the training code expects that extra dimension.
    """
    on_device = []
    for inputs, targets in loader:
        on_device.append((inputs.to(device), targets.to(device)))
    return DataLoader(on_device, batch_size=batch_size)

# Preload datasets to device (if they fit into your device memory).
# The source loaders are rebuilt from the datasets here instead of being read
# back from `train_loader` / `val_loader`: that keeps this cell idempotent, so
# re-running it does not preload an already-preloaded loader (which would nest
# the batches one level deeper each time).
train_loader = preload_dataset_to_device(
    DataLoader(train_dataset, batch_size=len(train_dataset), shuffle=False),
    len(train_dataset),
    device_in_use,
)
val_loader = preload_dataset_to_device(
    DataLoader(val_dataset, batch_size=len(val_dataset), shuffle=False),
    len(val_dataset),
    device_in_use,
)

In [4]:
# Model dimensions: one input per scaled feature column and an output size of 2.
input_size = scaled_train_features.shape[1]
output_size = 2
# One run of the two-model weight-averaging trainer with the 'NN' model; the
# first argument (200) is presumably the number of epochs — TODO confirm.
# NOTE(review): in the recorded outputs the accuracies stored under
# `train_loss_dic` are multiples of 1/85 and those under `test_loss_dic` are
# multiples of 1/398. If the splits are 398 train / 85 validation rows (the
# breast-cancer set should have 569 rows), the two names are swapped relative
# to the data they describe — confirm the return order of
# weight_avg_2_classification before trusting any "test" numbers.
test_loss_dic, train_loss_dic = weight_avg_2_classification(200, train_loader, val_loader, input_size, output_size, device_in_use, model = 'NN') 

train_loss_dic

{'m1_epoch_0': {'loss': 0.7152990698814392, 'accuracy': 58.82353210449219},
 'm1_epoch_1': {'loss': 0.6583237648010254, 'accuracy': 65.88235473632812},
 'm1_epoch_2': {'loss': 0.6061540246009827, 'accuracy': 76.47058868408203},
 'm1_epoch_3': {'loss': 0.5588969588279724, 'accuracy': 83.52941131591797},
 'm1_epoch_4': {'loss': 0.5164943933486938, 'accuracy': 89.4117660522461},
 'm1_epoch_5': {'loss': 0.4787095785140991, 'accuracy': 94.11764526367188},
 'm1_epoch_6': {'loss': 0.445175439119339, 'accuracy': 95.29412078857422},
 'm1_epoch_7': {'loss': 0.4154502749443054, 'accuracy': 96.47058868408203},
 'm1_epoch_8': {'loss': 0.38907748460769653, 'accuracy': 96.47058868408203},
 'm1_epoch_9': {'loss': 0.3656213581562042, 'accuracy': 95.29412078857422},
 'm1_epoch_10': {'loss': 0.34469136595726013, 'accuracy': 95.29412078857422},
 'm1_epoch_11': {'loss': 0.3259486258029938, 'accuracy': 95.29412078857422},
 'm1_epoch_12': {'loss': 0.3091065287590027, 'accuracy': 96.47058868408203},
 'm1_epoc

In [5]:
# Show the per-epoch metrics held in `test_loss_dic` as (key, metrics) pairs.
test_loss_dic.items()

dict_items([('m1_epoch_0', {'loss': 0.7544164061546326, 'accuracy': 47.4874382019043}), ('m1_epoch_1', {'loss': 0.7015863656997681, 'accuracy': 59.04522705078125}), ('m1_epoch_2', {'loss': 0.6524832844734192, 'accuracy': 70.8542709350586}), ('m1_epoch_3', {'loss': 0.6072279810905457, 'accuracy': 78.89447021484375}), ('m1_epoch_4', {'loss': 0.5658337473869324, 'accuracy': 86.43215942382812}), ('m1_epoch_5', {'loss': 0.5282000303268433, 'accuracy': 89.44723510742188}), ('m1_epoch_6', {'loss': 0.494128555059433, 'accuracy': 91.70854187011719}), ('m1_epoch_7', {'loss': 0.4633628726005554, 'accuracy': 92.9648208618164}), ('m1_epoch_8', {'loss': 0.4356171488761902, 'accuracy': 92.71356964111328}), ('m1_epoch_9', {'loss': 0.41060125827789307, 'accuracy': 93.46733856201172}), ('m1_epoch_10', {'loss': 0.38803473114967346, 'accuracy': 93.21607971191406}), ('m1_epoch_11', {'loss': 0.36765721440315247, 'accuracy': 93.71859741210938}), ('m1_epoch_12', {'loss': 0.3492318093776703, 'accuracy': 93.718

In [6]:
def best_metrics(d):
    """
    Find the entry with the lowest loss and the entry with the highest accuracy.

    `d` maps a key to a metrics dictionary with 'loss' and 'accuracy' keys.
    The result is {'min_loss': (key, loss), 'max_accuracy': (key, accuracy)}.
    On ties the first key encountered wins; for an empty `d` the keys are None
    and the values stay at +inf / -inf.
    """
    lowest_loss = (None, float('inf'))
    highest_accuracy = (None, float('-inf'))

    for name in d:
        entry = d[name]
        loss = entry['loss']
        accuracy = entry['accuracy']
        # Strict comparisons so an earlier entry is kept when values are equal.
        if loss < lowest_loss[1]:
            lowest_loss = (name, loss)
        if accuracy > highest_accuracy[1]:
            highest_accuracy = (name, accuracy)

    return {'min_loss': lowest_loss, 'max_accuracy': highest_accuracy}

# Best loss and best accuracy entries of `train_loss_dic` from the single run above.
best = best_metrics(train_loss_dic)
best

{'min_loss': ('m3_epoch_65', 0.08298847079277039),
 'max_accuracy': ('m1_epoch_31', 97.64705657958984)}

In [7]:
# Just the minimum loss value: index 1 of the (key, value) pair.
best['min_loss'][1]

0.08298847079277039

In [8]:
# Repeated comparison on the 'NN' model: plain training vs. two-model weight averaging.
input_size = scaled_train_features.shape[1]
output_size = 2

# Epochs per run: 3*15*5 = 225. (An earlier comment here cited lcm(3, 7, 15) = 105,
# which does not match the value actually used.)
n_epochs = 3 * 15 * 5
n_repeats = 200

train_loss_traditional, test_loss_traditional, test_acc_traditional = [], [], []
train_loss_2, test_loss_2, test_acc_2 = [], [], []
# Kept for the 8-model variant, which is disabled in this cell.
train_loss_8, test_loss_8, test_acc_8 = [], [], []

# (trainer, train-loss list, test-loss list, test-accuracy list), run in this order
# on every repeat. To re-enable the 8-model variant, append
# (weight_avg_8_classification, train_loss_8, test_loss_8, test_acc_8) and print it below.
experiments = [
    (regular_classification, train_loss_traditional, test_loss_traditional, test_acc_traditional),
    (weight_avg_2_classification, train_loss_2, test_loss_2, test_acc_2),
]

for _ in tqdm(range(n_repeats)):
    for trainer, train_losses, test_losses, test_accs in experiments:
        # NOTE(review): the recorded outputs of the earlier single-run cells suggest the
        # dictionary unpacked as `test_loss_dic` was computed on the larger (training)
        # split — confirm the trainers' return order before interpreting these numbers.
        test_loss_dic, train_loss_dic = trainer(
            n_epochs, train_loader, val_loader, input_size, output_size, device_in_use, model='NN'
        )

        # Each run is summarised by its best epoch (lowest loss / highest accuracy).
        test_best = best_metrics(test_loss_dic)
        train_best = best_metrics(train_loss_dic)

        train_losses.append(train_best['min_loss'][1])
        test_losses.append(test_best['min_loss'][1])
        test_accs.append(test_best['max_accuracy'][1])

# Mean and standard deviation over the repeats.
print('LOSS')
print("Traditional:", np.mean(test_loss_traditional), np.std(test_loss_traditional))
print("Weight Avg 2 Base Models:", np.mean(test_loss_2), np.std(test_loss_2))
print('ACC')
print("Traditional:", np.mean(test_acc_traditional), np.std(test_acc_traditional))
print("Weight Avg 2 Base Models:", np.mean(test_acc_2), np.std(test_acc_2))

100%|██████████| 200/200 [32:24<00:00,  9.72s/it]

LOSS
Traditional: 0.05579584740102291 0.002557121539348141
Weight Avg 2 Base Models: 0.07446974758058786 0.0015969273878280114
ACC
Traditional: 99.02387142181396 0.2318878840803102
Weight Avg 2 Base Models: 98.30653118133544 0.14702277183879106





In [None]:
# Repeated comparison on the 'Linear' model: plain training vs. weight averaging
# with two and with eight base models.
input_size = scaled_train_features.shape[1]
output_size = 2

# Epochs per run: 3*15*5 = 225. (An earlier comment here cited lcm(3, 7, 15) = 105,
# which does not match the value actually used.)
n_epochs = 3 * 15 * 5
n_repeats = 500

train_loss_traditional, test_loss_traditional = [], []
train_loss_2, test_loss_2 = [], []
train_loss_8, test_loss_8 = [], []

# (trainer, train-loss list, test-loss list), run in this order on every repeat.
experiments = [
    (regular_classification, train_loss_traditional, test_loss_traditional),
    (weight_avg_2_classification, train_loss_2, test_loss_2),
    (weight_avg_8_classification, train_loss_8, test_loss_8),
]

for _ in tqdm(range(n_repeats)):
    for trainer, train_losses, test_losses in experiments:
        test_loss_dic, train_loss_dic = trainer(
            n_epochs, train_loader, val_loader, input_size, output_size, device_in_use, model='Linear'
        )

        # This cell used to call `keywithminval`, which is not defined anywhere in the
        # notebook. `best_metrics` is the notebook's helper for the current
        # {'loss', 'accuracy'} dictionaries; its 'min_loss' entry is a (key, value) pair.
        # NOTE(review): assumes the 'Linear' runs return the same dictionary layout as
        # the 'NN' runs — confirm.
        test_losses.append(best_metrics(test_loss_dic)['min_loss'][1])
        train_losses.append(best_metrics(train_loss_dic)['min_loss'][1])

# Mean and standard deviation of the best loss over the repeats.
print("Traditional:", np.mean(test_loss_traditional), np.std(test_loss_traditional))
print("Weight Avg 2 Base Models:", np.mean(test_loss_2), np.std(test_loss_2))
print("Weight Avg 8 Base Models:", np.mean(test_loss_8), np.std(test_loss_8))

100%|██████████| 500/500 [18:04<00:00,  2.17s/it]

Traditional: 0.12413684768974781 0.004761746924787912
Weight Avg 2 Base Models: 0.14461213859915734 0.004461780008218083
Weight Avg 8 Base Models: 0.21363147780299185 0.005292064879879165



