In [1]:
from weight_avg_trees_best import *
import torch
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from tqdm import tqdm
# Prefer the GPU when PyTorch reports one; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device_in_use = 'cuda'
else:
    device_in_use = 'cpu'
print(device_in_use)

cpu


In [2]:
# Load scikit-learn's breast-cancer dataset into one frame: the feature
# columns first, then the label appended as the last column ('Target').
ch = load_breast_cancer()
df = pd.DataFrame(data=ch.data, columns=ch.feature_names)
df['Target'] = ch.target

# Split 70% train, then halve the remainder into validation and test.
# Both splits are seeded so the partition is reproducible.
df_train, df_temp = train_test_split(df, train_size=0.70, random_state=42)
df_val, df_test = train_test_split(df_temp, train_size=0.5, random_state=42)

# Labels of each split as plain arrays.
y_train, y_val, y_test = (
    split['Target'].values for split in (df_train, df_val, df_test)
)

# Standardise the features. The scaler is fitted on the training split only
# and then re-used, unchanged, for the validation and test splits.
scaler = StandardScaler()
scaled_train_features = scaler.fit_transform(df_train.drop(columns=['Target']))
scaled_val_features = scaler.transform(df_val.drop(columns=['Target']))
scaled_test_features = scaler.transform(df_test.drop(columns=['Target']))

# Rebuild one frame per split from the scaled arrays. `columns[:-1]` drops the
# trailing 'Target' name, which is then re-attached from the label arrays.
# The rebuilt frames get a fresh 0..n-1 index.
df_scaled_train = pd.DataFrame(
    scaled_train_features, columns=df_train.columns[:-1]
).assign(Target=y_train)

df_scaled_val = pd.DataFrame(
    scaled_val_features, columns=df_val.columns[:-1]
).assign(Target=y_val)

df_scaled_test = pd.DataFrame(
    scaled_test_features, columns=df_test.columns[:-1]
).assign(Target=y_test)

class CustomDataset(Dataset):
    """Map-style dataset backed by a DataFrame that has a 'Target' column.

    Every column other than 'Target' is treated as a feature. Items are
    returned as a ``(features, label)`` pair of tensors, with features cast
    to ``torch.float`` and the label cast to ``torch.long``.
    """

    def __init__(self, dataframe):
        # Keep plain arrays; tensors are created per item in __getitem__.
        self.labels = dataframe['Target'].values
        self.features = dataframe.drop(columns='Target').values

    def __len__(self):
        # One item per label row.
        return len(self.labels)

    def __getitem__(self, idx):
        sample = torch.tensor(self.features[idx], dtype=torch.float)
        target = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample, target

# One dataset per split, built from the scaled frames.
train_dataset, val_dataset, test_dataset = map(
    CustomDataset, (df_scaled_train, df_scaled_val, df_scaled_test)
)

# Full-batch loaders: batch_size equals the split size and shuffling is off,
# so each loader yields its entire split as a single batch in stored order.
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=len(split), shuffle=False)
    for split in (train_dataset, val_dataset, test_dataset)
)

In [3]:
# Define a function to move an entire dataset to the device in advance
def preload_dataset_to_device(loader, batch_size, device):
    """Move every batch yielded by ``loader`` onto ``device`` and re-wrap them.

    Args:
        loader: iterable yielding ``(inputs, targets)`` pairs of tensors.
        batch_size: batch size given to the returned DataLoader.
        device: target passed to ``Tensor.to`` for both tensors of each pair.

    Returns:
        A DataLoader whose dataset is the list of already-moved
        ``(inputs, targets)`` pairs.
    """
    # NOTE(review): each list element is a whole *batch*, not a sample, so the
    # returned loader's default collation stacks batches and adds a leading
    # dimension (e.g. inputs of shape (N, F) come back as (1, N, F)).
    # Presumably the training helpers account for this - confirm before
    # changing it.
    moved = []
    for inputs, targets in loader:
        moved.append((inputs.to(device), targets.to(device)))
    return DataLoader(moved, batch_size=batch_size)

# Preload datasets to device (if they fit into your device memory).
# NOTE: this rebinds train_loader / val_loader to the wrapped loaders, so
# running this cell a second time wraps the already-wrapped loaders again.
train_loader, val_loader = (
    preload_dataset_to_device(loader, len(dataset), device_in_use)
    for loader, dataset in (
        (train_loader, train_dataset),
        (val_loader, val_dataset),
    )
)

In [4]:
# Model dimensions: one input per scaled feature column, two outputs.
input_size = scaled_train_features.shape[1]
output_size = 2

# One run of weight_avg_8_classification with a first argument of 200.
# NOTE(review): val_loader is what gets passed as the second loader, so the
# "test" losses returned here are presumably validation losses - confirm
# against weight_avg_trees_best.
test_loss_dic, train_loss_dic = weight_avg_8_classification(
    200,
    train_loader,
    val_loader,
    input_size,
    output_size,
    device_in_use,
    model='NN',
)

test_loss_dic

{'m1_0': 0.687580406665802,
 'm1_1': 0.637185275554657,
 'm1_2': 0.5911945700645447,
 'm1_3': 0.5494576692581177,
 'm1_4': 0.5117207169532776,
 'm1_5': 0.47767308354377747,
 'm1_6': 0.44697847962379456,
 'm1_7': 0.41930603981018066,
 'm1_8': 0.3943415582180023,
 'm1_9': 0.3718012571334839,
 'm1_10': 0.3514251708984375,
 'm1_11': 0.3329841196537018,
 'm1_12': 0.31627365946769714,
 'm2_0': 0.7699902653694153,
 'm2_1': 0.709478497505188,
 'm2_2': 0.6538681387901306,
 'm2_3': 0.6032981276512146,
 'm2_4': 0.5577084422111511,
 'm2_5': 0.5168600082397461,
 'm2_6': 0.48038870096206665,
 'm2_7': 0.44786983728408813,
 'm2_8': 0.4188659191131592,
 'm2_9': 0.3929622769355774,
 'm2_10': 0.3697820007801056,
 'm2_11': 0.34899309277534485,
 'm2_12': 0.33030810952186584,
 'm3_0': 0.6430279016494751,
 'm3_1': 0.593492329120636,
 'm3_2': 0.5488672256469727,
 'm3_3': 0.5089380145072937,
 'm3_4': 0.47332748770713806,
 'm3_5': 0.44158968329429626,
 'm3_6': 0.4132705628871918,
 'm3_7': 0.3879481256008148,
 '

In [5]:
def keywithminval(d):
    """Return the key holding the smallest value in ``d``, with that value.

    Args:
        d: non-empty mapping whose values can be compared with each other.

    Returns:
        A ``(key, value)`` tuple for the smallest value. When several keys
        share the smallest value, the first one in iteration order is returned.

    Raises:
        ValueError: if ``d`` is empty.
    """
    # A single pass over the items; min() keeps the first of equal candidates,
    # which matches looking up the first index of the smallest value.
    return min(d.items(), key=lambda item: item[1])

# Show the key with the lowest loss in test_loss_dic, together with that loss.
keywithminval(test_loss_dic)

('m10_12', 0.2509273886680603)

In [6]:
input_size = scaled_train_features.shape[1]
output_size = 2

# Per-repeat best (lowest) losses, one list pair per training scheme.
train_loss_traditional = []
test_loss_traditional = []

train_loss_2 = []
test_loss_2 = []

train_loss_8 = []
test_loss_8 = []

# Each scheme is paired with the lists that collect its results. The order
# here is the order in which the schemes run inside every repeat.
experiments = (
    (regular_classification, train_loss_traditional, test_loss_traditional),
    (weight_avg_2_classification, train_loss_2, test_loss_2),
    (weight_avg_8_classification, train_loss_8, test_loss_8),
)

# NOTE(review): the first argument passed to every scheme is 3*15*5 = 225.
# An earlier comment here spoke of 105 = lcm(3, 7, 15), which does not match
# the value actually used - confirm which one is intended.
# tqdm is already imported in the notebook's first cell.
for i in tqdm(range(25)):
    for trainer, train_losses, test_losses in experiments:
        # NOTE(review): val_loader is the loader handed in for evaluation, so
        # the "test" losses are presumably validation losses - confirm.
        test_loss_dic, train_loss_dic = trainer(
            3 * 15 * 5,
            train_loader,
            val_loader,
            input_size,
            output_size,
            device_in_use,
            model='NN',
        )

        # Keep only the smallest loss over all keys of each dictionary.
        # Indexing the result avoids assigning to `_`, which IPython uses for
        # the last cell output.
        testval = keywithminval(test_loss_dic)[1]
        trainval = keywithminval(train_loss_dic)[1]

        train_losses.append(trainval)
        test_losses.append(testval)


print("Traditional:",np.mean(test_loss_traditional),np.std(test_loss_traditional))
print("Weight Avg 2 Base Models:",np.mean(test_loss_2),np.std(test_loss_2))
print("Weight Avg 8 Base Models:",np.mean(test_loss_8),np.std(test_loss_8))

  0%|          | 0/25 [00:00<?, ?it/s]

100%|██████████| 25/25 [06:40<00:00, 16.02s/it]

Traditional: 0.07820585042238236 0.0013544073829742148
Weight Avg 2 Base Models: 0.0783040863275528 0.0016185683179212927
Weight Avg 8 Base Models: 0.21884826600551605 0.0066553451648152335





: 

In [None]:
input_size = scaled_train_features.shape[1]
output_size = 2

# Per-repeat best (lowest) losses, one list pair per training scheme.
train_loss_traditional = []
test_loss_traditional = []

train_loss_2 = []
test_loss_2 = []

train_loss_8 = []
test_loss_8 = []

# Each scheme is paired with the lists that collect its results. The order
# here is the order in which the schemes run inside every repeat.
experiments = (
    (regular_classification, train_loss_traditional, test_loss_traditional),
    (weight_avg_2_classification, train_loss_2, test_loss_2),
    (weight_avg_8_classification, train_loss_8, test_loss_8),
)

# NOTE(review): the first argument passed to every scheme is 3*15*5 = 225.
# An earlier comment here spoke of 105 = lcm(3, 7, 15), which does not match
# the value actually used - confirm which one is intended.
# tqdm is already imported in the notebook's first cell.
for i in tqdm(range(500)):
    for trainer, train_losses, test_losses in experiments:
        # NOTE(review): val_loader is the loader handed in for evaluation, so
        # the "test" losses are presumably validation losses - confirm.
        test_loss_dic, train_loss_dic = trainer(
            3 * 15 * 5,
            train_loader,
            val_loader,
            input_size,
            output_size,
            device_in_use,
            model='Linear',
        )

        # Keep only the smallest loss over all keys of each dictionary.
        # Indexing the result avoids assigning to `_`, which IPython uses for
        # the last cell output.
        testval = keywithminval(test_loss_dic)[1]
        trainval = keywithminval(train_loss_dic)[1]

        train_losses.append(trainval)
        test_losses.append(testval)


print("Traditional:",np.mean(test_loss_traditional),np.std(test_loss_traditional))
print("Weight Avg 2 Base Models:",np.mean(test_loss_2),np.std(test_loss_2))
print("Weight Avg 8 Base Models:",np.mean(test_loss_8),np.std(test_loss_8))

100%|██████████| 500/500 [18:04<00:00,  2.17s/it]

Traditional: 0.12413684768974781 0.004761746924787912
Weight Avg 2 Base Models: 0.14461213859915734 0.004461780008218083
Weight Avg 8 Base Models: 0.21363147780299185 0.005292064879879165



