In [1]:
from weight_avg_trees_best import *
import torch
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from tqdm import tqdm
# Use the GPU when PyTorch reports one as available; otherwise run on the CPU.
if torch.cuda.is_available():
    device_in_use = 'cuda'
else:
    device_in_use = 'cpu'
print(device_in_use)

cuda


In [2]:
# Build one frame: a column per feature, with the label appended last as 'Target'
ch = load_breast_cancer()
df = pd.DataFrame(data=ch.data, columns=ch.feature_names).assign(Target=ch.target)

# 70% goes to training; the remainder is halved into validation and test (fixed seed)
df_train, df_temp = train_test_split(df, train_size=0.70, random_state=42)
df_val, df_test = train_test_split(df_temp, train_size=0.5, random_state=42)

# Labels of each split as plain arrays
y_train, y_val, y_test = (
    split['Target'].values for split in (df_train, df_val, df_test)
)

# Standardise the features: statistics are fitted on the training split only
# and then re-used unchanged for the validation and test splits
scaler = StandardScaler()
scaled_train_features = scaler.fit_transform(df_train.drop(columns=['Target']))
scaled_val_features, scaled_test_features = (
    scaler.transform(split.drop(columns=['Target'])) for split in (df_val, df_test)
)

# Re-attach the labels to the scaled features.
# 'Target' is the last column of every split, so columns[:-1] yields the feature names.
df_scaled_train = pd.DataFrame(
    scaled_train_features, columns=df_train.columns[:-1]
).assign(Target=y_train)

df_scaled_val = pd.DataFrame(
    scaled_val_features, columns=df_val.columns[:-1]
).assign(Target=y_val)

df_scaled_test = pd.DataFrame(
    scaled_test_features, columns=df_test.columns[:-1]
).assign(Target=y_test)

class CustomDataset(Dataset):
    """Dataset wrapper around a DataFrame that has a 'Target' column.

    Every column other than 'Target' is treated as a feature. Items are
    returned as a ``(features, label)`` pair of tensors.
    """

    def __init__(self, dataframe):
        # Labels and features are kept as separate arrays taken from the frame.
        self.labels = dataframe['Target'].values
        self.features = dataframe.drop(columns='Target').values

    def __len__(self):
        # One item per label.
        return self.labels.shape[0]

    def __getitem__(self, idx):
        # Tensors are built on access: float features, long (class index) label.
        sample = torch.tensor(self.features[idx], dtype=torch.float)
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample, label

# Wrap each scaled split in a CustomDataset
train_dataset, val_dataset, test_dataset = (
    CustomDataset(split) for split in (df_scaled_train, df_scaled_val, df_scaled_test)
)

# Full-batch loaders: batch_size equals the dataset length and shuffling is off,
# so each loader yields its whole split once, in the original row order
train_loader, val_loader, test_loader = (
    DataLoader(dataset, batch_size=len(dataset), shuffle=False)
    for dataset in (train_dataset, val_dataset, test_dataset)
)

In [3]:
# Define a function to move an entire dataset to the device in advance
def preload_dataset_to_device(loader, batch_size, device):
    """Move everything `loader` yields onto `device` and wrap it in a new DataLoader.

    Args:
        loader: iterable yielding ``(inputs, targets)`` pairs whose elements
            support ``.to(device)``.
        batch_size: batch size handed to the returned DataLoader.
        device: target passed to ``.to(...)`` for both inputs and targets.

    Returns:
        A DataLoader over the list of moved ``(inputs, targets)`` pairs.

    NOTE(review): each list element is whatever `loader` yields per iteration.
    When `loader` yields batches, the returned DataLoader treats whole batches
    as samples, so default collation would stack them and add a leading
    dimension -- confirm the consumer expects that shape.
    """
    on_device = []
    for batch_inputs, batch_targets in loader:
        on_device.append((batch_inputs.to(device), batch_targets.to(device)))
    return DataLoader(on_device, batch_size=batch_size)

# Replace the train/validation loaders with device-resident versions
# (only sensible when the data fits in device memory). The test loader is left untouched.
# NOTE(review): the loaders are rebound from themselves, so re-running this cell
# wraps the already-wrapped loaders again -- re-run the loader-creation cell first.
train_loader, val_loader = (
    preload_dataset_to_device(split_loader, len(split_dataset), device_in_use)
    for split_loader, split_dataset in (
        (train_loader, train_dataset),
        (val_loader, val_dataset),
    )
)

In [7]:
# One model input per column of the scaled training matrix
input_size = scaled_train_features.shape[-1]
output_size = 2  # two target classes

# NOTE(review): val_loader is the second loader passed in, yet the first result is
# named test_loss_dic -- presumably it holds validation losses; confirm against
# weight_avg_2_classification. The meaning of the leading 1200 is also defined there.
test_loss_dic, train_loss_dic = weight_avg_2_classification(
    1200,
    train_loader,
    val_loader,
    input_size,
    output_size,
    device_in_use,
    model='NN',
)

test_loss_dic

{'m1_0': 0.742535412311554,
 'm1_1': 0.7029932141304016,
 'm1_2': 0.667330265045166,
 'm1_3': 0.6351428627967834,
 'm1_4': 0.606048583984375,
 'm1_5': 0.5796959400177002,
 'm1_6': 0.5557683110237122,
 'm1_7': 0.5339845418930054,
 'm1_8': 0.5140975713729858,
 'm1_9': 0.495890349149704,
 'm1_10': 0.47917452454566956,
 'm1_11': 0.46378520131111145,
 'm1_12': 0.4495788514614105,
 'm1_13': 0.4364304840564728,
 'm1_14': 0.4242306053638458,
 'm1_15': 0.4128835201263428,
 'm1_16': 0.40230515599250793,
 'm1_17': 0.39242181181907654,
 'm1_18': 0.38316822052001953,
 'm1_19': 0.3744869530200958,
 'm1_20': 0.3663269281387329,
 'm1_21': 0.3586428463459015,
 'm1_22': 0.3513944149017334,
 'm1_23': 0.3445455729961395,
 'm1_24': 0.338064044713974,
 'm1_25': 0.331920862197876,
 'm1_26': 0.32609009742736816,
 'm1_27': 0.32054808735847473,
 'm1_28': 0.3152737021446228,
 'm1_29': 0.3102477490901947,
 'm1_30': 0.30545279383659363,
 'm1_31': 0.30087295174598694,
 'm1_32': 0.2964937686920166,
 'm1_33': 0.29230