In [1]:
import os

import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import grad
from torchvision import transforms
from torchvision import datasets
import torchvision.datasets.utils as dataset_utils
from site import addsitedir
addsitedir("/home/wangqihang/MyContinualLearning/visual")
from utils import metrics, datasets, nn_utils
import utils.others as others

## Data Preparation

In [2]:
from sklearn.model_selection import train_test_split
import pandas as pd

import os

def split_feature_label_datasets(files, p=0.5):
    for feature_file, label_file in files:
        # Extract the base filename without extension for saving the split datasets
        feature_base_name = os.path.basename(feature_file)
        label_base_name = os.path.basename(label_file)
        feature_name_without_extension = os.path.splitext(feature_base_name)[0]
        label_name_without_extension = os.path.splitext(label_base_name)[0]
        
        # Read the CSV files
        feature_data = pd.read_csv(feature_file, header=None)
        label_data = pd.read_csv(label_file, header=None)
        
        # Perform the split
        train_feature_data, test_feature_data, train_label_data, test_label_data = train_test_split(
            feature_data, label_data, test_size=p, random_state=42)
        
        # Save the mini train and test datasets
        train_feature_file = os.path.join(os.path.dirname(feature_file), f"mini_train_{feature_name_without_extension}.csv")
        test_feature_file = os.path.join(os.path.dirname(feature_file), f"mini_test_{feature_name_without_extension}.csv")
        train_label_file = os.path.join(os.path.dirname(label_file), f"mini_train_{label_name_without_extension}.csv")
        test_label_file = os.path.join(os.path.dirname(label_file), f"mini_test_{label_name_without_extension}.csv")
        
        train_feature_data.to_csv(train_feature_file, index=False, header=False)
        test_feature_data.to_csv(test_feature_file, index=False, header=False)
        train_label_data.to_csv(train_label_file, index=False, header=False)
        test_label_data.to_csv(test_label_file, index=False, header=False)

PREPATH = "/home/wangqihang/MyContinualLearning/store/datasets/QoT/"

TRAINs = []

CHANNEL_TRAINs = []
CHANNEL_TESTs = []

TIMEVAR_TRAINs = []
TIMEVAR_TESTs = []

DROPED_TRAINs = []
DROPED_TESTs = []
for i in range(5):
    TRAINs.append((PREPATH + f"X_train_{str(i+1)}.csv", PREPATH + f"y_train_{str(i+1)}.csv"))

for i in range(5):
    CHANNEL_TESTs.append((PREPATH + f"X_test_1_{str(i+1)}.csv", PREPATH + f"y_test_1_{str(i+1)}.csv"))
split_feature_label_datasets(CHANNEL_TESTs, p=0.5)
CHANNEL_TESTs = []
for i in range(5):
    CHANNEL_TESTs.append((PREPATH + f"mini_test_X_test_1_{str(i+1)}.csv", PREPATH + f"mini_test_y_test_1_{str(i+1)}.csv"))
    CHANNEL_TRAINs.append((PREPATH + f"mini_train_X_test_1_{str(i+1)}.csv", PREPATH + f"mini_train_y_test_1_{str(i+1)}.csv"))

for i in range(6):
    TIMEVAR_TESTs.append((PREPATH + f"X_test_2_{str(i+1)}.csv", PREPATH + f"y_test_2_{str(i+1)}.csv"))
split_feature_label_datasets(TIMEVAR_TESTs, p=0.5)
TIMEVAR_TESTs = []
for i in range(6):
    TIMEVAR_TESTs.append((PREPATH + f"mini_test_X_test_2_{str(i+1)}.csv", PREPATH + f"mini_test_y_test_2_{str(i+1)}.csv"))
    TIMEVAR_TRAINs.append((PREPATH + f"mini_train_X_test_2_{str(i+1)}.csv", PREPATH + f"mini_train_y_test_2_{str(i+1)}.csv"))

for i in range(3):
    DROPED_TESTs.append((PREPATH + f"X_test_3_{str(i+1)}.csv", PREPATH + f"y_test_3_{str(i+1)}.csv"))
split_feature_label_datasets(DROPED_TESTs, p=0.5)
DROPED_TESTs = []
for i in range(3):
    DROPED_TESTs.append((PREPATH + f"mini_test_X_test_3_{str(i+1)}.csv", PREPATH + f"mini_test_y_test_3_{str(i+1)}.csv"))
    DROPED_TRAINs.append((PREPATH + f"mini_train_X_test_3_{str(i+1)}.csv", PREPATH + f"mini_train_y_test_3_{str(i+1)}.csv"))

split_feature_label_datasets([[PREPATH + "X_train_1.csv", PREPATH + "y_train_1.csv"]], p=0.2)

Train_dataset = nn_utils.dataset(PREPATH + "mini_train_X_train_1.csv", PREPATH + "mini_train_y_train_1.csv", preprocessing=True)
Train_loader = nn_utils.loader(Train_dataset, batch_size=len(Train_dataset), shuffle=False)

Test_dataset = nn_utils.dataset(PREPATH + "mini_test_X_train_1.csv", PREPATH + "mini_test_y_train_1.csv", transform=[Train_dataset.X_scaler, Train_dataset.y_scaler])
Test_loader = nn_utils.loader(Test_dataset, batch_size=len(Test_dataset), shuffle=False)

Channel_train_loader = []
Time_train_loader = []
Drop_train_loader = []

Channel_test_loader = []
Time_test_loader = []
Drop_test_loader = []

for i in range(5):
    test_dataset = nn_utils.dataset(*CHANNEL_TESTs[i], transform=[Train_dataset.X_scaler, Train_dataset.y_scaler])
    test_dataloader = nn_utils.loader(test_dataset, batch_size=len(test_dataset), shuffle=False)
    Channel_test_loader.append(test_dataloader)
    train_dataset = nn_utils.dataset(*CHANNEL_TRAINs[i], transform=[Train_dataset.X_scaler, Train_dataset.y_scaler])
    train_dataloader = nn_utils.loader(train_dataset, batch_size=len(train_dataset), shuffle=False)
    Channel_train_loader.append(train_dataloader)

for i in range(6):
    test_dataset = nn_utils.dataset(*TIMEVAR_TESTs[i], transform=[Train_dataset.X_scaler, Train_dataset.y_scaler])
    test_dataloader = nn_utils.loader(test_dataset, batch_size=len(test_dataset), shuffle=False)
    Time_test_loader.append(test_dataloader)
    train_dataset = nn_utils.dataset(*TIMEVAR_TRAINs[i], transform=[Train_dataset.X_scaler, Train_dataset.y_scaler])
    train_dataloader = nn_utils.loader(train_dataset, batch_size=len(train_dataset), shuffle=False)
    Time_train_loader.append(train_dataloader)
    
for i in range(3):
    test_dataset = nn_utils.dataset(*DROPED_TESTs[i], transform=[Train_dataset.X_scaler, Train_dataset.y_scaler])
    test_dataloader = nn_utils.loader(test_dataset, batch_size=len(test_dataset), shuffle=False)
    Drop_test_loader.append(test_dataloader)
    train_dataset = nn_utils.dataset(*DROPED_TRAINs[i], transform=[Train_dataset.X_scaler, Train_dataset.y_scaler])
    train_dataloader = nn_utils.loader(train_dataset, batch_size=len(train_dataset), shuffle=False)
    Drop_train_loader.append(train_dataloader)



## Util Functions

In [3]:
import csv

def get_std(a, b):
    a = a.numpy()
    b = b.numpy()
    return np.sqrt(np.var(a - b))

def get_99std(a, b):
    a = a.numpy()
    b = b.numpy()
    return np.percentile(np.abs(a-b), 99)

def get_100std(a, b):
    a = a.numpy()
    b = b.numpy()
    return np.percentile(np.abs(a-b), 100)

def save_list_to_csv(data_list, filename):
    with open(filename, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        for row in data_list:
            csv_writer.writerow([row])

def save_dict_to_csv(data_dict, filename):
    with open(filename, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        for key, value in data_dict.items():
            csv_writer.writerow([key, value])

def test_model(model, device, test_loader):
    model.eval()
    loss = nn.MSELoss()
    var = metrics.StdDeviation()
    test_loss = 0
    # test_var = 0
    mse = 0
    cnt = 0
    with torch.no_grad():
        for data, target in test_loader:
            cnt += 1
            data, target = data.to(device), target.to(device).float()
            output = model(data)
            test_loss += loss(output, target).item()
            test_var = var(output.cpu(), target.cpu())# sum up batch loss

    test_loss /= cnt

    return test_loss, test_var

def erm_train(model, device, train_loader, optimizer, epoch):
    model.to(device)
    model.train()
    mseloss = nn.MSELoss()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device).float()
        optimizer.zero_grad()
        output = model(data)
        loss = 2 * mseloss(output, target)
        loss.backward()
        optimizer.step()

    return loss.item()






In [4]:
import random
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset
from torch.utils.data import ConcatDataset, DataLoader
from random import sample
from threading import Thread
import time
import sys
import pickle


def get_scenario_agnostic_dataloaders(task_index, given_sequence):
    '''
    Returns the current and replay dataloaders irrespective of the scenario, 
    allowing for a sequence that switches between different scenarios.

    :param task_index: The index of the current task in the sequence
    :param given_sequence: A list of tuples, each containing ('Scenario', task_index)
    '''
    # Mapping scenario names to dataloaders
    scenario_to_dataloader_train = {
        'Channel': Channel_train_loader,
        'Time': Time_train_loader,
        'Drop': Drop_train_loader
    }
    scenario_to_dataloader_test = {
        'Channel': Channel_test_loader,
        'Time': Time_test_loader,
        'Drop': Drop_test_loader
    }
    
    current_scenario, current_task = given_sequence[task_index]
    current_train_dataloader = scenario_to_dataloader_train[current_scenario][current_task]
    current_test_dataloader = scenario_to_dataloader_train[current_scenario][current_task]
    
    # Collecting past tasks from the given sequence up to the current task index for replay
    past_tasks = given_sequence[:task_index]
    replay_train_dataloaders = [scenario_to_dataloader_train[scenario][task] for scenario, task in past_tasks]
    replay_test_dataloaders = [scenario_to_dataloader_test[scenario][task] for scenario, task in past_tasks]
    
    return current_train_dataloader, current_test_dataloader, replay_train_dataloaders, replay_test_dataloaders

def generate_random_sequence(num_tasks, scenarios=['Channel', 'Time', 'Drop']):
    '''
    Generates a random sequence of tasks and scenarios.

    :param num_tasks: The total number of tasks in the sequence
    :param scenarios: The list of possible scenarios
    '''
    if num_tasks == 500:
        _Channel_train_loader = Channel_train_loader[:2]
        _Time_train_loader = Time_train_loader[:2]
    elif num_tasks == 1000:
        _Channel_train_loader = Channel_train_loader[:4]
        _Time_train_loader = Time_train_loader[:4]
    else:
        _Channel_train_loader = Channel_train_loader
        _Time_train_loader = Time_train_loader
    scenario_to_dataloader = {
        'Channel': _Channel_train_loader,
        'Time': _Time_train_loader,
        'Drop': Drop_train_loader
    }
    sequence = []
    for _ in range(num_tasks):
        scenario = random.choice(scenarios)
        task_index = random.randint(0, len(scenario_to_dataloader[scenario]) - 1)
        sequence.append((scenario, task_index))
    return sequence

def save_model(model, path='/home/wangqihang/MyContinualLearning/store/models/base_model.pth'):
    torch.save(model.state_dict(), path)

def load_model(model, path='/home/wangqihang/MyContinualLearning/store/models/base_model.pth'):
    model.load_state_dict(torch.load(path))
    return model

def optimizer_to(optim, device):
    for param in optim.state.values():
        # Not sure there are any global tensors in the state dict
        if isinstance(param, torch.Tensor):
            param.data = param.data.to(device)
            if param._grad is not None:
                param._grad.data = param._grad.data.to(device)
        elif isinstance(param, dict):
            for subparam in param.values():
                if isinstance(subparam, torch.Tensor):
                    subparam.data = subparam.data.to(device)
                    if subparam._grad is not None:
                        subparam._grad.data = subparam._grad.data.to(device)

# Placeholder function for OOD detection based on test loss
def ood_detection(model, loss_hist, test_loader, loss_function, threshold=0.5):
    '''
    Detects if the model is Out-Of-Distribution (OOD) based on the test loss.
    :param model: Trained ML model
    :param test_loader: DataLoader for the test set
    :param loss_function: Loss function used in training
    :param threshold: Loss threshold for OOD detection
    :return: Boolean indicating if retraining is needed
    '''
    model.to('cpu')
    model.eval()
    total_loss = 0.0
    for data, target in test_loader:
        output = model(data)
        loss = loss_function(output, target)
        total_loss += loss.item()
    
    avg_loss = total_loss / len(test_loader)
    loss_hist.append(avg_loss)
    return avg_loss > threshold

def train_without_replay_without_retrain(model, device, loss_function, optimizer, sequence, retrain_threshold=0.1):
    '''
    Trains the model on a sequence of tasks, with retraining but without replay.
    :param model: The ML model to be trained
    :param loss_function: Loss function used in training
    :param optimizer: Optimizer used in training
    :param sequence: Sequence of tasks (scenario, task index) to train on
    :param retrain_threshold: Loss threshold for triggering retraining
    '''
    retrain_count = 0
    loss_hist = []
    for task_index in range(len(sequence)):
        # Double each task in the sequence for OOD detection and then retraining/test
        current_train_loader, current_test_loader, replay_train_loaders, replay_test_loaders = get_scenario_agnostic_dataloaders(task_index, sequence)
        
        # OOD Detection
        if ood_detection(model, loss_hist, current_test_loader, loss_function, retrain_threshold):
            retrain_count += 1
            # Retraining
            
    return loss_hist, retrain_count

def train_without_replay_with_retrain(model, device, loss_function, optimizer, sequence, retrain_threshold=0.1):
    '''
    Trains the model on a sequence of tasks, with retraining but without replay.
    :param model: The ML model to be trained
    :param loss_function: Loss function used in training
    :param optimizer: Optimizer used in training
    :param sequence: Sequence of tasks (scenario, task index) to train on
    :param retrain_threshold: Loss threshold for triggering retraining
    '''
    
    retrain_count = 0
    loss_hist = []
    times = []
    for task_index in range(len(sequence)):
        # Double each task in the sequence for OOD detection and then retraining/test
        current_train_loader, current_test_loader, replay_train_loaders, replay_test_loaders = get_scenario_agnostic_dataloaders(task_index, sequence)
        start_time = time.time()
        # OOD Detection
        if ood_detection(model, loss_hist, current_test_loader, loss_function, retrain_threshold):
            retrain_count += 1
            # Retraining
            model.to(device)
            model.train()
            for data, target in current_train_loader:
                data, target = data.to(device), target.to(device)
                optimizer.zero_grad()
                output = model(data)
                loss = loss_function(output, target)
                loss.backward()
                optimizer.step()
        end_time = time.time()
        execution_time = end_time - start_time
        times.append(execution_time)
    return loss_hist, retrain_count, times


def train_with_replay_with_retrain(model, device, loss_function, optimizer, sequence, retrain_threshold=0.1):
    '''
    Trains the model on a sequence of tasks, with both retraining and replay.
    :param model: The ML model to be trained
    :param loss_function: Loss function used in training
    :param optimizer: Optimizer used in training
    :param sequence: Sequence of tasks (scenario, task index) to train on
    :param retrain_threshold: Loss threshold for triggering retraining
    '''
    retrain_count = 0
    replay_memory = []  # To store past training data for replay
    loss_hist = []
    times = []
    mem = []
    
    for task_index in range(len(sequence)):
        # Double each task in the sequence for OOD detection and then retraining/test
        current_train_loader, current_test_loader, replay_train_loaders, replay_test_loaders = get_scenario_agnostic_dataloaders(task_index, sequence)
        # print(f"task {task_index} on going")
        # OOD Detection
        start_time = time.time()
        if ood_detection(model, loss_hist, current_test_loader, loss_function, retrain_threshold):
            retrain_count += 1
            
            # Create a concatenated dataset of current task and replay memory
            concat_dataset = ConcatDataset([current_train_loader.dataset] + replay_memory)
            concat_loader = DataLoader(concat_dataset, batch_size=len(concat_dataset), shuffle=True)  # Adjust batch size as needed
            # Retraining
            model.to(device)
            model.train()
            for data, target in concat_loader:
                data, target = data.to(device), target.to(device)
                optimizer.zero_grad()
                output = model(data)
                loss = loss_function(output, target)
                loss.backward()
                optimizer.step()

            # Update Replay Memory
            sample_data = torch.utils.data.Subset(current_train_loader.dataset, [i for i in range(len(current_train_loader.dataset) // 3)])
            replay_memory.append(sample_data)

        end_time = time.time()
        execution_time = end_time - start_time
        times.append(execution_time)

        tempfile = open('tempfile', 'wb')
        pickle.dump(replay_memory, tempfile)
        tempfile.close()
        mem.append(os.path.getsize('tempfile')/1000000)
    
    return loss_hist, retrain_count, times, mem


class Encoder(nn.Module):
    def __init__(self, intermidiate_dim, latent_dim):
        super(Encoder, self).__init__()
        # Build encoder model
        self.drop = nn.Dropout(0.1)
        self.dense1 = nn.Linear(44, 128)
        self.act1 = nn.Tanh()
        self.dense2 = nn.Linear(128, intermidiate_dim)
        self.act2 = nn.Tanh()

        self.model = nn.Sequential(self.dense1, self.act1,
                                   self.dense2, self.act2)
        
        # Posterior on Y; probabilistic regressor
        self.dense_mu_y = nn.Linear(intermidiate_dim, 1)
        self.dense_logvar_y = nn.Linear(intermidiate_dim, 1)

        # q(z|x)
        self.dense_mu_z = nn.Linear(intermidiate_dim, latent_dim)
        self.dense_logvar_z = nn.Linear(intermidiate_dim, latent_dim)

        # latent generator
        self.dense_gen_z = nn.Linear(1, latent_dim)

    def sampling(self, mu, log_var):
        # Reparameterize
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        z = mu + eps * std
        return z
    
    def forward(self, x):
        out = self.model(x)
        # z
        mu_z = self.dense_mu_z(out)
        logvar_z = self.dense_logvar_z(out)
        z = self.sampling(mu_z, logvar_z)
        # y
        mu_y = self.dense_mu_y(out)
        logvar_y = self.dense_logvar_y(out)
        y = self.sampling(mu_y, logvar_y)

        # y conditional z
        z_bar_y = self.dense_gen_z(y)

        return [mu_z, logvar_z, z], [mu_y, logvar_y, y], z_bar_y

class Decoder(nn.Module):
    def __init__(self, intermidiate_dim, latent_dim):
        super(Decoder, self).__init__()

        self.dense1 = nn.Linear(latent_dim, intermidiate_dim)
        self.act1 = nn.Tanh()
        self.dense2 = nn.Linear(intermidiate_dim, 128)
        self.act2 = nn.Tanh()
        self.dense3 = nn.Linear(128, 44)

        self.model = nn.Sequential(self.dense1, self.act1,
                                   self.dense2, self.act2,
                                   self.dense3)
    def forward(self, x):
        return self.model(x)


class EnhancedGenerativeModel:
    def __init__(self, intermidiate_dim=32, latent_dim=8, lr=0.0001, batch_size=32):
        self.encoder = Encoder(intermidiate_dim, latent_dim).cuda(device='cuda:1')
        self.decoder = Decoder(intermidiate_dim, latent_dim).cuda(device='cuda:1')
        self.optimizer = optim.Adam(list(self.encoder.parameters()) + list(self.decoder.parameters()), lr=lr)
        self.batch_size = batch_size

    def sampling(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        z = mu + eps * std
        return z
    
    def update(self, current_train_loader):
        self.encoder.train()
        self.decoder.train()
        
        # Generate synthetic data
        num_samples = len(current_train_loader.dataset)
        synthetic_data = self.generate_data(num_samples)
        synthetic_labels = torch.zeros((num_samples, 1))

        # Create a DataLoader for the synthetic data
        synthetic_dataset = TensorDataset(synthetic_data, synthetic_labels)
        synthetic_loader = DataLoader(synthetic_dataset, batch_size=len(synthetic_dataset), shuffle=True)

        # Concatenate the feature and label in the row direction from current_train_loader
        concatenated_data_list = []
        for data, target in current_train_loader:
            concatenated_data = torch.cat((data, target), dim=1)  # Assuming target is column vector
            if len(data) != 0 and len(target) != 0:
                concatenated_data_list.append(concatenated_data)
        concatenated_data_tensor = torch.vstack(concatenated_data_list)
        
        # Create labels for the concatenated data (these can be zero or whatever is appropriate)
        concatenated_labels = torch.zeros(concatenated_data_tensor.shape[0], 1)

        # Create a DataLoader for the concatenated data
        concatenated_dataset = TensorDataset(concatenated_data_tensor, concatenated_labels)
        concatenated_loader = DataLoader(concatenated_dataset, batch_size=len(concatenated_dataset), shuffle=True)

        # Combine synthetic and real (concatenated) data
        combined_dataset = ConcatDataset([concatenated_dataset, synthetic_dataset])
        combined_loader = DataLoader(combined_dataset, batch_size=len(combined_dataset), shuffle=True)

        for ep in range(10):
            for batch_data, batch_target in combined_loader:
                batch_data, batch_target = batch_data.cuda(device='cuda:1'), batch_target.cuda(device='cuda:1')
                
                self.optimizer.zero_grad()

                # Forward pass through encoder
                [mu_z, logvar_z, z], [mu_y, logvar_y, y], z_bar_y = self.encoder(batch_data)

                # Decode the latent variable
                x_hat = self.decoder(z)

                # Loss computation (Reconstruction + KL divergence)
                reconstruction_loss = F.mse_loss(x_hat, batch_data, reduction='sum')
                kl_loss = -0.5 * torch.sum(1 + logvar_z - mu_z.pow(2) - logvar_z.exp())
                loss = reconstruction_loss + kl_loss

                loss.backward()
                self.optimizer.step()



    def generate_data(self, num_samples):
        self.encoder.eval()
        self.decoder.eval()

        # Generate latent variables (you might need to adjust dimensions)
        z = torch.randn(num_samples, 8).cuda(device='cuda:1')

        # Generate data using the decoder
        with torch.no_grad():
            generated_data = self.decoder(z)
        
        # Move generated data to CPU
        generated_data = generated_data.cpu()
        
        return generated_data

    def save_model(self, path):
        torch.save({
            'encoder': self.encoder.state_dict(),
            'decoder': self.decoder.state_dict(),
            'optimizer': self.optimizer.state_dict()
        }, path)

    def load_model(self, path):
        checkpoint = torch.load(path)
        self.encoder.load_state_dict(checkpoint['encoder'])
        self.decoder.load_state_dict(checkpoint['decoder'])
        self.optimizer.load_state_dict(checkpoint['optimizer'])

def unpack_generated_data(generated_data):
    '''
    Unpacks the generated synthetic data into features and labels.
    
    :param generated_data: Tensor containing the generated synthetic data where features and labels are concatenated.
    
    :returns: A tuple (features, labels)
    '''
    # Assuming the generated data is of shape [num_samples, feature_dim + label_dim]
    # and that the label is a single column at the end of the data tensor
    features = generated_data[:, :-1]  # All columns except the last one
    labels = generated_data[:, -1].unsqueeze(-1)  # The last column
    
    return features, labels

# Placeholder function for OOD detection based on test loss
def ood_detection(model, loss_hist, test_loader, loss_function, threshold=0.2):
    '''
    Detects if the model is Out-Of-Distribution (OOD) based on the test loss.
    :param model: Trained ML model
    :param test_loader: DataLoader for the test set
    :param loss_function: Loss function used in training
    :param threshold: Loss threshold for OOD detection
    :return: Boolean indicating if retraining is needed
    '''
    model.to('cpu')
    model.eval()
    total_loss = 0.0
    for data, target in test_loader:
        output = model(data)
        loss = loss_function(output, target)
        total_loss += loss.item()
    
    avg_loss = total_loss / len(test_loader)
    loss_hist.append(avg_loss)
    return avg_loss > threshold


def train_with_gen_with_retrain_multithreaded(model, device, generative_model, loss_function, optimizer, sequence, retrain_threshold=0.1):
    '''
    Trains the model on a sequence of tasks, with both retraining and data generated from a generative model.
    Uses multi-threading to train the generative model and main model in parallel.
    :param model: The ML model to be trained
    :param generative_model: The generative model for simulating past data
    :param loss_function: Loss function used in training
    :param optimizer: Optimizer used in training
    :param sequence: Sequence of tasks (scenario, task index) to train on
    :param retrain_threshold: Loss threshold for triggering retraining
    '''
    retrain_count = 0
    loss_hist = []
    times = []
    mem = []
    
    def train_generative_model(train_loader):
        generative_model.update(train_loader)
    
    gen_thread = None  # Initialize generative model thread as None

    for task_index in range(len(sequence)):
        current_train_loader, current_test_loader, _, _ = get_scenario_agnostic_dataloaders(task_index, sequence)
        start_time = time.time()
        if ood_detection(model, loss_hist, current_test_loader, loss_function, retrain_threshold):
            retrain_count += 1

            # If a previous generative model thread is running, wait for it to finish
            if gen_thread is not None and gen_thread.is_alive():
                gen_thread.join()

            # Generate data using the generative model
            gen_data, gen_target = unpack_generated_data(generative_model.generate_data(len(current_train_loader.dataset)))
            gen_dataset = TensorDataset(gen_data, gen_target)
            # gen_loader = DataLoader(gen_dataset, batch_size=32, shuffle=True)
            
            # Create a concatenated dataset of current task and generated data
            concat_dataset = ConcatDataset([current_train_loader.dataset, gen_dataset])
            # concat_dataset = ConcatDataset([current_train_loader.dataset])
            concat_loader = DataLoader(concat_dataset, batch_size=len(concat_dataset), shuffle=True)
            
            # Retraining the main model
            model.to(device)
            model.train()
            
            for data, target in concat_loader:
                data, target = data.to(device), target.to(device)
                optimizer.zero_grad()
                output = model(data)
                loss = loss_function(output, target)
                loss.backward()
                optimizer.step()

            # Start a new thread for training the generative model
            gen_thread = Thread(target=train_generative_model, args=(current_train_loader,))
            gen_thread.start()

        end_time = time.time()
        execution_time = end_time - start_time
        times.append(execution_time)
        
        tempfile = open('tempfile', 'wb')
        pickle.dump(generative_model, tempfile)
        tempfile.close()
        mem.append(os.path.getsize('tempfile')/1000000)

    return loss_hist, retrain_count, times, mem

def calculate_eta(T_m, S_m, epsilon_m, alpha=1.0, beta=1.0, gamma=1.0):
    """
    Calculate the efficiency metric eta for different retraining strategies in optical networks.
    
    Parameters:
    - T0 (float): Initial training time
    - S0 (float): Initial required storage space
    - T_m (list of float): List of retraining times for each iteration
    - S_m (list of float): List of required storage spaces for each iteration
    - epsilon_m (list of float): List of errors for each iteration
    - N (int): Total number of retrainings counted once after the entire sequence has been traversed
    - alpha (float, optional): Control parameter for the trade-off between model performance and resource usage
    - beta (float, optional): Control parameter for the impact of the total number of retrainings

    Returns:
    - eta (float): The efficiency metric
    """
    
    M = len(T_m)  # Total number of retraining iterations
    
    numerator = np.sum(np.exp(- (alpha * np.array(epsilon_m) + beta * np.array(T_m) + gamma * np.array(S_m))))
    denominator = M
    
    eta = numerator / denominator
    
    return eta




In [5]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

layer=8
hidden_num=[32, 64, 64, 64, 64, 32, 16]
# hidden_num = [32, 64, 16, 16, 16, 16, 32]
dropout=[0.01, 0.1, 0.2, 0.1, 0.1, 0.01, 0.01]
model = nn_utils.NeuralNetwork(43, 1, layer, hidden_num, dropout).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0)

model = load_model(model)

# Example usage
generative_model = EnhancedGenerativeModel()
generative_model.update(Train_loader)


In [6]:
def train_once(model, sequence, alpha=1, beta=0.1, gamma=0.1):
    seq_len = len(sequence)
    loss_function = nn.MSELoss()
    device = torch.device("cuda" if use_cuda else "cpu")
    
    # SI
    model = load_model(model)
    SI_optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0)
    SI_loss, _ = train_without_replay_without_retrain(model, device, loss_function, SI_optimizer, sequence)
    SI_eta = calculate_eta([0 for i in range(seq_len)], [0 for i in range(seq_len)], [0 if i<0.2 else i for i in SI_loss], alpha, beta, gamma)
    SI_MSE = np.mean(SI_loss)

    # AR
    model = load_model(model)
    AR_optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0)
    AR_loss, AR_retrain, AR_time = train_without_replay_with_retrain(model, device, loss_function, AR_optimizer, sequence)
    AR_eta = calculate_eta(AR_time, [0 for i in range(seq_len)], [0 if i<0.2 else i for i in AR_loss], alpha, beta, gamma)
    AR_MSE = np.mean(AR_loss)

    # ARER
    model = load_model(model)
    ARER_optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0)
    ARER_loss, ARER_retrain, ARER_time, ARER_mem = train_with_replay_with_retrain(model, device, loss_function, ARER_optimizer, sequence)
    ARER_eta = calculate_eta(ARER_time, ARER_mem, [0 if i<0.2 else i for i in ARER_loss], alpha, beta, gamma)
    ARER_MSE = np.mean(ARER_loss)

    # ARER_DGM
    model = load_model(model)
    ARER_DGM_optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0)
    ARER_DGM_loss, ARER_DGM_retrain, ARER_DGM_time, ARER_DGM_mem = train_with_gen_with_retrain_multithreaded(model, device, generative_model, loss_function, ARER_DGM_optimizer, sequence)
    ARER_DGM_eta = calculate_eta(ARER_DGM_time, ARER_DGM_mem, [0 if i<0.2 else i for i in ARER_DGM_loss], alpha, beta, gamma)
    ARER_DGM_MSE = np.mean(ARER_DGM_loss)

    Results = {}
    Results['Len'] = seq_len
    Results['SI'] = {}
    Results['SI']['eta'] = SI_eta
    Results['SI']['loss'] = SI_MSE
    Results['SI']['retrain'] = 0

    Results['AR'] = {}
    Results['AR']['eta'] = AR_eta
    Results['AR']['loss'] = AR_MSE
    Results['AR']['retrain'] = AR_retrain

    Results['ARER'] = {}
    Results['ARER']['eta'] = ARER_eta
    Results['ARER']['loss'] = ARER_MSE
    Results['ARER']['retrain'] = ARER_retrain

    Results['ARER_DGM'] = {}
    Results['ARER_DGM']['eta'] = ARER_DGM_eta
    Results['ARER_DGM']['loss'] = ARER_DGM_MSE
    Results['ARER_DGM']['retrain'] = ARER_DGM_retrain

    return Results

random_sequence_1 = generate_random_sequence(1000, scenarios=['Channel', 'Time'])
results_1 = train_once(model, random_sequence_1, alpha=10, beta=0.01, gamma=0.001)
random_sequence_2 = generate_random_sequence(500, scenarios=['Channel', 'Time'])
results_2 = train_once(model, random_sequence_2, alpha=10, beta=0.01, gamma=0.001)
random_sequence_3 = generate_random_sequence(2000, scenarios=['Channel', 'Time'])
results_3 = train_once(model, random_sequence_3, alpha=10, beta=0.01, gamma=0.001)

save_dict_to_csv(results_1, '/home/wangqihang/MyContinualLearning/store/results/results_s1_1000.csv')
save_dict_to_csv(results_2, '/home/wangqihang/MyContinualLearning/store/results/results_s1_500.csv')
save_dict_to_csv(results_3, '/home/wangqihang/MyContinualLearning/store/results/results_s1_2000.csv')

In [7]:
random_sequence_2 = generate_random_sequence(2000, scenarios=['Channel', 'Time'])
seq_len = len(random_sequence_2)
loss_function = nn.MSELoss()
device = torch.device("cuda" if use_cuda else "cpu")
alpha=10
beta=1
gamma=0.01

# SI
model = load_model(model)
SI_optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0)
SI_loss, _ = train_without_replay_without_retrain(model, device, loss_function, SI_optimizer, random_sequence_2)
SI_eta = calculate_eta([0 for i in range(seq_len)], [0 for i in range(seq_len)], SI_loss, alpha, beta, gamma)
SI_MSE = np.mean(SI_loss)

# AR
model = load_model(model)
AR_optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0)
AR_loss, AR_retrain, AR_time = train_without_replay_with_retrain(model, device, loss_function, AR_optimizer, random_sequence_2)
AR_eta = calculate_eta(AR_time, [0 for i in range(seq_len)], AR_loss, alpha, beta, gamma)
AR_MSE = np.mean(AR_loss)

# ARER
model = load_model(model)
ARER_optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0)
ARER_loss, ARER_retrain, ARER_time, ARER_mem = train_with_replay_with_retrain(model, device, loss_function, ARER_optimizer, random_sequence_2)
ARER_eta = calculate_eta(ARER_time, ARER_mem, ARER_loss, alpha, beta, gamma)
ARER_MSE = np.mean(ARER_loss)

# ARER_DGM
model = load_model(model)
ARER_DGM_optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0)
ARER_DGM_loss, ARER_DGM_retrain, ARER_DGM_time, ARER_DGM_mem = train_with_gen_with_retrain_multithreaded(model, device, generative_model, loss_function, ARER_DGM_optimizer, random_sequence_2)
ARER_DGM_eta = calculate_eta(ARER_DGM_time, ARER_DGM_mem, ARER_DGM_loss, alpha, beta, gamma)
ARER_DGM_MSE = np.mean(ARER_DGM_loss)

In [8]:
alpha=10
beta=0.1
gamma=0.1
SI_eta = calculate_eta([0 for i in range(seq_len)], [0 for i in range(seq_len)], [0 if i<0.2 else i for i in SI_loss], alpha, beta, gamma)
SI_MSE = np.mean(SI_loss)
AR_eta = calculate_eta(AR_time, [0 for i in range(seq_len)], [0 if i<0.2 else i for i in AR_loss], alpha, beta, gamma)
AR_MSE = np.mean(AR_loss)
ARER_eta = calculate_eta(ARER_time, ARER_mem, [0 if i<0.2 else i for i in ARER_loss], alpha, beta, gamma)
ARER_MSE = np.mean(ARER_loss)
ARER_DGM_eta = calculate_eta(ARER_DGM_time, ARER_DGM_mem, [0 if i<0.2 else i for i in ARER_DGM_loss], alpha, beta, gamma)
ARER_DGM_MSE = np.mean(ARER_DGM_loss)

In [9]:
Results = {}
Results['Len'] = seq_len
Results['SI'] = {}
Results['SI']['eta'] = SI_eta
Results['SI']['loss'] = SI_MSE
Results['SI']['retrain'] = 0

Results['AR'] = {}
Results['AR']['eta'] = AR_eta
Results['AR']['loss'] = AR_MSE
Results['AR']['retrain'] = AR_retrain

Results['ARER'] = {}
Results['ARER']['eta'] = ARER_eta
Results['ARER']['loss'] = ARER_MSE
Results['ARER']['retrain'] = ARER_retrain

Results['ARER_DGM'] = {}
Results['ARER_DGM']['eta'] = ARER_DGM_eta
Results['ARER_DGM']['loss'] = ARER_DGM_MSE
Results['ARER_DGM']['retrain'] = ARER_DGM_retrain

save_dict_to_csv(Results, '/home/wangqihang/MyContinualLearning/store/results/results_101001_1000.csv')
save_dict_to_csv(Results, '/home/wangqihang/MyContinualLearning/store/results/results_101001_500.csv')
save_dict_to_csv(results_3, '/home/wangqihang/MyContinualLearning/store/results/results_101001_2000.csv')

In [10]:
ARER_eta

0.7345302021404506

In [11]:
SI_eta

0.6167477456804493

In [12]:
AR_eta

0.7547625041316729