In [None]:
# If U are using SageMaker Prepare for the dataset
!pip install awscli
!aws s3 cp s3://handata/ref_youtube_audio/ ref_youtube_audio/ --recursive

In [None]:
!pip install transformers
!pip install -U openai-whisper
!pip install librosa

In [1]:
from transformers import AutoFeatureExtractor, WhisperForAudioClassification
import torch
import torch.nn as nn
import whisper
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import whisper
import pandas as pd
from categories import ytvos_category_dict
import numpy as np
from util import read_aws_json,read_aws_wav,read_local_json,read_local_wav
import logging
from torch import optim
from losses import get_loss_func
from utils.evaluate import Evaluator
from util import infoNCE_loss
import random
from tqdm.notebook import tqdm
from enum import Enum
from sklearn.metrics import f1_score,precision_recall_curve,precision_score,recall_score,accuracy_score,balanced_accuracy_score
from collections import Counter
from audiomentations import Compose, Gain, AddGaussianNoise, PitchShift,TimeStretch,Shift
import torch.nn.functional as F
from torch.optim.lr_scheduler import ExponentialLR
SageMaker = False
Local = True
# ROOT = 'C:/Users/Administrator/Desktop/CLUL-main/data/'
# SAVEDIR = 'C:/Users/Administrator/Desktop/CLUL-main/run/'
ROOT = '/root/CLUL/data/'
SAVEDIR = '/root/CLUL/run/'


In [2]:
class Audio_Encoder(nn.Module):
    def __init__(self, feature_extractor, model, num_class=66,dropout_prob=0.2,pool_num = 100,bias = True):
        super().__init__()
        self.num_class = num_class
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.feature_extractor = feature_extractor
        self.encoder = model.encoder
        for name, param in self.encoder.named_parameters():
          param.requires_grad = False
        self.projector = nn.Linear(in_features=768, out_features=256, bias=True)
        self.classifier = nn.Linear(256, num_class)

        self.avg_pool = nn.AvgPool2d(kernel_size=(pool_num,1), stride=(pool_num,1))
        # self.norm_layer = nn.LayerNorm(256, eps=1e-5, bias=True)
        self.batchnorm = nn.BatchNorm1d(2048, affine=False)
        self.dropout = nn.Dropout(p=dropout_prob)
        self.dropout2 = nn.Dropout(0.5)

        self.fc1 = nn.Linear(1500//pool_num * 256, 2048)
        self.fc2 = nn.Linear(2048, 256)
        self.fc3 = nn.Linear(256, num_class)

    def forward(self, audios):
        input_features = []
        for audio in audios:

            feature = self.feature_extractor(audio.cpu(),sampling_rate=16000,return_tensors="pt").input_features
            input_features.append(feature)

        input_features = torch.cat(input_features, dim=0).to(self.device)
        hidden_states = self.encoder(input_features)
        # hidden_states = self.projector(hidden_states)
        # pooled_output = hidden_states.mean(dim=1)
        # logits = self.classifier(pooled_output)

        x = self.avg_pool(hidden_states)

        x = self.projector(x)
        # x = self.positionencoding(x)
        feature = x.reshape(x.shape[0], -1)

        x = self.dropout(feature)

        x = self.fc1(x)
        # x = self.batchnorm(x)
        x = self.dropout(x)
        x = self.fc2(x)

        x = self.dropout(x)
        x = self.fc3(x)

        output_dict = {
            'clipwise_output': x,
            'feature': feature,
            'embedding': hidden_states}

        return output_dict

class ytvos_Dataset(Dataset):
    def __init__(self, data_frame: pd.DataFrame, sr=44100, num_class=66):
        self.data_frame = data_frame
        self.sr = sr
        self.num_class = num_class
        self.data_root = '/home/user/SED_Adaptation_Classifier-main/data/ref_youtube_audio/audio'

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, index):
        if torch.is_tensor(index):
            index = index.tolist()
        audio_name = self.data_frame.iloc[index]["video"]
        audio_id = self.data_frame.iloc[index]["audio"]
        audio_path = 'ref_youtube_audio/audio' + '/' + audio_name + '/' + audio_id + '.wav'
        name = audio_name + self.data_frame.iloc[index]["exp"]

        
        waveform = read_local_wav(ROOT + audio_path)
#         waveform = whisper.load_audio(audio_path,sr = 16000)

        tag = self.data_frame.iloc[index]["category"]
        target = ytvos_category_dict[self.data_frame.iloc[index]["category"]]
        target = np.eye(self.num_class)[target]
        data_dict = {'audio_name': name, 'waveform': waveform, 'target': target, 'tag': tag}

        return data_dict

def get_datalist(cur_iter):
        task_id = cur_iter
        task_train_metas = []
        task_test_metas = []

       
        metas = read_local_json(ROOT + 'task_split_1/metas.json')['metas']
        tasks = read_local_json(ROOT + 'task_split_1/task{}.json'.format(task_id))[str(task_id)]

        for category,task_metas_dict in tasks.items():
            train_ids = task_metas_dict['train']
            test_ids = task_metas_dict['test']
            for train_id in train_ids:
                task_train_metas.append(metas[train_id])
            for test_id in test_ids:
                task_test_metas.append(metas[test_id])

        return task_train_metas,task_test_metas
    
def default_collate_fn(batch):
    audio_name = [data['audio_name'] for data in batch]
    waveform = [torch.from_numpy(data['waveform']) for data in batch]
    target = [data['target'] for data in batch]

    # waveform = torch.FloatTensor(waveform)
    # waveform = pad_sequence(waveform, batch_first=True, padding_value=0)
    target = torch.FloatTensor(target)

    return {'audio_name': audio_name, 'waveform': waveform, 'target': target}

def get_dataloader(data_frame, dataset,split, batch_size, num_workers=0):
    assert dataset == "ref_youtube_audio"
    dataset = ytvos_Dataset(data_frame=data_frame)
    return DataLoader(dataset=dataset, batch_size=batch_size,
                      shuffle=(split == 'train'), drop_last=False,
                      num_workers=num_workers, collate_fn=default_collate_fn)

def get_train_test_dataloader(batch_size, n_worker, train_list, test_list):
    train_loader = get_dataloader(pd.DataFrame(train_list), 'ref_youtube_audio','train', batch_size=batch_size, 
                                  num_workers=n_worker)
    test_loader = get_dataloader(pd.DataFrame(test_list), 'ref_youtube_audio','test', batch_size=batch_size, 
                                 num_workers=n_worker)
    return train_loader, test_loader


In [64]:
class CLUL:
    def __init__(self,batch_size = 32,lr = 1e-3,memory_size = 500,
                 forget_size = 100,epoch=5,loss ='focal_loss',
                 total_class_num = 65,mode = 'All_Learn_then_Forget',
                 patience = 5,n_worker = 0,
                 **kwargs):
        feature_extractor = AutoFeatureExtractor.from_pretrained("/root/CLUL/whisper")
        whisper_model = whisper.load_model("small")
        
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = Audio_Encoder(feature_extractor, whisper_model).to(self.device)

        self.batch_size = batch_size
        self.lr = lr
        self.epoch = epoch
        self.logger = logging.getLogger()
        self.forget_list = []
        self.memory_list = []
        self.memory_size = memory_size
        self.forget_size = forget_size
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr, betas=(0.9, 0.999))
        self.scheduler = ExponentialLR(self.optimizer, gamma=0.9)
        self.criterion = get_loss_func(loss)
        self.num_pretrain_class = 0
        self.evaluator = Evaluator(self.model, self.num_pretrain_class, self.device)
        
        self.mode = mode
        self.patience = patience
        self.counter = 0
        self.total_class_num = total_class_num
        self.forget_label = total_class_num
        self.n_worker = n_worker
        self.cltask = {
            'task0':[15, 17, 60, 50, 32, 24, 63, 36, 31, 40, 52, 4, 25],
            "task1":[48, 54, 35, 62, 13, 42, 37, 49, 51, 45, 44, 14, 5],
            "task2":[46, 18, 57, 28, 11, 30, 61, 27, 22, 2, 29, 0, 19],
            "task3":[3, 59, 10, 12, 8, 1, 26, 23, 34, 58, 64, 56, 41],
            "task4":[47, 20, 53, 39, 9, 21, 16, 38, 33, 43, 6, 7, 55]
        }
        self.ultask = {
            "ul_task0":[],
            "ul_task1":[15,17,60],
            "ul_task2": [48, 54, 35],
            "ul_task3":[46, 18, 57],
            "ul_task4" : [3, 59, 10],
            "ul_task5" : [47, 20, 53]
        }
           
    def evaluate(self,model_path,cur_iter):
        self.change_model(model_path)
        train_list,test_list = self.get_train_test_datalist(cur_iter)
        _, test_loader = get_train_test_dataloader(self.batch_size, self.n_worker, train_list, test_list)
        y_true,y_pred = self.evaluator.evaluate(test_loader)
        
        cl_class_label,ul_class_label = self.get_cl_ul_class_label(cur_iter)
        # statistics = self.calculate_metrics(y_true,y_pred,cl_class_label,ul_class_label)
        # print(y_true,y_pred,cl_class_label,ul_class_label)
        # return statistics
        return y_true,y_pred,cl_class_label,ul_class_label

    def get_train_test_datalist(self,cur_iter):
        train_list,test_list = get_datalist(cur_iter)
        return train_list,test_list
        
    def change_model(self, path):
        checkpoint_dict = torch.load(path)
        with torch.no_grad():
            for name, param in self.model.named_parameters():
                if name in checkpoint_dict:
                    param.data.copy_(checkpoint_dict[name])
                
    def equal_class_sampling(self, samples, num_class):
        class_list = [self.cltask["task0"], self.cltask["task1"],self.cltask["task2"],self.cltask["task3"],self.cltask["task4"]]
        cur_class_list = []
        for i in range(num_class//13):
            cur_class_list += class_list[i]
        mem_per_cls = self.memory_size // num_class
        sample_df = pd.DataFrame(samples)

        # Warning: assuming the classes were ordered following task number.
        ret = []
        for y in cur_class_list:
            cls_df = sample_df[(sample_df["category"].map(ytvos_category_dict)) == y]
            ret += cls_df.sample(n=min(mem_per_cls, len(cls_df))).to_dict(
                orient="records"
            )

        num_rest_slots = self.memory_size - len(ret)
        if num_rest_slots > 0:
            self.logger.warning("Fill the unused slots by breaking the equilibrium.")
            ret += (
                sample_df[~sample_df.exp.isin(pd.DataFrame(ret).exp)]
                .sample(n=num_rest_slots)
                .to_dict(orient="records")
            )

        num_dups = pd.DataFrame(ret).exp.duplicated().sum()
        if num_dups > 0:
            self.logger.warning(f"Duplicated samples in memory: {num_dups}")

        return ret

    def get_data(self, infer_loader, augment):
        Z, Z_, predict_list = [], [], []
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(device)
        self.model.eval()
        with torch.no_grad():
            for id, data in enumerate(tqdm(infer_loader)):
                wavs = data['waveform']
                aug_wavs = []
                for wav in wavs:
                    aug_wav = augment(wav.unsqueeze(0).unsqueeze(0), sample_rate=1600)
                    aug_wavs.append(torch.as_tensor(aug_wav.squeeze(0).squeeze(0), dtype=torch.float32))

                output_dict = self.model(data['waveform'])
                aug_output_dict = self.model(aug_wavs)

                Z.extend(output_dict['feature'].cpu())
                Z_.extend(aug_output_dict['feature'].cpu())

                clipwise_output = output_dict['clipwise_output']
                pres = np.argmax(clipwise_output.detach().cpu(), axis=1)
                

                for pre in pres: predict_list.append(pre.item())

            class_label_dic = self.save_indexes(predict_list)
        return Z, Z_, class_label_dic, predict_list
    
    def save_indexes(self,arr):
        index_dict = {}
        for idx, num in enumerate(arr):
            if num in index_dict:
                index_dict[num].append(idx)
            else:
                  index_dict[num] = [idx]
        return index_dict

    def Boundary_Expansion_Forget_Label_Set(self,y_true,ul_class_label):
        index_row = torch.argmax(y_true,dim=1)
        for r ,c in enumerate(index_row):
            if int(c) in ul_class_label:
                with torch.no_grad():
                    y_true[r][c] = torch.tensor(0.0, dtype=torch.float32)
                    y_true[r][-1] = torch.tensor(1.0, dtype=torch.float32)
        return y_true
    
    def Ramdom_Label_Forget_Label_Set(self,y_true,cl_class_label,ul_class_label):
        index_row = torch.argmax(y_true,dim=1)
        for r ,c in enumerate(index_row):
            if int(c) in ul_class_label:
                with torch.no_grad():
                    idx = random.choice(list(set(cl_class_label)-set(ul_class_label)))
                    y_true[r][c] = torch.tensor(0.0, dtype=torch.float32)
                    y_true[r][idx] = torch.tensor(1.0, dtype=torch.float32)
        return y_true
    def class_infoNCE(self, Z, Z_, class_label_dic, predict_list, temperature):
        ## You can change the method to calculate NCEs
        NCEs = []
        # print('This is cclass_label_dic',class_label_dic)
        for id in range(len(predict_list)):
            label = predict_list[id]
            same_label_list = class_label_dic[label]
            class_z = [Z[i] for i in same_label_list if i != id]
            class_z_ = [Z_[i] for i in same_label_list]

            positive_pair = class_z + class_z_

            positive_similarities = F.cosine_similarity(Z[id].unsqueeze(0), torch.stack(positive_pair)) / 2 + 0.5
            # print('This is postitive pair info',Z[id].unsqueeze(0).shape,torch.stack(positive_pair).shape,positive_similarities.shape)
            positive_value = torch.exp(positive_similarities / temperature).sum() / len(positive_pair)
            # print(positive_similarities,positive_value)
            neg_labels = [i for i in list(class_label_dic.keys()) if i != label]

            negative_values = 0
            for neg_label in neg_labels:
                neg_label_list = class_label_dic[neg_label]
                neg_z = [Z[i] for i in neg_label_list]
                neg_z_ = [Z_[i] for i in neg_label_list]
                negative_pair = neg_z + neg_z_
                negative_similarities = F.cosine_similarity(Z[id].unsqueeze(0), torch.stack(negative_pair)) / 2 + 0.5
                # print('This is negative pair info',Z[id].unsqueeze(0).shape,torch.stack(negative_pair).shape,negative_similarities.shape,len(negative_pair))
                negative_value = torch.exp(negative_similarities / temperature).sum() / len(negative_pair)
                # print(negative_similarities,negative_value)
                negative_values += negative_value

            NCE = -torch.log(positive_value / (positive_value + negative_values))
            # print('positive_value',positive_value,'negative values', negative_values,'this is single nce',NCE)
            NCEs.append(NCE)
        print(torch.stack(NCEs).shape)
        return torch.stack(NCEs)
    
    def single_mutual_info_sampling(self,cur_iter, train_list, cl_class_label, ul_class_label):
        ret_mem = []
        val_class_label = list(set(cl_class_label) - set(ul_class_label))
        train_df = pd.DataFrame(train_list)
        train_df['category_id'] = train_df['category'].map(ytvos_category_dict)

        train_df = train_df[train_df['category_id']. isin( val_class_label)]
        assert len(Counter(train_df['category'])) == len(val_class_label)

        inf_loader = get_dataloader(train_df, 'ref_youtube_audio', 'test', self.batch_size, self.n_worker)
        temperature = 0.05


        augment = Compose([
            # Gain(min_gain_in_db=-12.0, max_gain_in_db=12.0),
            # AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.001),
            PitchShift(min_semitones=-0.5, max_semitones=0.5, p=0.5),
            # AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015),
            # TimeShift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
            # Shift(min_shift=-0.5, max_shift=0.5, p=0.5),
            # TimeStretch(min_rate=0.9, max_rate=1.1, p=0.5),
        ])

        #Calculate current infoNCE
        Z, Z_, class_label_dic, predict_list = self.get_data(inf_loader, augment)
        assert (len(Z) == len(Z_) == len(predict_list))
        cur_NCEs = self.class_infoNCE(Z, Z_, class_label_dic, predict_list, temperature)


        #Calculate previous infoNCE

        self.change_model(SAVEDIR + '{}/task{}best_epoch2.pt'.format(self.mode,cur_iter-1))
        pre_Z, pre_Z_, pre_class_label_dic, pre_predict_list = self.get_data(inf_loader, augment)
        assert (len(Z) == len(Z_) == len(predict_list))
        pre_NCEs = self.class_infoNCE(pre_Z, pre_Z_, pre_class_label_dic, pre_predict_list, temperature)
        self.change_model(SAVEDIR + '{}/task{}best_epoch2.pt'.format(self.mode,cur_iter))

        

        NCEs = pre_NCEs - cur_NCEs
        train_df['NCE'] = NCEs

        mem_per_cls = self.memory_size // len(val_class_label)

        for i in val_class_label:
            cls_df = train_df[(train_df["category"].map(ytvos_category_dict)) == i]
            if len(cls_df) <= mem_per_cls:
                ret_mem += cls_df.to_dict(orient="records")
            else:
                jump_idx = len(cls_df) // mem_per_cls
                uncertain_samples = cls_df.sort_values(by="NCE")[::jump_idx]
                ret_mem += uncertain_samples[:mem_per_cls].to_dict(orient="records")

        num_rest_slots = self.memory_size - len(ret_mem)
        if num_rest_slots > 0:
            self.logger.warning("Fill the unused slots by breaking the equilibrium.")
            ret_mem += (
                train_df[~train_df.exp.isin(pd.DataFrame(ret_mem).exp)]
                .sample(n=num_rest_slots)
                .to_dict(orient="records")
            )

        num_dups = pd.DataFrame(ret_mem).exp.duplicated().sum()
        if num_dups > 0:
            self.logger.warning(f"Duplicated samples in memory: {num_dups}")


        class_count = Counter(pd.DataFrame(ret_mem)['category'])
        print('After Unpdate Statistics',class_count)
        
        return ret_mem
      
    def double_mutual_info_sampling(self, candidates, cur, num_class):
        from audiomentations import Compose, Gain, AddGaussianNoise, PitchShift,TimeStretch,Shift
        from collections import Counter
        
        ulclass_list =   [None,self.ultask["task1"],self.ultask["task2"],self.ultask["task3"],self.ultask["task4"]]
        class_list = [self.cltask["task0"], self.cltask["task1"],self.cltask["task2"],self.cltask["task3"],self.cltask["task4"]]
        cl_class_list = []
        ul_class_list = []
        for i in range(num_class // 13):
            cur_class_list |= set(class_list[i])
            cur_class_list -= set(ulclass_list[i])
        cur_class_list.add(self.total_class_num-1)
        # Unlearning Part:class deleted will not be added into the memory bank

        infer_df = pd.DataFrame(candidates)

        class_count = Counter(infer_df['category'])
        print('Before Unpdate Statistics')
        for name, number in class_count.items():
            print(name, number)
        # mem_per_cls = self.memory_size // num_class  # kc: the number of the samples of each class

        batch_size = 8
        temperature = 0.05
        ret = []
        infer_loader = get_dataloader(infer_df, 'ref_youtube_audio', split='test', batch_size=batch_size, num_class=num_class,
                                      num_workers=8)
        augment = Compose([
            # Gain(min_gain_in_db=-12.0, max_gain_in_db=12.0),
            # AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.001),
            PitchShift(min_semitones=-0.5, max_semitones=0.5, p=0.5),
            # AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015),
            # TimeShift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
            # Shift(min_shift=-0.5, max_shift=0.5, p=0.5),
            # TimeStretch(min_rate=0.9, max_rate=1.1, p=0.5),
        ])

        Z, Z_, class_label_dic, predict_list = self.get_data(infer_loader, augment)
        assert (len(Z) == len(Z_) == len(predict_list))

        cur_NCEs = self.class_infoNCE(Z, Z_, class_label_dic, predict_list, temperature)

        path = '/home/user/SED_Adaptation_Classifier-main/workspace/ref_youtube/MIO/iter{}epoch.pt'.format(cur - 1)
        self.change_model(path)

        pre_Z, pre_Z_, pre_class_label_dic, pre_predict_list = self.get_data(infer_loader, augment)
        assert (len(Z) == len(Z_) == len(predict_list))

        pre_NCEs = self.class_infoNCE(pre_Z, pre_Z_, pre_class_label_dic, pre_predict_list, temperature)

        path = '/home/user/SED_Adaptation_Classifier-main/workspace/ref_youtube/MIO/iter{}epoch.pt'.format(cur)
        self.change_model(path)

        # print(len(Z),len(Z_),len(predict_list),len(candidates))

        NCEs = pre_NCEs - cur_NCEs
        for candidate,NCE in zip(candidates,NCEs):candidate['NCE'] = NCE

        sample_df = pd.DataFrame(candidates)
         # kc: the number of the samples of each class in memory bank
        mem_per_cls = self.memory_size // len(cl_class_list)
        
        for_per_cls = self.forget_size// len(ul_class_list)
        


        for i in cur_class_list:
            cls_df = sample_df[(sample_df["category"].map(ytvos_category_dict)) == i]
            if len(cls_df) <= mem_per_cls:
                ret += cls_df.to_dict(orient="records")
            else:
                jump_idx = len(cls_df) // mem_per_cls
                uncertain_samples = cls_df.sort_values(by="NCE")[::jump_idx]
                ret += uncertain_samples[:mem_per_cls].to_dict(orient="records")

        num_rest_slots = self.memory_size - len(ret)
        if num_rest_slots > 0:
            logger.warning("Fill the unused slots by breaking the equilibrium.")
            ret += (
                sample_df[~sample_df.exp.isin(pd.DataFrame(ret).exp)]
                .sample(n=num_rest_slots)
                .to_dict(orient="records")
            )

        num_dups = pd.DataFrame(ret).exp.duplicated().sum()
        if num_dups > 0:
            logger.warning(f"Duplicated samples in memory: {num_dups}")


        # top_indices = np.argpartition(NCEs.cpu().numpy(), -2000)[-2000:]
        #
        # for index in top_indices:
        #     ret.append(candidates[index])

        class_count = Counter(pd.DataFrame(ret)['category'])
        print('After Unpdate Statistics')
        for name, number in class_count.items():
            print(name, number)

        return ret
    
#     def train_with_datalist(self,train_list,test_list):
        
        train_loader, test_loader = get_train_test_dataloader(self.batch_size, self.n_worker, train_list, test_list)
        self.logger.info(f"In-memory samples: {len(self.memory_list)}")
        self.logger.info(f"Train samples: {len(train_list)}")
        self.logger.info(f"Test samples: {len(test_list)}")
        # logger.info(f"Model: {self.model}")
        self.logger.info(f"Optimizer: {self.optimizer}")
        acc_list = []
        best = {'acc': 0, 'epoch': 0,'f1_score':0}

        for epoch in range(self.epoch):
            mean_loss = 0
            for idx,batch_data_dict in enumerate(tqdm(train_loader)):
                batch_data_dict['waveform'] = batch_data_dict['waveform']
                batch_data_dict['target'] = batch_data_dict['target'].to(self.device)

                # Forward
                self.model.train()

                batch_output_dict = self.model(batch_data_dict['waveform'])
                """{'clipwise_output': (batch_size, classes_num), ...}"""
                batch_target_dict = {'target': batch_data_dict['target']}
                """{'target': (batch_size, classes_num)}"""
                # Loss
                
                loss = self.criterion(batch_output_dict, batch_target_dict)
                self.logger.info(f'Batch Training Initial Loss: {loss}')
                if idx % 10 == 0:
                    print(f'Epoch:{epoch},Batch {idx} Loss: {loss}')
                # Backwards
                loss.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()

                loss = loss.item()

                mean_loss += loss
            epoch_loss = mean_loss / len(train_loader)
            self.logger.info(f'Epoch {epoch} | Training Loss: {epoch_loss}')
            print(f'Epoch {epoch} | Training Loss: {epoch_loss}')
            # Evaluate
            test_statistics = self.evaluator.evaluate(test_loader)
            ave_f1_score = np.mean(test_statistics['f1_score'])
            ave_acc = np.mean(test_statistics['accuracy'])
            acc_list.append(ave_acc)
            self.logger.info(f"Epoch {epoch} | Evaluation Accuracy: {ave_acc}|Evaluation f1_score: {ave_f1_score}")
            self.logger.info(f'Current Accuracy: {ave_acc} in epoch {epoch}.|Current f1_score: {ave_f1_score} in epoch {epoch}.')
            print(f"Task {cur_iter} | Epoch {epoch} | Evaluation Accuracy: {ave_acc}|Evaluation f1_score: {ave_f1_score}|Evaluation precision {test_statistics['precision']}")
            

            if ave_f1_score > best['f1_score']:
                best['acc'] = ave_acc
                best['f1_score'] = ave_f1_score
                best['epoch'] = epoch
                self.logger.info(f'Best Accuracy: {ave_acc} in epoch {epoch}.|Best f1_score: {ave_f1_score} in epoch {epoch}.')
                selected_state_dict = {}
                for name, param in self.model.named_parameters():
                    if 'projector' in name or 'classifier' in name or 'fc' in name and ('encoder' not in name):
                        selected_state_dict[name] = param
                torch.save(selected_state_dict,SAVEDIR + '{}/task{}best_epoch{}.pt'.format(self.mode,cur_iter,epoch))
                self.counter = 0
            else:
                self.counter += 1
                self.logger.info(f'EarlyStopping counter: {self.counter} out of {self.patience}.')
                if self.counter >= self.patience:
                    break
        print(f"Task {cur_iter} | Best Epoch {best['epoch']} | Best Evaluation Accuracy: {best['acc']}|Evaluation f1_score: {best['f1_score']}")
        return 
    
    def calculate_metrics(self,y_true,y_pred,cl_class_label,ul_class_label):
        statistics = {'cl_weighted_accuracy':0,'ul_weighted_accuracy':0,'cl_accuracy':0,'ul_accuracy':0}
        cl_y_true,cl_y_pred = [],[]
        ul_y_true,ul_y_pred = [],[]
        for y_t,y_d in zip(y_true,y_pred):
            if y_t in ul_class_label:
                ul_y_true.append(y_t)
                ul_y_pred.append(y_d)
            else:
                cl_y_true.append(y_t)
                cl_y_pred.append(y_d)
        cl_weighted_accuracy = balanced_accuracy_score(cl_y_true,cl_y_pred)
        ul_weighted_accuracy = balanced_accuracy_score(ul_y_true,ul_y_pred)

        cl_accuracy = accuracy_score(cl_y_true,cl_y_pred)
        ul_accuracy = accuracy_score(ul_y_true,ul_y_pred)

        statistics['ul_accuracy'] = ul_accuracy
        statistics['cl_accuracy'] = cl_accuracy

        statistics['cl_weighted_accuracy'] = cl_weighted_accuracy
        statistics['ul_weighted_accuracy'] = ul_weighted_accuracy
        # print(cl_y_true,cl_y_pred)
        # print(ul_y_true,ul_y_pred)
        return statistics

    def get_cl_ul_class_label(self,cur_iter):
        cl_class_label = self.cltask[f'task{cur_iter}']
        ul_class_label = self.ultask[f'ul_task{cur_iter}']
        return cl_class_label,ul_class_label
    
#     def train_with_forget_without_forget_bank(self, cur_iter,mode = 'all_learn_forget'):
        
#         memory_bank = self.memory_list
#         test_list = []
#         for i in range(cur_iter + 1):
#             train_list_,test_data_list_ = get_datalist(i)
#             test_list += test_data_list_
        
#         train_list,_ = get_datalist(cur_iter)
#         train_list += memory_bank

#         train_loader,test_loader = get_train_test_dataloader(self.batch_size, self.n_worker, train_list, test_list)
#         cl_class_label,ul_class_label = [],[]

#         best = {'cl_weighted_accuracy':0,'cl_accuracy':0,'ul_weighted_accuracy':0,'ul_accuracy':0,'epoch':0}
#         for i in range(cur_iter + 1):
#             cl_class_label += self.cltask[f'task{i}']
#             ul_class_label += self.ultask[f'ul_task{i}']
#         print('train loader length',len(train_loader),'test loader length',len(test_loader),'cl class label',cl_class_label,'ul class label',ul_class_label)
#         for epoch in range(self.epoch):
#             mean_loss = 0
#             for idx,batch_data_dict in enumerate(tqdm(train_loader)):
#                 batch_data_dict['waveform'] = batch_data_dict['waveform']
#                 # print(batch_data_dict['target'],ul_class_label)
#                 batch_data_dict['target'] = self.forget_label_set(batch_data_dict['target'],ul_class_label)
#                 batch_data_dict['target'] = batch_data_dict['target'].to(self.device)

#                 # Forward
#                 self.model.train()

#                 batch_output_dict = self.model(batch_data_dict['waveform'])
#                 """{'clipwise_output': (batch_size, classes_num), ...}"""
#                 batch_target_dict = {'target': batch_data_dict['target']}
#                 """{'target': (batch_size, classes_num)}"""
#                 # Loss
                
#                 loss = self.criterion(batch_output_dict, batch_target_dict)
#                 self.logger.info(f'Batch Training Initial Loss: {loss}')
#                 if idx % 10 == 0:
#                     print(f'Epoch:{epoch},Batch {idx} Loss: {loss}')
#                 # Backwards
#                 loss.backward()
#                 self.optimizer.step()
#                 self.optimizer.zero_grad()

#                 loss = loss.item()

#                 mean_loss += loss
#             epoch_loss = mean_loss / len(train_loader)
            
#             print(f'Epoch {epoch} | Training Loss: {epoch_loss}')
#             # Evaluate
#             y_true,y_pred = self.evaluator.evaluate(test_loader)

#             statistics = self.calculate_metrics(y_true,y_pred,cl_class_label,ul_class_label)

#             print(f"Task {cur_iter} |  Epoch {epoch} | statistics {statistics}")
#             if  statistics['cl_weighted_accuracy'] > best['cl_weighted_accuracy']:
#                 best['cl_weighted_accuracy'] = statistics['cl_weighted_accuracy']
#                 best['cl_accuracy'] = statistics['cl_accuracy']
#                 best['epoch'] = epoch
#                 # self.logger.info(f'Best Accuracy: {accuracy} in epoch {epoch}.|Best weighted_accuracy: {weighted_accuracy} in epoch {epoch}.')
#                 selected_state_dict = {}
#                 for name, param in self.model.named_parameters():
#                     if 'projector' in name or 'classifier' in name or 'fc' in name and ('encoder' not in name):
#                         selected_state_dict[name] = param
#                 torch.save(selected_state_dict,SAVEDIR + '{}/task{}best_epoch{}.pt'.format(self.mode,cur_iter,epoch))
#                 self.counter = 0
#             else:
#                 self.counter += 1
#                 self.logger.info(f'EarlyStopping counter: {self.counter} out of {self.patience}.')
#                 if self.counter >= self.patience:
#                     break
#         print(f"Task {cur_iter} | Best Epoch {best['epoch']} | Best Accuracy: {best['cl_accuracy']}|Best weighted_accuracy: {best['cl_weighted_accuracy']}")
#         return train_list,test_list,cl_class_label,ul_class_label
    
    def train(self,mode,train_list,test_list,cl_class_label,ul_class_label,cur_iter):
        
        # method = 'Boundary_Expansion'
        method = 'Random_Label'
        train_loader,test_loader = get_train_test_dataloader(self.batch_size, self.n_worker, train_list, test_list)
        best = {'epoch':0,'cl_weighted_accuracy':0,'ul_weighted_accuracy':0,'cl_accuracy':0,'ul_accuracy':0}
        print('train loader length',len(train_loader),'test loader length',len(test_loader),'cl class label',cl_class_label,'ul class label',ul_class_label)
    
        for epoch in range(self.epoch):
            mean_loss = 0
            for idx,batch_data_dict in enumerate(tqdm(train_loader)):
                batch_data_dict['waveform'] = batch_data_dict['waveform']
                # print(batch_data_dict['target'],ul_class_label)
                if method == 'Boundary_Expansion':
                    batch_data_dict['target'] = self.Boundary_Expansion_Forget_Label_Set(batch_data_dict['target'],ul_class_label)
                if method == 'Random_Label':
                    batch_data_dict['target'] = self.Ramdom_Label_Forget_Label_Set(batch_data_dict['target'],cl_class_label,ul_class_label)
                    
                batch_data_dict['target'] = batch_data_dict['target'].to(self.device)
                
                # print("=============This is input data==============")
                # print(batch_data_dict['target'])
                # Forward
                self.model.train()

                batch_output_dict = self.model(batch_data_dict['waveform'])
                """{'clipwise_output': (batch_size, classes_num), ...}"""
                batch_target_dict = {'target': batch_data_dict['target']}
                """{'target': (batch_size, classes_num)}"""
                # Loss
                
                loss = self.criterion(batch_output_dict, batch_target_dict)
                self.logger.info(f'Batch Training Initial Loss: {loss}')
                if idx % 10 == 0:
                    print(f'Epoch:{epoch},Batch {idx} Loss: {loss}')
                # Backwards
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
                self.optimizer.step()
                # self.scheduler.step()
                self.optimizer.zero_grad()

                loss = loss.item()

                mean_loss += loss
            epoch_loss = mean_loss / len(train_loader)
            
            print(f'Epoch {epoch} | Training Loss: {epoch_loss}')
            # Evaluate
            y_true,y_pred = self.evaluator.evaluate(test_loader)
            
            statistics = self.calculate_metrics(y_true,y_pred,cl_class_label,ul_class_label)

            print(f"Task {cur_iter} |  Current Epoch {epoch} | statistics {statistics}")
            if  statistics['cl_weighted_accuracy'] > best['cl_weighted_accuracy']:
                best['cl_weighted_accuracy'] = statistics['cl_weighted_accuracy']
                best['cl_accuracy'] = statistics['cl_accuracy']
                best['epoch'] = epoch
                selected_state_dict = {}
                for name, param in self.model.named_parameters():
                    if 'projector' in name or 'classifier' in name or 'fc' in name and ('encoder' not in name):
                        selected_state_dict[name] = param
                torch.save(selected_state_dict,SAVEDIR + '{}/task{}best_epoch.pt'.format(self.mode,cur_iter))
                print('Save Model Successfully',SAVEDIR + '{}/task{}best_epoch.pt'.format(self.mode,cur_iter))
                self.counter = 0
            else:
                self.counter += 1
                self.logger.info(f'EarlyStopping counter: {self.counter} out of {self.patience}.')
                if self.counter >= self.patience:
                    break
            self.scheduler.step()
        print(f"Task {cur_iter} | Best epoch  Best Statistics {best}")
        
        return 
    
    def evaluate(self,model_path,test_datalist,cl_class_label,ul_class_label):
        self.change_model(model_path)
        test_loader = get_dataloader(pd.DataFrame(test_list), 'ref_youtube_audio','test', batch_size=self.batch_size, num_workers=self.n_worker)
        y_true,y_pred = self.evaluator.evaluate(test_loader)
        statistics = self.calculate_metrics(y_true,y_pred,cl_class_label,ul_class_label)
        print(statistics)
        return statistics
    
    def evaluate_self(self,test_datalist,cl_class_label,ul_class_label):
        test_loader = get_dataloader(pd.DataFrame(test_list), 'ref_youtube_audio','test', batch_size=self.batch_size, num_workers=self.n_worker)
        y_true,y_pred = self.evaluator.evaluate(test_loader)
        statistics = self.calculate_metrics(y_true,y_pred,cl_class_label,ul_class_label)
        print(statistics)
        return statistics

In [67]:
#All Training and each stage forget
#Each Stage train the all Data Need to Remember and all Data Need to be Forget(Memory bank full,Forget bank full)
def Step_Learn_Froeget_Full_memory_Full_forget():
    clul = CLUL(epoch = 10)
    for idx in range(5):
        clul.train_with_forget_without_forget_bank(idx)
        train_list,_ = clul.get_train_test_datalist(0)
        clul.memory += train_list

#At Last Stage learn and forget together
def All_Learn_Forget_No_memory_No_forget():
    clul = CLUL(epoch = 10)
    train_list,test_list = [],[]
    cl_class_label,ul_class_label = [],[]
    for idx in range(5):
        train_list_,test_list_ = clul.get_train_test_datalist(idx)
        cl_class_label_,ul_class_label_ =clul.get_cl_ul_class_label(idx)
        train_list += train_list_
        test_list += test_list_
        cl_class_label += cl_class_label_
        ul_class_label += ul_class_label_
        
    print('train list length',len(train_list),'test list length',len(test_list),
      'cl class label length',len(cl_class_label),'ul class label length',len(ul_class_label))
    clul.train('All_Learn_Forget_No_memory_No_forget',train_list,test_list,cl_class_label,ul_class_label,4)

def All_Learn_then_Forget():
    clul = CLUL(epoch = 5,mode='All_Learn_then_Forget')
    train_list,test_list = [],[]
    cl_class_label,ul_class_label = [],[]
    for idx in range(5):
        train_list_,test_list_ = clul.get_train_test_datalist(idx)
        cl_class_label_,ul_class_label_ =clul.get_cl_ul_class_label(idx)
        train_list += train_list_
        test_list += test_list_
        cl_class_label += cl_class_label_
        
    print('train list length',len(train_list),'test list length',len(test_list),
      'cl class label length',len(cl_class_label),'ul class label length',len(ul_class_label))
    clul.train('All_Learn_then_Forget',train_list,test_list,cl_class_label,ul_class_label,4)

    
    train_list,test_list = [],[]
    cl_class_label,ul_class_label = [],[]
    for idx in range(5):
        train_list_,test_list_ = clul.get_train_test_datalist(idx)
        cl_class_label_,ul_class_label_ =clul.get_cl_ul_class_label(idx)
        train_list += train_list_
        test_list += test_list_
        ul_class_label += ul_class_label
    
    train_list = pd.DataFrame(train_list)[(pd.DataFrame(train_list)["category"].map(ytvos_category_dict)).isin(ul_class_label)].to_dict(orient="records")
    print('train list length',len(train_list),'test list length',len(test_list),
      'cl class label length',len(cl_class_label),'ul class label length',len(ul_class_label))
    clul.train('All_Learn_then_Forget',train_list,test_list,cl_class_label,ul_class_label,4)
      
def Naive_CL():
    clul = CLUL(epoch=5,mode= 'Naive_CL')
    test_list = []
    cl_class_label = []

    
    for idx in range(5):
        train_list,test_list_ = clul.get_train_test_datalist(idx)
        cl_class_label_,ul_class_label_ =clul.get_cl_ul_class_label(idx)
        cl_class_label += cl_class_label_
        test_list += test_list_
        cl_class_label = list(set(cl_class_label))
        
        clul.train('Naive_CL',train_list,test_list,cl_class_label,[],idx)

def Full_Memory_CL():
    clul = CLUL(epoch=5,mode= 'Full_Memory_CL')
    test_list = []
    cl_class_label = []

    
    for idx in range(5):
        train_list_,test_list_ = clul.get_train_test_datalist(idx)
        cl_class_label_,ul_class_label_ =clul.get_cl_ul_class_label(idx)
        cl_class_label += cl_class_label_
        test_list += test_list_
        train_list += train_list_
        cl_class_label = list(set(cl_class_label))
        
        clul.train('Full_Memory_CL',train_list,test_list,cl_class_label,[],idx)
    

def Learn_All():
    clul = CLUL(epoch = 10,mode='Learn_All')
    
     
    train_list,test_list = [],[]
    cl_class_label,ul_class_label = [],[]
    
    for idx in range(5):
        train_list_,test_list_ = clul.get_train_test_datalist(idx)
        cl_class_label_,ul_class_label_ =clul.get_cl_ul_class_label(idx)
        train_list += train_list_
        test_list += test_list_
        cl_class_label += cl_class_label_
        ul_class_label += ul_class_label_
        

    print('Learn train list length',len(train_list),'test list length',len(test_list),
      'cl class label length',len(cl_class_label),'ul class label length',len([]))
    
    clul.train('Learn_All',train_list,test_list,cl_class_label,[],4)

#Fine tune the data on the D_r
def Learn_CL():
    clul = CLUL(epoch = 3,mode='Learn_CL')
    clul.change_model('/root/CLUL/run/Learn_All/task4best_epoch.pt')
     
    train_list,test_list = [],[]
    cl_class_label,ul_class_label = [],[]
    
    for idx in range(5):
        train_list_,test_list_ = clul.get_train_test_datalist(idx)
        cl_class_label_,ul_class_label_ =clul.get_cl_ul_class_label(idx)
        train_list += train_list_
        test_list += test_list_
        cl_class_label += cl_class_label_
        ul_class_label += ul_class_label_
        
    cl_class_label = list(set(cl_class_label)-set(ul_class_label))
    train_list = pd.DataFrame(train_list)[(pd.DataFrame(train_list)["category"].map(ytvos_category_dict)).isin(cl_class_label)].to_dict(orient="records")
    print('Learn train list length',len(train_list),'test list length',len(test_list),
      'cl class label length',len(cl_class_label),'ul class label length',len(ul_class_label))
    
    clul.train('Learn_CL',train_list,test_list,cl_class_label,ul_class_label,4)


def Forget_UL_Boundary_Expansion():
    clul = CLUL(epoch = 3,mode='Forget_UL_Boundary_Expansion')
    clul.change_model('/root/CLUL/run/Learn_All/task4best_epoch.pt')
    
    # [15, 17, 60, 48, 54, 35, 46, 18, 57, 3, 59, 10]
    train_list,test_list = [],[]
    cl_class_label,ul_class_label = [],[15]
    
    for idx in range(5):
        train_list_,test_list_ = clul.get_train_test_datalist(idx)
        cl_class_label_,ul_class_label_ =clul.get_cl_ul_class_label(idx)
        train_list += train_list_
        test_list += test_list_
        cl_class_label += cl_class_label_
    train_list = pd.DataFrame(train_list)[(pd.DataFrame(train_list)["category"].map(ytvos_category_dict)).isin(ul_class_label)].to_dict(orient="records")
    print('forget train list length',len(train_list),'test list length',len(test_list),
      'cl class label length',len(cl_class_label),'ul class label length',len(ul_class_label))
    
    clul.train('Forget_UL_Boundary_Expansion',train_list,test_list,cl_class_label,ul_class_label,4)

    
def Forget_UL_Random_label():
    clul = CLUL(epoch = 3,mode='Forget_UL_Random_label')
    clul.change_model('/root/CLUL/run/Learn_All/task4best_epoch.pt')
     
    train_list,test_list = [],[]
    cl_class_label,ul_class_label = [],[]
    
    for idx in range(5):
        train_list_,test_list_ = clul.get_train_test_datalist(idx)
        cl_class_label_,ul_class_label_ =clul.get_cl_ul_class_label(idx)
        train_list += train_list_
        test_list += test_list_
        cl_class_label += cl_class_label_
        ul_class_label += ul_class_label_
        
    ul_class_label = list(set(cl_class_label) - set(ul_class_label))
    train_list = pd.DataFrame(train_list)[(pd.DataFrame(train_list)["category"].map(ytvos_category_dict)).isin(ul_class_label)].to_dict(orient="records")
    print('forget train list length',len(train_list),'test list length',len(test_list),
      'cl class label length',len(cl_class_label),'ul class label length',len(ul_class_label))
    
    clul.train('All_Learn_then_Forget',train_list,test_list,cl_class_label,ul_class_label,4)



# Last Stage Learn all and forget all by Bounday Expanding

In [None]:
# In the last stage learn all and forget all together 
All_Learn_Forget_No_memory_No_forget()

# Incremental learning in 5 tasks with no memory bank

In [None]:
#Incremental learning in 5 tasks and no memory bank
Naive_CL()

# Incremental learning in 5 tasks with Full memory bank

In [None]:
Full_Memory_CL()

# Learn first then Forget

In [None]:
Learn_All()

In [None]:
Learn_CL()

In [None]:
Forget_UL_Boundary_Expansion()

In [68]:
Forget_UL_Random_label()

forget train list length 8232 test list length 2494 cl class label length 65 ul class label length 53
train loader length 258 test loader length 78 cl class label [15, 17, 60, 50, 32, 24, 63, 36, 31, 40, 52, 4, 25, 48, 54, 35, 62, 13, 42, 37, 49, 51, 45, 44, 14, 5, 46, 18, 57, 28, 11, 30, 61, 27, 22, 2, 29, 0, 19, 3, 59, 10, 12, 8, 1, 26, 23, 34, 58, 64, 56, 41, 47, 20, 53, 39, 9, 21, 16, 38, 33, 43, 6, 7, 55] ul class label [0, 1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 16, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 49, 50, 51, 52, 53, 55, 56, 58, 61, 62, 63, 64]


  0%|          | 0/258 [00:00<?, ?it/s]

Epoch:0,Batch 0 Loss: 100.18904876708984
Epoch:0,Batch 10 Loss: 41.5723876953125
Epoch:0,Batch 20 Loss: 30.053726196289062
Epoch:0,Batch 30 Loss: 17.66861915588379
Epoch:0,Batch 40 Loss: 21.125993728637695
Epoch:0,Batch 50 Loss: 16.48838996887207
Epoch:0,Batch 60 Loss: 16.23040008544922
Epoch:0,Batch 70 Loss: 13.77851676940918
Epoch:0,Batch 80 Loss: 12.48988151550293
Epoch:0,Batch 90 Loss: 15.555898666381836
Epoch:0,Batch 100 Loss: 12.741931915283203
Epoch:0,Batch 110 Loss: 12.094322204589844
Epoch:0,Batch 120 Loss: 13.497087478637695
Epoch:0,Batch 130 Loss: 11.989791870117188
Epoch:0,Batch 140 Loss: 12.87718677520752
Epoch:0,Batch 150 Loss: 12.588712692260742
Epoch:0,Batch 160 Loss: 12.647796630859375
Epoch:0,Batch 170 Loss: 12.895086288452148
Epoch:0,Batch 180 Loss: 11.810033798217773
Epoch:0,Batch 190 Loss: 11.024385452270508
Epoch:0,Batch 200 Loss: 11.824185371398926
Epoch:0,Batch 210 Loss: 12.377079010009766
Epoch:0,Batch 220 Loss: 11.775630950927734
Epoch:0,Batch 230 Loss: 11.742

Evaluation starting ...: 100%|██████████| 78/78 [01:13<00:00,  1.06it/s]

Returned target_acc and clipwise_output_acc
Task 4 |  Current Epoch 0 | statistics {'cl_weighted_accuracy': 0.09087767451659721, 'ul_weighted_accuracy': 0.0, 'cl_accuracy': 0.1393643031784841, 'ul_accuracy': 0.0}
Save Model Successfully /root/CLUL/run/Forget_UL_Random_label/task4best_epoch.pt





  0%|          | 0/258 [00:00<?, ?it/s]

Epoch:1,Batch 0 Loss: 11.084427833557129
Epoch:1,Batch 10 Loss: 11.884904861450195
Epoch:1,Batch 20 Loss: 11.206670761108398
Epoch:1,Batch 30 Loss: 11.606139183044434
Epoch:1,Batch 40 Loss: 11.836101531982422
Epoch:1,Batch 50 Loss: 11.80435848236084
Epoch:1,Batch 60 Loss: 11.207924842834473
Epoch:1,Batch 70 Loss: 10.596290588378906
Epoch:1,Batch 80 Loss: 11.524690628051758
Epoch:1,Batch 90 Loss: 10.76893138885498
Epoch:1,Batch 100 Loss: 11.463611602783203
Epoch:1,Batch 110 Loss: 10.96826171875
Epoch:1,Batch 120 Loss: 11.645011901855469
Epoch:1,Batch 130 Loss: 11.59510612487793
Epoch:1,Batch 140 Loss: 10.997419357299805
Epoch:1,Batch 150 Loss: 10.763168334960938
Epoch:1,Batch 160 Loss: 11.835344314575195
Epoch:1,Batch 170 Loss: 11.325338363647461
Epoch:1,Batch 180 Loss: 11.078719139099121
Epoch:1,Batch 190 Loss: 11.008458137512207
Epoch:1,Batch 200 Loss: 11.467052459716797
Epoch:1,Batch 210 Loss: 11.333417892456055
Epoch:1,Batch 220 Loss: 11.161840438842773
Epoch:1,Batch 230 Loss: 10.83

ValueError: Unable to create tensor, you should probably activate padding with 'padding=True' to have batched tensors with the same length.

In [33]:
#In the last stage learn all then forget all 
# All_Learn_then_Forget()
# forget_all = [35, 3, 10, 46, 15, 48, 17, 18, 54, 57, 59, 60]
# All_Forget()
# clul = CLUL()
# cl_class_label,ul_class_label = [],[]
# train_list,test_list = [],[]
# for idx in range(5):
#     train_list_,test_list_ = clul.get_train_test_datalist(idx)
#     cl_class_label_,ul_class_label_ =clul.get_cl_ul_class_label(idx)
#     train_list += train_list_
#     test_list += test_list_
#     cl_class_label += cl_class_label_
#     ul_class_label += ul_class_label_

# Learn_CL()
clul = CLUL()
train_list,test_list = clul.get_train_test_datalist(0)
train_loader,test_loader = get_train_test_dataloader(clul.batch_size, clul.n_worker, train_list, test_list)
for tra in train_loader:
    print(tra)


KeyboardInterrupt: 

In [None]:
clul = CLUL()
train_list,test_list = clul.get_train_test_datalist(0)
train_loader,test_loader = get_train_test_dataloader(clul.batch_size, clul.n_worker, train_list, test_list)
for tra in train_loader:
    print(tra)

In [None]:
train_list1,test_list1 = clul.get_train_test_datalist(0)
cl_class_label1,ul_class_label1 = clul.get_cl_ul_class_label(0)

train_list2,test_list2 = clul.get_train_test_datalist(1)
cl_class_label2,ul_class_label2 = clul.get_cl_ul_class_label(1)

train_list = train_list1 + train_list2
cl_class_label = cl_class_label1 + cl_class_label2
ul_class_label = ul_class_label1 + ul_class_label2


clul.change_model(r'C:\Users\Administrator\Desktop\CLUL-main\run\CLUL _no_foget_bank\task1best_epoch2.pt')
memory = clul.single_mutual_info_sampling(1,train_list,cl_class_label,ul_class_label)

In [None]:
# class_label_dic
train_list,test_list = get_datalist(0)
train_df = pd.DataFrame(train_list)
train_list,train_df
for i in range(1000):
    print(train_list[i]['video'] == train_df.iloc[i]['video'])
train_df


from enum import Enum
from sklearn.metrics import f1_score,precision_recall_curve,precision_score,recall_score,accuracy_score,balanced_accuracy_score
y_true = [0, 0, 0, 1, 1, 1,   2]
y_pred = [0, 0, 0, 1, 1, 1,   4]
class AverageMethod(str, Enum):
    MICRO = 'micro'
    WEIGHTED = 'weighted'
    MACRO = 'macro'
def evaluate(y_true,y_pred,average:AverageMethod):
    statistics = {}
    statistics['f1_score'] = f1_score(y_true,y_pred,average=average.value)
    statistics['precision'] = precision_score(y_true,y_pred,average=average.value)
    statistics['recall'] = recall_score(y_true,y_pred,average=average.value)
    statistics['accuracy'] = accuracy_score(y_true,y_pred)
    return statistics
evaluate(y_true,y_pred,AverageMethod.MACRO),balanced_accuracy_score(y_true,y_pred)


# train_list,test_list = get_datalist(0)
# train_loader ,test_loader = get_train_test_dataloader(16,0,train_list,test_list)

# for train in train_loader:
#     print(train)


# def train_total():
#     for task_id in range(5):
#         train_list,test_list,cl_class_label,ul_class_label = clul.train_with_forget_without_forget_bank(task_id)
#         if task_id == 0:
#             clul.equal_class_sampling(train_list)
#         else:
#             clul.single_mutual_info_sampling(train_list,cl_class_label,ul_class_label)

In [None]:
from tqdm.notebook import tqdm
a = [1,2,3,4,5]
for i in tqdm(a):
    print(i)

In [None]:
from ipywidgets import Button

button = Button(description="Click me!")
button
