# Rainbow Memory: Build a dataset for each task

In [10]:
import sys
import pandas as pd
import numpy as np
from pathlib import Path

Read the train and test JSON files, each of which must follow the format below:

[{"klass": "truck", "file_name": "test/class1.txt", "idx":6}, ...]

Change the file paths in the cell below so that they point to your own `train.json` and `test.json`.


In [11]:
# NOTE: machine-specific absolute paths -- point these at your own train/test JSON files.
train_json_path = r'C:\Users\cannedfish\Desktop\测试数据_(非密）面向未知机载AESA雷达的开集智能识别算法\train.json'
test_json_path = r'C:\Users\cannedfish\Desktop\测试数据_(非密）面向未知机载AESA雷达的开集智能识别算法\test.json'

# Load each split into its own DataFrame.
train = pd.read_json(train_json_path)
test = pd.read_json(test_json_path)

Set `rnd_seed` (the random seed) and `num_tasks` (the number of tasks) for your experiment.

In [12]:
# Experiment configuration: edit these two values to produce a different split.
rnd_seed = 3   # seed for NumPy's global random number generator
num_tasks = 4  # how many tasks the classes are divided into
# Seed once, up front, so the later np.random-based class shuffle is reproducible.
np.random.seed(seed=rnd_seed)

In [13]:
# Distinct class names found in the training split.
klass = train.klass.unique()

# Fail early with an actionable message: np.split below only accepts an equal
# division, and `num_cls_per_task` is later embedded in the output file names,
# so a silently floored value would mislabel the generated files.
if len(klass) % num_tasks != 0:
    raise ValueError(
        f"Cannot split {len(klass)} classes evenly into {num_tasks} tasks; "
        "choose a `num_tasks` that divides the number of classes."
    )
num_cls_per_task = len(klass) // num_tasks

# Shuffle the class order in place; the order depends on NumPy's global RNG state.
np.random.shuffle(klass)

# Integer label of a class = its position in the shuffled class order.
class2label = {cls_: idx for idx, cls_ in enumerate(klass)}
train["label"] = train.klass.apply(lambda x: class2label[x])
# Raises KeyError if the test split contains a class that never occurs in train.
test["label"] = test.klass.apply(lambda x: class2label[x])

# Consecutive groups of `num_cls_per_task` shuffled classes form one task each.
task_class = np.split(klass, num_tasks)
task_train = [train[train.klass.isin(tc)] for tc in task_class]
task_test = [test[test.klass.isin(tc)] for tc in task_class]

## Disjoint Task Boundaries Benchmark

Build the disjoint benchmark, in which every task has its own set of classes and no class is shared between tasks.


In [14]:
origin_name = "pdw" # Need to change the name of your dataset.
root = Path('../collections/disjoint')
# parents=True also creates `../collections` when it does not exist yet;
# without it mkdir raises FileNotFoundError on a fresh checkout.
root.mkdir(parents=True, exist_ok=True)

# Write one JSON file (list of records) per task and print each path.
for idx, train_task in enumerate(task_train):
    file_name = f"{origin_name}_disjoint_rand{rnd_seed}_cls{num_cls_per_task}_task{idx}"
    # Append the extension explicitly: Path.with_suffix would replace everything
    # after the last dot, so a dataset name containing "." would map every task
    # onto the same output file.
    file_path = root / f"{file_name}.json"
    train_task.to_json(file_path, orient='records')
    print(f"{file_path}")

..\collections\disjoint\pdw_disjoint_rand3_cls1_task0.json
..\collections\disjoint\pdw_disjoint_rand3_cls1_task1.json
..\collections\disjoint\pdw_disjoint_rand3_cls1_task2.json
..\collections\disjoint\pdw_disjoint_rand3_cls1_task3.json


## Blurry Task Boundaries Benchmark

Build the blurry benchmark, in which tasks may share classes.

As described in the paper, each task contains two types of classes:

- **Major classes** account for 90% (70%) of each task's data in blurry-10 (blurry-30).
- **Minor classes** account for the remaining 10% (30%) of each task's data in blurry-10 (blurry-30).


In [15]:
major_ratio = 0.9 # 0.9 for blurry-10, 0.7 for blurry-30.
num_other_tasks = len(task_train) - 1

task_trainM = []  # per task: the major share that stays in its own task
task_trainN = []  # per task: list of minor chunks, one for every other task
for t in task_train:
    # Use `major_ratio` (previously a hard-coded 0.9) so that changing the
    # setting above really changes the split, not only the output file name.
    taskM = t.sample(n=int(len(t) * major_ratio), replace=False)
    # Complement of the major share, selected by index label. Unlike a
    # value-based drop_duplicates this keeps rows whose contents happen to be
    # identical and works with unhashable cell values.
    # Assumes the index labels of `t` are unique -- TODO confirm for your data.
    taskN = t.drop(index=taskM.index)
    taskN_size = len(taskN)

    task_trainM.append(taskM)
    # Each chunk is drawn independently from the same minor pool, so chunks
    # handed to different tasks may overlap.
    task_trainN.append([
        taskN.sample(n=taskN_size // num_other_tasks)
        for _ in range(num_other_tasks)
    ])

# Task i = its own major share + the next unused minor chunk of every other task.
# Iterators consume the chunks in order without mutating `task_trainN`.
minor_chunks = [iter(chunks) for chunks in task_trainN]
task_mixed_train = []
for idx, task in enumerate(task_trainM):
    borrowed = [next(minor_chunks[j]) for j in range(len(task_trainM)) if j != idx]
    # Single concat instead of growing a DataFrame inside the loop.
    task_mixed_train.append(pd.concat([task, *borrowed]))

In [16]:
origin_name = "pdw" # Need to change the name of your dataset.
root = Path('../collections/blurry')
# parents=True also creates `../collections` when it does not exist yet;
# without it mkdir raises FileNotFoundError on a fresh checkout.
root.mkdir(parents=True, exist_ok=True)

# Minor-class percentage used in the file name, e.g. 10 for major_ratio = 0.9.
blurry_level = int(round((1.0 - major_ratio) * 100))

# Write one JSON file (list of records) per mixed task and print each path.
for idx, task in enumerate(task_mixed_train):
    file_name = f"{origin_name}_blurry{blurry_level}_rand{rnd_seed}_cls{num_cls_per_task}_task{idx}"
    # Append the extension explicitly: Path.with_suffix would replace everything
    # after the last dot, so a dataset name containing "." would map every task
    # onto the same output file.
    file_path = root / f"{file_name}.json"
    task.to_json(file_path, orient='records')
    print(f"{file_path}")

..\collections\blurry\pdw_blurry10_rand3_cls1_task0.json
..\collections\blurry\pdw_blurry10_rand3_cls1_task1.json
..\collections\blurry\pdw_blurry10_rand3_cls1_task2.json
..\collections\blurry\pdw_blurry10_rand3_cls1_task3.json


## Test 

In [17]:
origin_name = "pdw" # Need to change the name of your dataset.
# Test rows belonging to each task's classes.
task_test = [test[test.klass.isin(tc)] for tc in task_class]

root = Path('../collections/test')
# parents=True also creates `../collections` when it does not exist yet;
# without it mkdir raises FileNotFoundError on a fresh checkout.
root.mkdir(parents=True, exist_ok=True)

# Write one JSON file (list of records) per task and print each path.
for idx, task in enumerate(task_test):
    file_name = f"{origin_name}_rand{rnd_seed}_cls{num_cls_per_task}_task{idx}"
    # Append the extension explicitly: Path.with_suffix would replace everything
    # after the last dot, so a dataset name containing "." would map every task
    # onto the same output file.
    file_path = root / f"{file_name}.json"
    task.to_json(file_path, orient='records')
    print(f"{file_path}")

..\collections\test\pdw_rand3_cls1_task0.json
..\collections\test\pdw_rand3_cls1_task1.json
..\collections\test\pdw_rand3_cls1_task2.json
..\collections\test\pdw_rand3_cls1_task3.json
