In [51]:
import random
import time
import warnings
import argparse
import shutil
import os.path as osp

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.optim import SGD
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data import DataLoader
import torch.nn.functional as F
import cv2
import numpy as np
import utils
from torch.utils.data import Dataset, DataLoader
from tllib.alignment.dan import MultipleKernelMaximumMeanDiscrepancy, ImageClassifier
from tllib.modules.kernels import GaussianKernel
from tllib.utils.data import ForeverDataIterator
from tllib.utils.metric import accuracy
from tllib.utils.meter import AverageMeter, ProgressMeter
from tllib.utils.logger import CompleteLogger
from tllib.utils.analysis import collect_feature, tsne, a_distance
import pandas as pd

In [52]:
def split_data(df, frac=0.2):
    seletected = df['flow_id'].drop_duplicates().sample(frac=frac)
    val = df[df['flow_id'].isin(seletected)]
    train = df[~df['flow_id'].isin(seletected)]
    return train, val

In [53]:
def resize_image(image, target_size=(224,224)):
    return cv2.resize(image, target_size, interpolation = cv2.INTER_LINEAR)

In [54]:
def most_frequent(List):
    return max(set(List), key=List.count)

In [55]:
class MyDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

In [56]:
def data_processing(raw_data):
    #Get flow label
    result = raw_data.groupby('flow_id')['Label'].apply(list).to_dict()
    flow_label = []
    for flow in result:
        flow_label.append(most_frequent(result[flow]))
    flow_label = np.array(flow_label)
    true_data = raw_data.drop('flow_id',axis=1)

    datas = true_data.drop('Label',axis=1).to_numpy()/255
    datas = datas.reshape(-1,20,256).astype('float32')
    # Resize each image in the dataset
    datas = np.array([resize_image(img) for img in datas])
    rgb_datas = np.repeat(datas[:, :,np.newaxis], 3, axis=2)
    print(type(rgb_datas))
    final_dataset = MyDataset(rgb_datas, flow_label)
    return final_dataset

In [57]:
Train_data = pd.read_feather('/media/bkcs/ea03a187-9ad2-44ad-9ec0-6246736e0fcd/gquic/Train/GQUIC_data_256.feather')
Test_data = pd.read_feather('/media/bkcs/ea03a187-9ad2-44ad-9ec0-6246736e0fcd/gquic/Test/GQUIC_test_256.feather')

In [58]:
train_source = Train_data[Train_data.Label != 3]
train_target, val_raw = split_data(Train_data[Train_data.Label == 3])
test_raw = Test_data[Test_data.Label == 3]

In [59]:
train_source_dataset = data_processing(train_source)

train_target_dataset = data_processing(train_target)

val_dataset = data_processing(val_raw)

test_dataset = data_processing(test_raw)

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [60]:
train_source_loader = DataLoader(train_source_dataset, batch_size=32,
                                 shuffle=True, drop_last=True)
train_target_loader = DataLoader(train_target_dataset, batch_size=32,
                                 shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

train_source_iter = ForeverDataIterator(train_source_loader)
train_target_iter = ForeverDataIterator(train_target_loader)


In [61]:
len(next(train_source_iter))

2

In [62]:
next(train_source_iter)[:2]

[tensor([[[[0.4571, 0.6535, 0.7367,  ..., 0.7709, 0.5476, 0.2050],
           [0.4571, 0.6535, 0.7367,  ..., 0.7709, 0.5476, 0.2050],
           [0.4571, 0.6535, 0.7367,  ..., 0.7709, 0.5476, 0.2050]],
 
          [[0.4571, 0.6535, 0.7367,  ..., 0.7709, 0.5476, 0.2050],
           [0.4571, 0.6535, 0.7367,  ..., 0.7709, 0.5476, 0.2050],
           [0.4571, 0.6535, 0.7367,  ..., 0.7709, 0.5476, 0.2050]],
 
          [[0.4571, 0.6535, 0.7367,  ..., 0.7709, 0.5476, 0.2050],
           [0.4571, 0.6535, 0.7367,  ..., 0.7709, 0.5476, 0.2050],
           [0.4571, 0.6535, 0.7367,  ..., 0.7709, 0.5476, 0.2050]],
 
          ...,
 
          [[0.5843, 0.6930, 0.5706,  ..., 0.3964, 0.4389, 0.0639],
           [0.5843, 0.6930, 0.5706,  ..., 0.3964, 0.4389, 0.0639],
           [0.5843, 0.6930, 0.5706,  ..., 0.3964, 0.4389, 0.0639]],
 
          [[0.5843, 0.6930, 0.5706,  ..., 0.3964, 0.4389, 0.0639],
           [0.5843, 0.6930, 0.5706,  ..., 0.3964, 0.4389, 0.0639],
           [0.5843, 0.6930, 0.570

In [63]:
x_s, labels_s = next(train_source_iter)[:2]

In [64]:
x_s.shape

torch.Size([32, 224, 3, 224])