In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

import sys
sys.path.append('/home/myk/min0')

sys.path.append('/home/myk/min0/traffic-classification')
sys.path.append('/home/myk/min0/traffic-classification/Model')
import DataRepresentation.Label2Int as pp
from Transformer.layer import EmbeddingLayer

In [None]:
import random

def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU generator, the current CUDA device and all CUDA devices), then sets
    ``cudnn.deterministic = True`` and ``cudnn.benchmark = False``.

    Args:
        seed: Integer seed handed unchanged to each generator.
    """
    # The generators are independent of one another, so seeding order is
    # irrelevant.
    for seed_fn in (
        random.seed,                 # Python's built-in random module
        np.random.seed,              # NumPy global generator
        torch.manual_seed,           # PyTorch generator
        torch.cuda.manual_seed,      # current CUDA device
        torch.cuda.manual_seed_all,  # every CUDA device (multi-GPU)
    ):
        seed_fn(seed)

    # Request deterministic cuDNN algorithms and turn off its auto-tuner.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


seed_everything(42)

In [None]:
# Load the pre-tokenised inputs and the labels produced by the project's
# Label2Int helper, then report their shapes.
x = np.load('/home/myk/min0/sentencepiece_10.npy')
y = pp.getlabel('/home/myk/min0/metadata.csv')
print(x.shape)
print(y.shape)

# Largest value stored in x; used below (as num_index + 1) to size the
# embedding layers.
num_index = int(x.max())
print(num_index)

In [None]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

In [None]:
# --- optimisation settings ---
batch, lr, epoch = 8, 1e-3, 20

# --- model architecture ---
embedding_dim = 32        # passed to the encoder layers as d_model
n_layer, n_head = 4, 8    # encoder depth / attention heads per layer
dim_ff = 16               # feed-forward width inside each encoder layer
dropout = 0.3
output_size = 20          # size of the classifier's final output

In [None]:
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

# Hold out 20% of the samples for testing; the shuffle is seeded so the split
# is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

class CustomDataset(Dataset):
    """Map-style dataset pairing two index-aligned containers.

    Args:
        x_data: Indexable container of inputs; its length defines the
            dataset length.
        y_data: Indexable container of labels, indexed with the same ``idx``.
    """

    def __init__(self, x_data, y_data):
        self.x_data, self.y_data = x_data, y_data

    def __len__(self):
        # Only the input container determines the reported size.
        return len(self.x_data)

    def __getitem__(self, idx):
        """Return the ``(input, label)`` pair stored at position ``idx``."""
        return self.x_data[idx], self.y_data[idx]

# Wrap the split arrays so that DataLoader can batch them.
train_dataset = CustomDataset(x_train, y_train)
test_dataset = CustomDataset(x_test, y_test)

# Training: reshuffle every epoch and drop the trailing partial batch so that
# every optimisation step sees exactly `batch` samples.
train_loader = DataLoader(train_dataset, batch_size=batch, shuffle=True, drop_last=True)

# Evaluation: keep every sample (no drop_last) and a fixed order (no shuffle)
# so metrics cover the whole test split and are repeatable between runs.
test_loader = DataLoader(test_dataset, batch_size=batch, shuffle=False, drop_last=False)

print(x_train.shape)
print(x_test.shape)
print(batch)

In [None]:
class transformer(nn.Module):
    """Transformer-encoder classifier over token-id sequences.

    Pipeline: project ``EmbeddingLayer.Embedding`` (built from a
    ``TokenEmbedding`` and a ``PositionalEmbedding``) -> stack of
    ``nn.TransformerEncoderLayer`` (``batch_first=True``) -> mean over the
    non-padded sequence positions -> two-layer MLP producing ``output_size``
    values per sample.

    Pooling over the sequence axis keeps the classifier input width equal to
    ``embedding_dim``, so the head no longer depends on a hard-coded
    flattened size that has to match the sequence length.

    Args:
        num_embeddings: Passed as ``max_len`` to both project embedding
            layers (the notebook passes the largest token id + 1).
        embedding_dim: Embedding width; also the encoder's ``d_model``.
        n_layer: Number of stacked encoder layers.
        n_head: Attention heads per encoder layer.
        dim_ff: Feed-forward width inside each encoder layer.
        dropout: Dropout probability inside each encoder layer.
        output_size: Width of the classifier output.
        device: Device the embedding module is moved to on construction.
    """

    def __init__(self, num_embeddings, embedding_dim,  n_layer, n_head, dim_ff, dropout, output_size, device="cuda"):
        super(transformer, self).__init__()

        token_embed = EmbeddingLayer.TokenEmbedding(max_len=num_embeddings, d_embed=embedding_dim)
        pos_embed = EmbeddingLayer.PositionalEmbedding(d_embedding=embedding_dim, max_len=num_embeddings, device=device)
        self.embedding = EmbeddingLayer.Embedding(token_embed, pos_embed).to(device)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embedding_dim,
            nhead=n_head,
            dim_feedforward=dim_ff,
            dropout=dropout,
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(encoder_layer=encoder_layer, num_layers=n_layer)

        # The head consumes one pooled vector of width `embedding_dim` per sample.
        self.classifier = nn.Sequential(
            nn.Linear(embedding_dim, 200),
            nn.GELU(),
            nn.Linear(200, output_size),
        )

    @staticmethod
    def _masked_mean(out, mask=None):
        """Average ``out`` over dim 1, skipping positions flagged in ``mask``.

        Args:
            out: Tensor indexed as (batch, seq, feature).
            mask: Optional tensor indexed as (batch, seq); truthy entries mark
                positions to exclude. ``None`` averages every position.

        Returns:
            Tensor indexed as (batch, feature). A row whose positions are all
            masked yields zeros rather than NaN.
        """
        if mask is None:
            return out.mean(dim=1)
        pad = mask.to(torch.bool).unsqueeze(-1)
        # masked_fill (instead of multiplying by 0) also discards any NaN/inf
        # sitting at masked positions.
        summed = out.masked_fill(pad, 0.0).sum(dim=1)
        # clamp avoids a 0/0 division for fully masked rows.
        n_valid = (~pad).sum(dim=1).clamp(min=1)
        return summed / n_valid

    def forward(self, x, mask=None):
        """Return one ``output_size``-wide prediction per input sequence.

        Args:
            x: Token ids; assumed to be (batch, seq_len) as produced by the
                notebook's DataLoader — TODO confirm against EmbeddingLayer.
            mask: Key-padding mask with ``True`` at padded positions, forwarded
                to the encoder and reused to exclude padding from the pooling.
        """
        out = self.embedding(x)
        out = self.encoder(out, src_key_padding_mask=mask)
        out = self._masked_mean(out, mask)
        return self.classifier(out)

In [None]:
# Build the classifier from the hyperparameters defined above. The embedding
# size is num_index + 1 so the largest token id seen in x is a valid index.
model = transformer(
    num_embeddings=num_index + 1,
    embedding_dim=embedding_dim,
    n_layer=n_layer,
    n_head=n_head,
    dim_ff=dim_ff,
    dropout=dropout,
    output_size=output_size,
    device=device,
)

In [None]:
# Wrap the model for multi-GPU execution. The isinstance guard makes this cell
# idempotent: re-running it must not nest one DataParallel inside another.
if not isinstance(model, torch.nn.DataParallel):
    model = torch.nn.DataParallel(model)
model = model.to(device)

In [None]:
# Cross-entropy over the classifier outputs, optimised with Adam at the
# configured learning rate.
criterion = nn.CrossEntropyLoss()
optim = torch.optim.Adam(params=model.parameters(), lr=lr)

In [None]:
def src_key_padding_mask(x, pad_idx = 0):
    """Build a key-padding mask for ``x``.

    input  : x    [batch, seq_len]
    output : mask [batch, seq_len]

    The mask is True wherever ``x`` equals ``pad_idx`` and False everywhere
    else.
    """
    return x == pad_idx

In [None]:

for i in range(epoch):
    # Make sure dropout is active, even if an evaluation pass switched the
    # model to eval mode between epochs.
    model.train()

    for data, label in train_loader:
        # Use dedicated names for the batch tensors: `x` and `y` hold the full
        # dataset arrays loaded earlier and must not be overwritten here.
        inputs = data.long().to(device)
        targets = label.long().to(device)
        # Derived from `inputs`, so the mask already lives on `device`.
        mask = src_key_padding_mask(inputs)

        outputs = model(inputs, mask)
        # Compute the loss
        loss = criterion(outputs, targets)

        # Backpropagate and update the weights
        optim.zero_grad()
        loss.backward()
        optim.step()

    # Report the loss of the last batch every 10 epochs.
    if (i+1) % 10 == 0:
        print(f'epoch {(i+1)} :  Loss: {loss.item()}')