In [1]:
import sys
sys.path.append('/home/myk/min0/')

import numpy as np
import torch
import pandas as pd
import preprocess as pp
import cwe
import random

In [2]:
def seed_everything(seed):
    torch.manual_seed(seed) #torch를 거치는 모든 난수들의 생성순서를 고정한다
    torch.cuda.manual_seed(seed) #cuda를 사용하는 메소드들의 난수시드는 따로 고정해줘야한다 
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True #딥러닝에 특화된 CuDNN의 난수시드도 고정 
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed) #numpy를 사용할 경우 고정
    random.seed(seed) #파이썬 자체 모듈 random 모듈의 시드 고정
seed_everything(42)

In [3]:
x = np.load('../x_triple2token_dict.npy')
y = pp.getlabel('../metadata.csv')

num_index = np.max(x)

0 is arkio
1 is bigballers
2 is bigscreen
3 is blaston
4 is campfire
5 is epicrollercoasters
6 is firsthand
7 is firsttouch
8 is fitxr
9 is godsofgravity
10 is gorillatag
11 is gunraiders
12 is horizonworkrooms
13 is horizonworlds
14 is hyperdash
15 is immersed
16 is netflix
17 is roblox
18 is vrchat
19 is youtube


In [4]:
print(num_index)

4322


In [5]:
input_size = 150
hidden_dim = 100
num_layers = 5
output_size = 20
batch = 16
lr = 0.001
epoch = 200

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [7]:
model = cwe.CWE(num_index+1, hidden_dim, num_layers, input_size,output_size, batch).to(device)
print(model)

CWE(
  (embedding): Embedding(4323, 150)
  (lstm): LSTM(150, 100, num_layers=5, batch_first=True, dropout=0.22, bidirectional=True)
  (layer1): Sequential(
    (0): Conv1d(1, 32, kernel_size=(1,), stride=(1,))
    (1): ReLU()
  )
  (layer2): Sequential(
    (0): Conv1d(32, 64, kernel_size=(1,), stride=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=3200, out_features=1000, bias=True)
    (1): Linear(in_features=1000, out_features=20, bias=True)
  )
)


In [8]:
criterion = torch.nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(), lr=lr)

In [9]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2,shuffle=True, random_state=1)

print(x_train.shape)
print(y_train.shape)

(2544, 900)
torch.Size([2544])


In [10]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, x_data, y_data):
        self.x_data = x_data
        self.y_data = y_data

    def __len__(self):
        return len(self.x_data)

    def __getitem__(self, idx):
        x = self.x_data[idx]
        y = self.y_data[idx]
        return x, y

train_dataset = CustomDataset(x_train,y_train)
test_dataset = CustomDataset(x_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=batch, shuffle=True,drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=batch, shuffle=True,drop_last=True)

In [11]:

for i in range(epoch):
    for j,[data,label] in enumerate(train_loader):
        label = label.type(torch.LongTensor)
        x = data.to(device)
        y = label.to(device)
        outputs = model.forward(x)
        # 손실 계산
        loss = criterion(outputs, y)
        
        # 역전파 및 가중치 업데이트
        optim.zero_grad()
        loss.backward()
        optim.step()
        
    if (i+1) % 10 == 0:
        print(f'epoch {(i+1)} :  Loss: {loss.item()}')

epoch 10 :  Loss: 0.30922311544418335
epoch 20 :  Loss: 0.38475528359413147
epoch 30 :  Loss: 0.2358551174402237
epoch 40 :  Loss: 0.3647477924823761
epoch 50 :  Loss: 0.17824171483516693
epoch 60 :  Loss: 0.0013858855236321688
epoch 70 :  Loss: 0.0001439579064026475
epoch 80 :  Loss: 0.0057332213036715984
epoch 90 :  Loss: 0.013013914227485657
epoch 100 :  Loss: 0.0745667815208435
epoch 110 :  Loss: 0.21265053749084473
epoch 120 :  Loss: 0.07972748577594757
epoch 130 :  Loss: 0.006724173203110695
epoch 140 :  Loss: 0.01777603290975094
epoch 150 :  Loss: 7.017593452474102e-05
epoch 160 :  Loss: 0.0009433758677914739
epoch 170 :  Loss: 0.00010281504364684224
epoch 180 :  Loss: 0.024735799059271812
epoch 190 :  Loss: 0.0002779499045573175
epoch 200 :  Loss: 7.770962110953405e-05


In [12]:
model.eval()
model_name = "LCNN_ver12.pt"
total =0
correct=0
incorrect_num_correct = np.zeros(120)
incorrect_num_answer = np.zeros(120)
with torch.no_grad():
    for image,label in test_loader:
        x = image.to(device)
        y= label.to(device)
        output = model.forward(x)
        
        # torch.max함수는 (최댓값,index)를 반환 
        _,output_index = torch.max(output,1)
        
        # 전체 개수 += 라벨의 개수
        total += label.size(0)
        
        # 도출한 모델의 index와 라벨이 일치하면 correct에 개수 추가
        correct += (output_index == y).sum().float()
        
        for i in range(len(y)):
            if y[i]!=output_index[i]:
                k = int(y[i].cpu().numpy())
                incorrect_num_correct[k] += 1
                incorrect_num_answer[int(output_index[i].cpu().numpy())]+=1

    
    # 정확도 도출
    print("Accuracy of Test Data: {}%".format(100*correct/total))
   

Accuracy of Test Data: 92.78845977783203%


In [13]:
torch.save(model,model_name)