In [None]:
import os
import numpy as np
import torch 
import pandas as pd
import preprocess as pp
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
# Building blocks for the CNN model
import torch.nn as nn
import torch.optim as optim
# NOTE(review): this alias rebinds the built-in name `isinstance` to the
# `torch.nn.init` module for every later cell, so any `isinstance(obj, cls)`
# call written in this notebook would fail. `init` was presumably the intended
# alias -- confirm no cell relies on the current name before renaming it.
import torch.nn.init as isinstance

import torch.utils.data

In [4]:
# Build the feature data with the `preprocess` module (imported as `pp`).
# The recorded cell output reports a final shape of torch.Size([5787, 51, 1088]).
# NOTE(review): the meaning of the arguments (2, "accstudy", "metadata.csv") is
# defined inside `preprocess`, which is not visible here -- confirm before changing them.
x = pp.preprocess(2,"accstudy","metadata.csv")

print("About Label : ")
# Load the labels. The recorded cell output lists 13 label ids (0-12).
# NOTE(review): the labels are read from "os/metadata.csv" while preprocess() is
# given "metadata.csv" -- confirm both describe the same samples in the same order.
y = pp.getlabel("os/metadata.csv")

Packet Max Length is 51
Final data shape is torch.Size([5787, 51, 1088])
총 실행 시간  41.13045406341553
About Label : 
0 is mac_mac-os-x
1 is none_kali-linux
2 is ubuntu_ubuntu-14.4-32b
3 is ubuntu_ubuntu-14.4-64b
4 is ubuntu_ubuntu-16.4-32b
5 is ubuntu_ubuntu-16.4-64b
6 is ubuntu_ubuntu-server
7 is ubuntu_web-server
8 is windows_windows-10
9 is windows_windows-10-pro
10 is windows_windows-7-pro
11 is windows_windows-8.1
12 is windows_windows-vista


In [5]:
# Cache the preprocessed features on disk as a NumPy .npy file.
# NOTE(review): the recorded output above reports `x` as a torch tensor, so
# np.save presumably converts it to an ndarray first -- confirm it lives on the CPU.
np.save("acc_x_nprint.npy",x)

In [None]:
# Split features and labels into train and test subsets (test share 0.2),
# shuffling with a fixed seed so the split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=1,
)

# Show the shapes of the training features and training labels, one per line.
print(x_train.shape, y_train.shape, sep="\n")

In [None]:
# Check for class imbalance: count how often each label value occurs in the training split.

pd.DataFrame(y_train).value_counts()

In [None]:
# Training hyperparameters shared by the cells below.

batch_size = 16  # samples per mini-batch handed to the DataLoaders
lr = 1e-3        # learning rate given to the optimizer
epoch = 10       # number of passes over the training loader

In [None]:
class CustomDataset(Dataset):
    """Map-style dataset that pairs each feature entry with its label.

    Both inputs are converted with ``.float()`` once, at construction time,
    so they must provide that method (e.g. torch tensors).
    """

    def __init__(self, x_data, y_data):
        """Store float copies of the features (``x_data``) and labels (``y_data``)."""
        self.y_data = y_data.float()
        self.x_data = x_data.float()

    def __len__(self):
        """Return the number of samples, i.e. the length of the feature container."""
        return len(self.x_data)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for position ``idx``.

        A leading singleton dimension is added to the features; the label is
        returned as stored.
        """
        features = self.x_data[idx]
        target = self.y_data[idx]
        return features.unsqueeze(0), target

In [None]:
# Wrap each split in a CustomDataset so that DataLoader can batch it.
train_dataset = CustomDataset(x_train, y_train)
test_dataset = CustomDataset(x_test, y_test)

# Training batches are reshuffled on every pass; drop_last=True discards a
# final batch that would be smaller than batch_size.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

# The test loader iterates in a fixed order. With shuffle=True and
# drop_last=True a different random subset of test samples was discarded on
# every pass, so the reported accuracy changed from run to run.
# NOTE(review): drop_last=True still leaves up to batch_size - 1 test samples
# unevaluated. It is kept so that every batch has exactly batch_size samples;
# set it to False only if the model's forward pass accepts a partial batch.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

In [None]:
# CNN model definition

class CNN(nn.Module):
    """Convolutional classifier for single-channel 2-D inputs.

    The feature extractor is four unpadded 4x4 convolutions, each followed by
    ReLU, with a 2x2 max-pool after the second and after the fourth
    convolution. Two fully connected layers then map the flattened features
    to one logit per class.

    Args:
        num_classes: Number of output logits. Defaults to 13, the number of
            label ids (0-12) listed in the recorded output of the
            label-loading cell. The previous hard-coded value was 7, which
            nn.CrossEntropyLoss rejects for any target >= 7.
            NOTE(review): confirm 13 against the labels actually used.
        input_size: ``(height, width)`` of one input sample. Defaults to
            ``(51, 1088)``, for which the flattened feature size is
            128 * 8 * 267 -- the value that used to be hard-coded.
    """

    def __init__(self, num_classes=13, input_size=(51, 1088)):
        super(CNN, self).__init__()

        self.layer = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=4),
            nn.ReLU(),

            nn.Conv2d(16, 32, kernel_size=4),
            nn.ReLU(),

            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(32, 64, kernel_size=4),
            nn.ReLU(),

            nn.Conv2d(64, 128, kernel_size=4),
            nn.ReLU(),

            nn.MaxPool2d(kernel_size=2)
        )

        # Derive the flattened feature size by pushing one all-zero sample
        # through the conv stack, so the first Linear layer always matches
        # `input_size` instead of relying on a hand-computed constant.
        with torch.no_grad():
            probe = torch.zeros(1, 1, *input_size)
            flat_features = torch.flatten(self.layer(probe), 1).size(1)

        # NOTE(review): there is no activation between the two Linear layers,
        # so together they form a single affine map. Left as-is to keep the
        # layer layout (and state_dict keys) unchanged.
        self.fc_layer = nn.Sequential(
            nn.Linear(flat_features, 500),
            nn.Linear(500, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x: Input of shape ``(batch, 1, height, width)``.
        """
        out = self.layer(x)
        # Flatten per sample using the batch dimension of the input itself.
        # The previous code reshaped with the notebook-global `batch_size`,
        # which failed for any batch of a different size (e.g. a final
        # partial batch or single-sample inference).
        out_reshape = torch.flatten(out, 1)
        out_final = self.fc_layer(out_reshape)
        return out_final

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(device)

# Create the network and move its parameters to the selected device.
model = CNN()
model = model.to(device)

In [None]:
# Set up the loss function and the optimizer

# Cross-entropy loss, applied in the training cell to the model output and the labels.
loss = nn.CrossEntropyLoss()

# Adam over all model parameters, using the learning rate from the settings cell.
# NOTE(review): this rebinds `optim`, which the import cell bound to the
# `torch.optim` module; from here on `optim` is the Adam instance. The training
# cell uses the name `optim`, so rename both together if this is cleaned up.
optim = torch.optim.Adam(model.parameters(),lr=lr)

In [None]:
# Train for `epoch` passes over the training loader.

# The evaluation cell switches the model to eval mode; make sure training mode
# is active whenever this cell is (re)run.
model.train()

for i in range(epoch):
    running_loss = 0.0
    n_batches = 0

    for batch_inputs, batch_labels in train_loader:
        # Batch-local names are used on purpose: assigning to `x` / `y` here
        # would overwrite the full dataset variables that the save and split
        # cells read, so re-running those cells would silently use one batch.
        batch_inputs = batch_inputs.to(device)
        # CrossEntropyLoss takes class indices as int64; CustomDataset stores
        # the labels as float, so cast them here.
        batch_labels = batch_labels.to(device).type(torch.long)

        optim.zero_grad()

        # Call the module itself instead of .forward() so registered hooks run.
        output = model(batch_inputs)

        batch_loss = loss(output, batch_labels)
        batch_loss.backward()
        optim.step()

        running_loss += batch_loss.item()
        n_batches += 1

    # Report the mean loss over the epoch rather than the loss of whichever
    # batch came last; nan marks an epoch in which the loader yielded nothing
    # (the previous code raised NameError in that case).
    mean_loss = running_loss / n_batches if n_batches else float("nan")
    print(f'epoch {i} :  Loss: {mean_loss}')

In [None]:
torch.save(model, 'cnn.pt') # save the model (the whole module object is passed, not just its state_dict)

In [None]:
from sklearn.metrics import f1_score  # NOTE(review): imported but not used in this cell

correct = 0
total = 0

# Switch the model to evaluation mode.
model.eval()

with torch.no_grad():
    for batch_inputs, batch_labels in test_loader:
        # Batch-local names keep the dataset variables `x` / `y` from being
        # overwritten by a single test batch.
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        # Call the module itself instead of .forward() so registered hooks run.
        output = model(batch_inputs)

        # torch.max over dim 1 returns (max values, indices); the index of the
        # largest logit is the predicted class. Reading `.indices` avoids
        # assigning to `_`, which IPython uses for the last cell output.
        output_index = torch.max(output, 1).indices

        # Total sample count += number of labels in this batch.
        total += batch_labels.size(0)

        # Count predictions that match the labels. .item() converts the 0-d
        # tensor to a Python number, so the accuracy printed below is a plain
        # number instead of a tensor repr.
        correct += (output_index == batch_labels).sum().item()

# nan marks the case where the loader yielded no batches (avoids dividing by zero).
accuracy = 100 * correct / total if total else float("nan")
print("Accuracy of Test Data: {}%".format(accuracy))

In [None]:
# Save the model again after evaluation; this writes to the same 'cnn.pt' path
# as the earlier save cell, so that file is overwritten.
torch.save(model,'cnn.pt')