# Step1. 관련 패키지 및 모듈 import 하기

In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
cd '/content/drive/MyDrive/ColabNotebooks/Lecture/Pytorch/Codes'

/content/drive/MyDrive/ColabNotebooks/Lecture/Pytorch/Codes


In [3]:
import os
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

from torch.nn import init

# Step 2. 와인 데이터셋 구성하기

In [4]:
trainFileName = './data/winequality_train_all_shuffle.csv'
testFileName =  './data/winequality_test_all.csv'

train_data = pd.read_csv(trainFileName, sep=',')
test_data = pd.read_csv(testFileName, sep=',')

# Generate Test Label
test_label = pd.DataFrame([0. if item == '0' else 1. for item in test_data['class']])
test_label = [0. if item == 0 else 1. for item in test_data['class']]

# drop the class infromation
test_data = test_data.drop(['class'], axis=1)

In [5]:
# Check
train_data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,class
0,8.6,0.725,0.24,6.6,0.117,31.0,134.0,1.0014,3.32,1.07,9.3,5,0
1,6.9,0.28,0.28,12.2,0.042,52.0,139.0,0.99522,3.03,0.56,10.4,6,1
2,6.6,0.23,0.2,11.4,0.044,45.0,131.0,0.99604,2.96,0.51,9.7,6,1
3,10.4,0.24,0.49,1.8,0.075,6.0,20.0,0.9977,3.18,1.06,11.0,6,0
4,7.1,0.28,0.26,1.9,0.049,12.0,86.0,0.9934,3.15,0.38,9.4,5,1


In [6]:
# Check
test_data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
1,7.4,0.66,0.0,1.8,0.075,13.0,40.0,0.9978,3.51,0.56,9.4,5
2,7.8,0.58,0.02,2.0,0.073,9.0,18.0,0.9968,3.36,0.57,9.5,7
3,7.5,0.5,0.36,6.1,0.071,17.0,102.0,0.9978,3.35,0.8,10.5,5
4,8.9,0.62,0.19,3.9,0.17,51.0,148.0,0.9986,3.17,0.93,9.2,5


# Step 3. 다층퍼셉트론 모델 설계하기

In [9]:
class mymodel(nn.Module):
    def __init__(self, is_train=True):        
        super(mymodel, self).__init__()
        ################################
        ## Problem #1        
        ################################
        self.training = is_train
        self.fc1 = nn.Linear(12, 50)        
        self.fc2 = nn.Linear(50, 22)
        self.fc3 = nn.Linear(22, 2)
        self.softmax = nn.Softmax()
        ###############################
                
    def forward(self, x):
        ################################
        ## Problem #1        
        ################################  
        x = self.fc1(x)
        x = self.fc2(x)  
        x = self.fc3(x)
        x = self.softmax(x)
        ################################
        return x

# 모델 구조 확인하기

In [10]:
def print_network(model):    
    def _get_network_description(network):
        '''Get the string and total parameters of the network'''
        if isinstance(network, nn.DataParallel):
            network = network.module
        s = str(network)
        n = sum(map(lambda x: x.numel(), network.parameters()))
        return s, n
    s, n = _get_network_description(model)
    if isinstance(model, nn.DataParallel):
        net_struc_str = '{} - {}'.format(model.__class__.__name__, model.module.__class__.__name__)
    else: net_struc_str = '{}'.format(model.__class__.__name__)
    log = 'Network structure: {}, with parameters: {:,d}'.format(net_struc_str, n)
    return log, s

In [11]:
model = mymodel()
log, architecture = print_network(model)
print(log)
print(architecture)

Network structure: mymodel, with parameters: 1,818
mymodel(
  (fc1): Linear(in_features=12, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=22, bias=True)
  (fc3): Linear(in_features=22, out_features=2, bias=True)
  (softmax): Softmax(dim=None)
)


# Step 4. Training 함수 구현하기

In [12]:
def train(train_data, test_data, test_label, batch_size=1000, epochs=1000, learning_rate = 5e-4):
    
    # training on cpu    
    #device = torch.device('cpu')
    device = torch.device('cuda')

    # random seed
    torch.manual_seed(1)

    # define model
    model = mymodel().to(device)

    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0, betas=(0.9, 0.999))        

    # loss fucntion
    criterion = nn.CrossEntropyLoss().to(device)
    
    # now start Training!!!
    running_loss = 0.0
    
    for epoch in range(epochs):
        
        model.train()
        
        #model.eval()

        # shuffling every epoch        
        train_data_shuffle = train_data
        train_label_shuffle = pd.DataFrame([0. if item == '0' else 1. for item in train_data_shuffle['class']])
        train_label_shuffle = [0. if item == 0 else 1. for item in train_data_shuffle['class']]

        train_data_shuffle = train_data_shuffle.drop(['class'], axis=1)


        for start, end in zip(range(0, len(train_data_shuffle), batch_size), range(batch_size, len(train_data_shuffle), batch_size)):       

            ##inputs = torch.from_numpy(train_data_shuffle[start:end].as_matrix()).to(device)
            inputs = torch.from_numpy(train_data_shuffle[start:end].to_numpy()).to(device)
            inputs = inputs.float()
            labels = torch.Tensor(train_label_shuffle[start:end]).to(device)
            labels = labels.long()
                 
            outputs = model(inputs)            
            loss = criterion(outputs, labels)
            
            ######################################################
            ### Problem 2: Backpropagation, Updating Model Weight
            ######################################################
            optimizer.zero_grad()       
            loss.backward()        
            optimizer.step()
            ######################################################


            # print statistics
            running_loss += loss.item()
        
       ###################################################
       #### Problem 3: Test model every 50 epochs
       ###################################################          
        total = 0
        correct = 0
        if epoch % 50 == 0 or epoch +1 == epochs:
            model.eval()
            with torch.no_grad(): 
                total = len(test_label)
                
                #test_outputs = model(Variable(torch.from_numpy(test_data.as_matrix()).float()).to(device))
                test_outputs = model(Variable(torch.from_numpy(test_data.to_numpy()).float()).to(device))
                                
                _, predicted = torch.max(test_outputs.data, 1)
                predicted = predicted.cpu()
                correct = (predicted.numpy() == test_label).sum().item()
                accuracy = 100 * correct / total
       ###################################################
                
                print('[Epoch: {}] [Training Loss: {:.6f}] [Accuracy: {:.6f}]'.format(epoch, running_loss / batch_size, accuracy))
                running_loss = 0.0
        model.train()
    print("End Training!")

# Step 5. Training!

In [13]:
train(train_data, test_data, test_label)



[Epoch: 0] [Training Loss: 0.001452] [Accuracy: 52.031250]
[Epoch: 50] [Training Loss: 0.046748] [Accuracy: 88.906250]
[Epoch: 100] [Training Loss: 0.042135] [Accuracy: 89.531250]
[Epoch: 150] [Training Loss: 0.041296] [Accuracy: 90.156250]
[Epoch: 200] [Training Loss: 0.040369] [Accuracy: 92.187500]
[Epoch: 250] [Training Loss: 0.039013] [Accuracy: 94.375000]
[Epoch: 300] [Training Loss: 0.037714] [Accuracy: 95.156250]
[Epoch: 350] [Training Loss: 0.036917] [Accuracy: 95.468750]
[Epoch: 400] [Training Loss: 0.036387] [Accuracy: 95.468750]
[Epoch: 450] [Training Loss: 0.035988] [Accuracy: 96.093750]
[Epoch: 500] [Training Loss: 0.035667] [Accuracy: 95.937500]
[Epoch: 550] [Training Loss: 0.035403] [Accuracy: 96.250000]
[Epoch: 600] [Training Loss: 0.035179] [Accuracy: 96.562500]
[Epoch: 650] [Training Loss: 0.034983] [Accuracy: 96.718750]
[Epoch: 700] [Training Loss: 0.034800] [Accuracy: 96.875000]
[Epoch: 750] [Training Loss: 0.034615] [Accuracy: 97.031250]
[Epoch: 800] [Training Loss