## CNN

In [None]:
import os
import sys
from pathlib import Path

PROJECT_PATH = Path() 
sys.path.append(str(PROJECT_PATH))

import torch
import torchvision
import torch.utils.data as Data
import torch.nn as nn
from matplotlib import pyplot as plt

# Directories used by the notebook.  DATASET_PATH is relative to the current
# working directory; the other two live under PROJECT_PATH.
# NOTE: the spelling of EMBDEDDINGS_PATH is kept as-is because other cells
# reference the constant by this exact name.
DATASET_PATH = Path("perl5", "project", "dataset", "CASP14_fm")
MODEL_PATH = PROJECT_PATH.joinpath("model")
EMBDEDDINGS_PATH = PROJECT_PATH.joinpath("embeddings")

# Training hyperparameters.
BATCH_SIZE = 128
EPOCH_NUM = 20
LEARNING_RATE = 0.0001

In [None]:
# Prepare dataset
from dataset import EmbeddingScoreDataset

train_dataset = EmbeddingScoreDataset(EMBDEDDINGS_PATH, DATASET_PATH, is_train=True)
test_dataset = EmbeddingScoreDataset(EMBDEDDINGS_PATH, DATASET_PATH, is_train=False)

# The GPU count sizes the training loader's worker pool.  It is computed here
# because the only other assignment of NUM_GPU happens in a later cell, so
# using it at this point would otherwise raise NameError.
NUM_GPU = torch.cuda.device_count()

train_loader = Data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_GPU * 4,
    pin_memory=True,
    shuffle=True,
)
test_loader = Data.DataLoader(test_dataset, batch_size=1, shuffle=False)

# Fixed held-out batch for the periodic evaluation in the training loop: the
# first (up to) EVAL_SUBSET_SIZE test samples, moved to the GPU once.
# The previous code read from an undefined `test_data` object and applied
# image-specific preprocessing (channel unsqueeze, division by 255); the
# samples are now taken from `test_dataset` in the same form the training
# loader feeds to the model.
# NOTE(review): assumes EmbeddingScoreDataset yields (input, score) pairs that
# the default collate function can stack into one batch - confirm in dataset.py.
EVAL_SUBSET_SIZE = 2000
_eval_loader = Data.DataLoader(test_dataset, batch_size=EVAL_SUBSET_SIZE, shuffle=False)
test_x, test_y = next(iter(_eval_loader))
test_x = test_x.cuda()
test_y = test_y.cuda()



In [None]:
# model
class CNN(nn.Module):
    """Two-stage convolutional regressor producing one value per sample.

    The network applies two ``Conv2d(5x5, no padding) -> ReLU -> MaxPool2d(2)``
    stages (1 -> 16 -> 32 channels), flattens the result and passes it through
    three fully connected layers (-> 128 -> 32 -> 1).

    Args:
        input_size: ``(height, width)`` of the single-channel input.  Each
            stage maps a spatial size ``n`` to ``(n - 4) // 2``, so the default
            ``(40, 40)`` yields the ``32 * 7 * 7`` flattened features the first
            linear layer was originally hard-coded for.

    Raises:
        ValueError: if ``input_size`` is too small to survive both stages
            (each side must be at least 16).
    """

    def __init__(self, input_size=(40, 40)):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=0
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

        self.conv2 = nn.Sequential(
            nn.Conv2d(
                in_channels=16,
                out_channels=32,
                kernel_size=5,
                stride=1,
                padding=0
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

        # Spatial size left after both conv/pool stages, per dimension.
        height, width = (
            self._stage_output_size(self._stage_output_size(side))
            for side in input_size
        )
        if height < 1 or width < 1:
            raise ValueError(
                "input_size must be at least 16x16, got %r" % (tuple(input_size),)
            )

        self.fc1 = nn.Linear(32 * height * width, 128)
        self.fc2 = nn.Linear(128, 32)
        self.fc3 = nn.Linear(32, 1)

    @staticmethod
    def _stage_output_size(size):
        """Return the spatial size after one unpadded 5x5 conv and a 2x2 max-pool."""
        return (size - 4) // 2

    def forward(self, x):
        """Map a ``(batch, 1, height, width)`` tensor to ``(batch, 1)`` outputs."""
        x = self.conv1(x)
        x = self.conv2(x)
        # Keep the batch dimension and flatten everything else.  The earlier
        # code called an undefined ``num_flat_features`` helper here and an
        # unimported ``F`` alias below, so forward() could never run.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [None]:
# Count the visible GPUs; more than one enables data-parallel execution.
NUM_GPU = torch.cuda.device_count()
USE_PARALLEL = NUM_GPU > 1

# Build the network on the GPU and, if requested, wrap it in DataParallel.
cnn = CNN().cuda()
if USE_PARALLEL:
    cnn = nn.DataParallel(cnn)

print(cnn)


In [None]:
# Mean-squared-error objective optimised with Adam.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=cnn.parameters(), lr=LEARNING_RATE)

# Training history: global step counter plus the series that get plotted.
counter = 0
step_list, loss_list, accuracy_list = [], [], []


In [None]:
# train
def _match_shape(target, prediction):
    """Return ``target`` reshaped like ``prediction`` when only the layout differs.

    The model emits one column per sample.  If the targets hold the same number
    of elements in a different shape (for example ``(N,)`` against ``(N, 1)``),
    ``MSELoss`` would broadcast the two against each other instead of comparing
    them element-wise.  Targets whose element count differs are returned
    unchanged so the loss reports the mismatch itself.
    """
    if target.shape != prediction.shape and target.numel() == prediction.numel():
        return target.reshape(prediction.shape)
    return target


for epoch in range(EPOCH_NUM):
    for step, (b_x, b_y) in enumerate(train_loader):
        predict_y = cnn(b_x.cuda())
        loss = criterion(predict_y, _match_shape(b_y.cuda(), predict_y))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        counter += 1

        if step % 100 == 0:
            # Evaluate on the fixed held-out batch without tracking gradients.
            # The model has a single regression output, so an arg-max based
            # "accuracy" is always index 0 and carries no information; the
            # held-out MSE is reported instead.
            cnn.eval()
            with torch.no_grad():
                test_output = cnn(test_x)
                test_loss = criterion(test_output, _match_shape(test_y, test_output)).item()
            cnn.train()

            train_loss = loss.item()
            print('epoch:', epoch, '|step:%4d' % step, '|loss:%6f' % train_loss, '|test loss:%6f' % test_loss)

            step_list.append(counter)
            loss_list.append(train_loss)
            # `accuracy_list` is created outside this cell; its name is kept
            # and it now stores the held-out loss series.
            accuracy_list.append(test_loss)

            plt.cla()
            plt.plot(step_list, loss_list, c='red', label='loss')
            plt.plot(step_list, accuracy_list, c='blue', label='test loss')
            plt.legend(loc='best')
            plt.pause(0.1)