In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

from sklearn.model_selection import KFold

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as image


# parameters

learning_rate = 0.001   # passed to the optimizer as `lr`
training_epochs = 15    # number of passes over the training loader
batch_size = 100        # batch size shared by every DataLoader below

# Fully connected head: `layers` linear layers, described by one entry per
# layer in the two arrays below.
layers = 2
layer_features = np.array([128, 10])
layer_dropout = np.array([0.25, 0.5]) # dropout = 0 if not used

# Fail fast when the per-layer arrays and the layer count drift apart, instead
# of failing later with an index or shape error while building the model.
if len(layer_features) != layers or len(layer_dropout) != layers:
    raise ValueError(
        'layer_features and layer_dropout must each have {} entries, got {} and {}'.format(
            layers, len(layer_features), len(layer_dropout)))

# number of folds used by the K-fold split
splits = 4

# mnist data

# Images are converted to tensors; no further normalisation is applied.
transform = transforms.ToTensor()
train_mnist = dsets.MNIST(root='./data', train=True, download=True, transform=transform)
test_mnist = dsets.MNIST(root='./data', train=False, download=True, transform=transform)

# Training batches are reshuffled on every pass and a ragged final batch is dropped.
train_dataloader = DataLoader(train_mnist, batch_size=batch_size, shuffle=True, drop_last=True)

# Evaluation must see every sample exactly once: shuffling is pointless here and
# drop_last=True would silently discard samples whenever batch_size does not
# divide the dataset size, skewing the reported loss and accuracy.
test_dataloader = DataLoader(test_mnist, batch_size=batch_size, shuffle=False, drop_last=False)


# k-fold

# Use an explicit integer seed. A boolean `True` is coerced to the integer 1,
# so random_state=1 yields the same shuffle while making the seed obvious.
kfold = KFold(n_splits=splits, shuffle=True, random_state=1)

# Only the first fold is used. KFold only needs the number of samples, so it is
# given positional indices rather than the Dataset object itself.
train_index1, val_index1 = next(kfold.split(np.arange(len(train_mnist))))

train1_mnist = torch.utils.data.Subset(train_mnist, train_index1)
val1_mnist = torch.utils.data.Subset(train_mnist, val_index1)
train_dataloader_1 = DataLoader(train1_mnist, batch_size=batch_size, shuffle=True, drop_last=True)
# Validation is an evaluation pass: keep every sample, in a fixed order.
val_dataloader_1 = DataLoader(val1_mnist, batch_size=batch_size, shuffle=False, drop_last=False)


# device

# Seed the CPU generator first, then pick the device; CUDA generators are
# seeded only when a GPU is actually going to be used.
torch.manual_seed(777)
if torch.cuda.is_available():
    device = 'cuda'
    torch.cuda.manual_seed_all(777)
else:
    device = 'cpu'

# LinearNet

class LinearNet(nn.Module):
    """Stack of `num_layers` fully connected layers.

    The layer widths are
    ``input_size -> layers_size[0] -> ... -> layers_size[num_layers-2] -> output_size``,
    so only the first ``num_layers - 1`` entries of `layers_size` are used and
    the last layer always produces `output_size` features.

    Args:
        input_size: number of input features of the first layer.
        num_layers: total number of linear layers (must be >= 1).
        layers_size: indexable sequence of hidden widths; needs at least
            ``num_layers - 1`` entries.
        output_size: number of output features of the last layer.

    Raises:
        ValueError: if `num_layers` is smaller than 1 or `layers_size` is too short.
    """

    def __init__(self, input_size, num_layers, layers_size, output_size):
        super(LinearNet, self).__init__()

        if num_layers < 1:
            raise ValueError('num_layers must be >= 1, got {}'.format(num_layers))
        if len(layers_size) < num_layers - 1:
            raise ValueError(
                'layers_size needs at least {} entries, got {}'.format(
                    num_layers - 1, len(layers_size)))

        # int() lets the sizes be given as NumPy integers as well as Python ints.
        hidden = [int(layers_size[i]) for i in range(num_layers - 1)]
        widths = [int(input_size)] + hidden + [int(output_size)]

        # Layers are created first-to-last, one per consecutive pair of widths.
        # With a single layer this is just input_size -> output_size.
        self.linears = nn.ModuleList(
            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths[:-1], widths[1:])])
        print('linears 0 :', self.linears[0])

    def forward(self, x):
        """Apply every linear layer in order, with ReLU between layers.

        No activation is applied after the last layer, so the result is the
        raw output of the final linear layer.
        """
        last = len(self.linears) - 1
        for idx, linear in enumerate(self.linears):
            x = linear(x)
            if idx < last:
                x = F.relu(x)
        return x

# CNN

class CNN(nn.Module) :
    """Two convolution layers followed by a configurable fully connected head.

    The head is built from the notebook-level settings `layers`,
    `layer_features` and `layer_dropout`, read once when the model is
    constructed. `forward` returns per-class log-probabilities.
    """

    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)

        # NOTE(review): dropout1, dropout2, fc1 and fc2 are not used by
        # forward(); they are kept so existing attribute access and
        # state_dict keys keep working.
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

        # Create the dropout layer once, as a registered sub-module, so that
        # model.train() / model.eval() actually switch it on and off. Building
        # a fresh nn.Dropout inside forward() leaves it permanently in
        # training mode, i.e. dropout would also be applied during evaluation.
        # NOTE(review): only layer_dropout[0] is applied; the remaining
        # entries are currently unused - confirm whether that is intended.
        self.dropout = nn.Dropout(float(layer_dropout[0]))

        # 9216 = 64 channels * 12 * 12, the flattened feature size after the
        # two 3x3 convolutions and 2x2 pooling, assuming 1x28x28 inputs.
        self.linear_array = LinearNet(input_size=9216, num_layers=layers, layers_size=layer_features, output_size= 10)
        # Remember how many linear layers to apply so forward() does not
        # depend on the global `layers` still having the same value.
        self._num_linear = layers


    def forward(self, x):
        """Return log-probabilities over the 10 classes for a batch of images."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)

        x = self.dropout(x)
        x = torch.flatten(x, 1)

        # Fully connected head: ReLU between layers, none after the last one.
        last = self._num_linear - 1
        for l in range(self._num_linear):
            x = self.linear_array.linears[l](x)
            if l < last:
                x = F.relu(x)

        return F.log_softmax(x, dim=1)

In [4]:
# The training and evaluation loops move every batch to `device`, so the model
# must live on the same device or the forward pass fails on CUDA machines.
model = CNN().to(device)

# training

def training(tr_dataloader, cnn_model):
    """Train `cnn_model` in place with Adam and cross-entropy loss.

    Runs `training_epochs` passes over `tr_dataloader` and prints the mean
    batch loss after every epoch. Uses the notebook-level `device`,
    `learning_rate` and `training_epochs` settings.

    Args:
        tr_dataloader: iterable of (inputs, labels) batches that supports len().
        cnn_model: the nn.Module to optimise; it is moved to `device` and
            left in training mode.
    """
    # Move the model before building the optimizer so that parameters and the
    # batches (moved to `device` below) are on the same device.
    cnn_model.to(device)
    # Make sure train-only layers such as dropout are active.
    cnn_model.train()

    training_batch = len(tr_dataloader)
    tr_criterion = nn.CrossEntropyLoss()
    tr_optimizer = torch.optim.Adam(cnn_model.parameters(), lr=learning_rate)

    for epoch in range(training_epochs):
        avg_loss = 0.0

        for inputs, labels in tr_dataloader:

            inputs = inputs.to(device)
            labels = labels.to(device)

            tr_optimizer.zero_grad()

            outputs = cnn_model(inputs)
            loss = tr_criterion(outputs, labels)
            loss.backward()
            tr_optimizer.step()

            # .item() extracts a plain Python float; accumulating the tensor
            # itself would carry autograd history along for the whole epoch.
            avg_loss += loss.item() / training_batch

        print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, avg_loss))


# test

def testing(te_dataloader, cnn_model):
    """Evaluate `cnn_model` on `te_dataloader` and print loss and accuracy.

    The model is switched to evaluation mode for the duration of the call and
    its previous train/eval mode is restored afterwards. No gradients are
    computed and no parameters are updated.

    Args:
        te_dataloader: iterable of (inputs, labels) batches.
        cnn_model: the nn.Module to evaluate; it is moved to `device`.
    """
    te_loss_sum = 0.0
    te_total = 0
    te_correct = 0

    tr_criterion = nn.CrossEntropyLoss()

    cnn_model.to(device)
    was_training = cnn_model.training
    # Evaluation mode disables train-only behaviour such as dropout.
    cnn_model.eval()
    try:
        with torch.no_grad():
            for inputs_te, labels_te in te_dataloader:

                inputs_te = inputs_te.to(device)
                labels_te = labels_te.to(device)
                outputs_te = cnn_model(inputs_te)

                n_samples = labels_te.size(0)
                # Weight each batch mean by its size so a smaller final batch
                # does not distort the average.
                te_loss_sum += tr_criterion(outputs_te, labels_te).item() * n_samples

                _, predicted = torch.max(outputs_te, 1)
                # Vectorised comparison instead of a per-sample Python loop.
                te_correct += (predicted == labels_te).sum().item()
                te_total += n_samples
    finally:
        cnn_model.train(was_training)

    # An empty loader reports zero loss and accuracy instead of dividing by zero.
    te_avg_loss = te_loss_sum / te_total if te_total != 0 else 0
    te_accuracy = te_correct / te_total if te_total != 0 else 0

    print('test loss : %f'%te_avg_loss)
    print('accuracy : %f'%te_accuracy)


linears 0 : Linear(in_features=9216, out_features=128, bias=True)


In [5]:
# Fit on the first training fold, then report metrics on the held-out fold
# followed by the official test set.
training(train_dataloader_1, model)
for eval_loader in (val_dataloader_1, test_dataloader):
    testing(eval_loader, model)

[Epoch:    1] cost = 0.203248724
[Epoch:    2] cost = 0.0549868457


KeyboardInterrupt: 