In [10]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as image


In [11]:
# params

learning_rate = 0.001
training_epochs = 15
batch_size = 100

layers = 2
"""
layer_features = np.ones(layers)
layer_dropout = np.ones(layers)
"""
layer_features = np.array([128, 10])
layer_dropout = np.array([0.25, 0.5]) # dropout = 0 if not used


# mnist data

transform = transforms.ToTensor()
train_mnist = dsets.MNIST(root='./data', train=True, download=True, transform=transform)
test_mnist = dsets.MNIST(root='./data', train=False, download=True, transform=transform)
train_dataloader = DataLoader(train_mnist, batch_size=batch_size, shuffle=True, drop_last=True)
test_dataloader = DataLoader(test_mnist, batch_size=batch_size, shuffle=True, drop_last=True)


# device

device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.manual_seed(777)
if device=='cuda':
    torch.cuda.manual_seed_all(777)

In [12]:
class LinearNet(nn.Module):
    def __init__(self, input_size, num_layers, layers_size, output_size):
        super(LinearNet, self).__init__()

        self.linears = nn.ModuleList([nn.Linear(input_size, layers_size[0])])
        print('linears 0 :', self.linears[0])
        self.linears.extend([nn.Linear(layers_size[i-1], layers_size[i]) for i in range(1, num_layers-1)])
        self.linears.append(nn.Linear(layers_size[num_layers-2], output_size))

    def forward(self, x):
        """
        for l in range(layers):

                #self.dropout = nn.Dropout(layer_dropout[l])
                #x = self.dropout(x)
                #if l == 0 :
                    #x = torch.flatten(x, 1)
            self.fc1_ = self.linear_array.linears[0]
            x = self.fc1_(x)
            if l > 0:
                x = (self.linear_array.linears[l])(x)
            if l < layers-1 :
                x = F.relu(x)

        output = F.log_softmax(x, dim=1)


        return output
        """
        return x

In [67]:
# CNN

class CNN(nn.Module) :

    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)

        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

        self.dropout = None
        self.linear_array = LinearNet(input_size=9216, num_layers=layers, layers_size=layer_features, output_size= 10)


    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        #x = torch.flatten(x, 1)


        self.dropout = nn.Dropout(layer_dropout[0])
        x = self.dropout(x)
        x = torch.flatten(x, 1)
        #x = self.fc1(x)
        #x = F.relu(x)
        #x = self.dropout2(x)
        #x = self.fc2(x)


        for l in range(layers):

            #self.dropout = nn.Dropout(layer_dropout[l])
            #x = self.dropout(x)
            #if l == 0 :
                #x = torch.flatten(x, 1)
            self.fc1_ = self.linear_array.linears[0]

            if l == 0:
                x = self.fc1_(x)
            if l > 0:
                x = (self.linear_array.linears[l])(x)
            if l < layers-1 :
                x = F.relu(x)

        output = F.log_softmax(x, dim=1)


        return output

In [68]:
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


linears 0 : Linear(in_features=9216, out_features=128, bias=True)


In [69]:
# training

training_batch = len(train_dataloader)

for epoch in range(training_epochs):
    avg_loss = 0
    step = 0

    for inputs, labels in train_dataloader:

        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        avg_loss += loss/training_batch

    print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, avg_loss))


[Epoch:    1] cost = 0.187231779
[Epoch:    2] cost = 0.0505603552
[Epoch:    3] cost = 0.0337810516
[Epoch:    4] cost = 0.0240517054


KeyboardInterrupt: 

In [16]:
# test

avg_loss_test = 0
test_batch = len(test_dataloader)
total = 0
correct = 0

with torch.no_grad():
    for inputs_test, labels_test in test_dataloader:

        inputs_test = inputs_test.to(device)
        labels_test = labels_test.to(device)
        outputs_test = model(inputs_test)

        loss_test = criterion(outputs_test, labels_test)
        avg_loss_test+= loss_test/test_batch

        _, predicted = torch.max(outputs_test.data, 1)
        for label, prediction in zip(labels_test, predicted):
            if label == prediction:
                correct += 1
            total += 1

accuracy = 0
if total != 0:
    accuracy = correct / total

print('test loss : %f'%avg_loss_test)
print('accuracy : %f'%accuracy)

test loss : 0.053584
accuracy : 0.988300


In [17]:

from sklearn.model_selection import KFold

arr = np.array([1, 2, 3, 4, 5, 6])
kf = KFold(n_splits=3)
for tr, te in kf.split(arr):
    val = tr[2:]
    tr = tr[:2]
    print('train : ', tr, 'test : ', te, 'val :', val)


train :  [2 3] test :  [0 1] val : [4 5]
train :  [0 1] test :  [2 3] val : [4 5]
train :  [0 1] test :  [4 5] val : [2 3]


In [19]:
print(type(train_mnist))

<class 'torchvision.datasets.mnist.MNIST'>


In [50]:
splits = 4

from sklearn.model_selection import KFold

kfold = KFold(n_splits = splits, shuffle = True, random_state= True)

train_index1 = None
val_index1 = None

for tr, val in kfold.split(train_mnist):
    train_index1 = tr
    val_index1 = val
    break


#train_dataloader_1 = DataLoader(train_mnist(train1), batch_size=batch_size, shuffle=True, drop_last=True)
#val_dataloader_1 = DataLoader(train_mnist[val1], batch_size=batch_size, shuffle=True, drop_last=True)

print(train_index1[:100])
train1_mnist = torch.utils.data.Subset(train_mnist, train_index1)
val1_mnist = torch.utils.data.Subset(train_mnist, val_index1)

print(len(train1_mnist))

[  0   1   2   3   4   5   6   7   8   9  11  13  14  15  16  17  18  19
  20  21  22  23  24  26  27  29  32  33  34  35  36  37  38  39  41  42
  43  46  47  48  49  50  51  52  53  55  56  59  60  61  62  64  65  68
  69  70  71  72  73  75  76  77  78  79  80  81  82  83  86  87  90  94
  96  97  98 100 101 102 105 109 110 112 114 117 118 120 122 123 124 126
 127 128 130 131 133 135 136 137 138 139]
45000


In [53]:
train_dataloader_1 = DataLoader(train1_mnist, batch_size=batch_size, shuffle=True, drop_last=True)
val_dataloader_1 = DataLoader(val1_mnist, batch_size=batch_size, shuffle=True, drop_last=True)

In [55]:
training_batch = len(train_dataloader_1)
print(training_batch)
model_tr = CNN()
criterion_tr = nn.CrossEntropyLoss()
optimizer_tr = torch.optim.Adam(model_tr.parameters(), lr=learning_rate)


for epoch in range(training_epochs):
    avg_loss = 0
    step = 0

    for inputs, labels in train_dataloader_1:

        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer_tr.zero_grad()

        outputs = model_tr(inputs)
        loss = criterion_tr(outputs, labels)
        loss.backward()
        optimizer_tr.step()

        avg_loss += loss/training_batch

    print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, avg_loss))

450
linears 0 : Linear(in_features=9216, out_features=128, bias=True)
[Epoch:    1] cost = 0.204892352
[Epoch:    2] cost = 0.0551045835
[Epoch:    3] cost = 0.0344592296
[Epoch:    4] cost = 0.0254920963
[Epoch:    5] cost = 0.0174343381
[Epoch:    6] cost = 0.0143420631
[Epoch:    7] cost = 0.0131712202
[Epoch:    8] cost = 0.0103758061
[Epoch:    9] cost = 0.00980160385
[Epoch:   10] cost = 0.00815232564
[Epoch:   11] cost = 0.00846363977
[Epoch:   12] cost = 0.00520470878
[Epoch:   13] cost = 0.00490274932
[Epoch:   14] cost = 0.00857084431
[Epoch:   15] cost = 0.00497053238


In [56]:
avg_loss_val1 = 0
val_batch = len(val_dataloader_1)
total1 = 0
correct1 = 0

with torch.no_grad():
    for inputs_test, labels_test in val_dataloader_1:

        inputs_test = inputs_test.to(device)
        labels_test = labels_test.to(device)
        outputs_test = model_tr(inputs_test)

        loss_test = criterion_tr(outputs_test, labels_test)
        avg_loss_val1+= loss_test/val_batch

        _, predicted = torch.max(outputs_test.data, 1)
        for label, prediction in zip(labels_test, predicted):
            if label == prediction:
                correct1 += 1
            total1 += 1

accuracy1 = 0
if total1 != 0:
    accuracy1 = correct1 / total1

print('test loss : %f'%avg_loss_val1)
print('accuracy : %f'%accuracy1)

test loss : 0.066538
accuracy : 0.986533


In [58]:
def training(tr_dataloader, cnn_model):

    training_batch = len(tr_dataloader)
    tr_criterion = nn.CrossEntropyLoss()
    tr_optimizer = torch.optim.Adam(cnn_model.parameters(), lr=learning_rate)


    for epoch in range(2):
        avg_loss = 0

        for inputs, labels in tr_dataloader:

            inputs = inputs.to(device)
            labels = labels.to(device)

            tr_optimizer.zero_grad()

            outputs = cnn_model(inputs)
            loss = tr_criterion(outputs, labels)
            loss.backward()
            tr_optimizer.step()

            avg_loss += loss/training_batch

        print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, avg_loss))

model_tr2 = CNN()
training(train_dataloader_1, model_tr2)

linears 0 : Linear(in_features=9216, out_features=128, bias=True)
[Epoch:    1] cost = 0.221597999
[Epoch:    2] cost = 0.0555653647


In [60]:
def testing(te_dataloader, cnn_model):
    te_avg_loss = 0
    te_batch = len(val_dataloader_1)
    te_total = 0
    te_correct = 0

    tr_criterion = nn.CrossEntropyLoss()
    tr_optimizer = torch.optim.Adam(cnn_model.parameters(), lr=learning_rate)


    with torch.no_grad():
        for inputs_te, labels_te in te_dataloader:

            inputs_te = inputs_te.to(device)
            labels_te = labels_te.to(device)
            outputs_te = cnn_model(inputs_te)

            loss_te = tr_criterion(outputs_te, labels_te)
            te_avg_loss+= loss_te/te_batch

            _, predicted = torch.max(outputs_te.data, 1)
            for label, prediction in zip(labels_te, predicted):
                if label == prediction:
                    te_correct += 1
                te_total += 1

    te_accuracy = 0
    if te_total != 0:
        te_accuracy = te_correct / te_total

    print('test loss : %f'%te_avg_loss)
    print('accuracy : %f'%te_accuracy)

testing(val_dataloader_1, model_tr2)

test loss : 0.059631
accuracy : 0.982467
