In [2]:
import torch
# from google.colab import drive
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import torchvision
# drive.mount('/content/gdrive')

In [3]:
DATA_DIR = '~/python/mnist_training/mnist_train.csv'

In [4]:
def get_data():
  data = pd.read_csv(DATA_DIR).to_numpy()
  Y = data[:, 0]
  X = data[:, 1:]
  X = np.reshape(X, (len(X), 28, 28))
  X, Y = shuffle(X, Y)
  return X, Y

In [5]:
def y2indicator(Y):
  N = len(Y)
  K = len(set(Y))
  y_ind = np.zeros((N, K))
  for i in range(N):
    y_ind[i, int(Y[i])] = 1
  return y_ind

In [6]:
def train(model, loss, optimizer, inputs, labels):
  model.train()

  inputs = torch.autograd.Variable(data=inputs, requires_grad=False)
  labels = torch.autograd.Variable(data=labels, requires_grad=False)

  optimizer.zero_grad()

  logits = model.forward(inputs)
  output = loss.forward(logits, labels)

  output.backward()

  optimizer.step()

  return output.item()

In [7]:
def get_cost(model, loss, inputs, labels):
  model.eval()

  inputs = torch.autograd.Variable(data=inputs, requires_grad=False)
  labels = torch.autograd.Variable(data=labels, requires_grad=False)

  logits = model.forward(inputs)
  output = loss.forward(logits, labels)

  return output.item()

In [8]:
def predict(model, inputs):
  inputs = torch.autograd.Variable(inputs, requires_grad=False)
  logits = model.forward(inputs)
  return logits.data.numpy().argmax(axis=1)

In [10]:
validation_split = 0.2

X, Y = get_data()
Xtrain = X[:-int(validation_split*len(X))]
Ytrain = Y[:-int(validation_split*len(X))]
Xtest = X[-int(validation_split*len(X)):]
Ytest = Y[-int(validation_split*len(X)):]

N, D = Xtrain.shape[1], Xtrain.shape[2]
K = len(set(Ytrain) | set(Ytest))

model = torchvision.models.vgg19_bn()
# model.add_module('Dropout1', torch.nn.Dropout(p=0.2))
# model.add_module('Dense1', torch.nn.Linear(D, 500))
# model.add_module('Batchnorm1', torch.nn.BatchNorm1d(500))
# model.add_module('ReLU1', torch.nn.ReLU())
# model.add_module('Dropout2', torch.nn.Dropout(p=0.2))
# model.add_module('Dense2', torch.nn.Linear(500, 300))
# model.add_module('Batchnorm2', torch.nn.BatchNorm1d(300))
# model.add_module('ReLU2', torch.nn.ReLU())
# model.add_module('Dropout3', torch.nn.Dropout(p=0.2))
# model.add_module('Dense3', torch.nn.Linear(300, K))
# model.add_module('Softmax', torch.nn.Softmax(K))

loss = torch.nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters())

Xtrain = torch.from_numpy(Xtrain).float()
Ytrain = torch.from_numpy(Ytrain).long()
Xtest = torch.from_numpy(Xtest).float()
Ytest = torch.from_numpy(Ytest).long()
# Ytest = torch.from_numpy(Ytest).float()

epochs = 15
batch_size = 32
num_batches = Xtrain.size()[0] // batch_size

costs = []
accuracies = []
val_costs = []
for i in range(epochs):
  cost = 0.
  for j in range(num_batches):
    Xbatch = Xtrain[j*batch_size:(j+1)*batch_size]
    Ybatch = Ytrain[j*batch_size:(j+1)*batch_size]
    cost += train(model, loss, optimizer, Xbatch, Ybatch)

  val_cost = get_cost(model, loss, Xtest, Ytest)
  val_costs.append(val_cost)
  costs.append(cost)
  Ypred = predict(model, Xtest)
  accuracy = np.mean(Ypred == Ytest)
  accuracies.append(accuracy)
  print(f'Cost: {cost}, accuracy: {accuracy}, validation cost: {val_cost}, i: {i}')
plt.plot(val_cost, label='val_cost')
plt.plot(costs, label='cost')
plt.legend()
# plt.plot(acc)
plt.show()



RuntimeError: Given groups=1, weight of size [64, 3, 3, 3], expected input[1, 48000, 28, 28] to have 3 channels, but got 48000 channels instead

In [None]:
plt.plot(val_costs, label='val_cost')

In [None]:
plt.plot(accuracies)