# In [None]:
"""
Your ultimate task for this week is to build your first neural network [almost] from scratch in pure PyTorch.

This time you will solve the same letter recognition problem, but at a larger scale:

* 10 different letters
* 20k samples

We want you to build a network that reaches at least 80% accuracy and has at least 2 linear layers in it. Naturally, it should be nonlinear to beat logistic regression.

With 10 classes you will need to use __Softmax__ at the top instead of sigmoid and train using __categorical crossentropy__ (see [here](http://wiki.fast.ai/index.php/Log_Loss)). Write your own loss or use `torch.nn.functional.nll_loss`. Just make sure you understand what it accepts as input: log-probabilities (e.g. the output of `log_softmax`) and integer class labels, not raw scores or one-hot vectors.
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# from notmnist import load_notmnist
# X_train, y_train, X_test, y_test = load_notmnist(letters='ABCDEFGHIJ')
# X_train, X_test = X_train.reshape([-1, 784]), X_test.reshape([-1, 784])

# Wrap the NumPy arrays as CPU tensors: float32 for the inputs, int64 for the
# labels.
# NOTE(review): X_train / y_train / X_test / y_test are not defined in the
# visible code (the loader call above is commented out) -- presumably they are
# NumPy arrays left in scope by an earlier cell; confirm.
X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).long()
X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test).long()

# Pair every sample with its label so the two are always indexed together.
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
test_dataset = torch.utils.data.TensorDataset(X_test, y_test)

# Serve mini-batches of 100 samples. Only the training data is reshuffled on
# each pass; the test data keeps its original order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=100, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=100, shuffle=False
)

# Network: one hidden layer with a ReLU non-linearity, followed by a
# log-softmax over the 10 output classes.
model = nn.Sequential(
    # 28*28 pixel images = 784 input values per sample, mapped to 100 features
    nn.Linear(in_features=784, out_features=100),
    nn.ReLU(),
    # 100 hidden features mapped to one score per class
    nn.Linear(in_features=100, out_features=10),
    # normalise the scores across the class dimension into log-probabilities
    nn.LogSoftmax(dim=1),
)

# Adam over every trainable parameter of the network.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Negative log-likelihood loss; it is fed the log-probabilities produced by
# the LogSoftmax layer above.
loss_fn = nn.NLLLoss()

# Fit the network: 10 complete passes over the training loader.
for epoch in range(10):
    for X_batch, y_batch in train_loader:
        # Clear the gradients left by the previous step first; backward()
        # adds into .grad rather than overwriting it.
        optimizer.zero_grad()
        # Forward pass over the current mini-batch.
        y_pred = model(X_batch)
        # Score the predictions against the true labels.
        loss = loss_fn(y_pred, y_batch)
        # Backpropagate, then let the optimizer apply a single update.
        loss.backward()
        optimizer.step()

# Measure classification accuracy over the test loader.
correct = 0
total = 0
# Use inference behaviour while evaluating, then put the previous mode back so
# any later training code sees the model exactly as it was.
was_training = model.training
model.eval()
# Evaluation needs no gradients: disabling autograd avoids recording a graph
# for every test batch, and makes the legacy `.data` detour unnecessary.
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        y_pred = model(X_batch)
        # Predicted class = index of the largest output in each row.
        predicted = y_pred.argmax(dim=1)
        total += y_batch.size(0)
        correct += (predicted == y_batch).sum().item()
model.train(was_training)

# %d truncates the percentage to a whole number.
print('Accuracy: %d %%' % (100 * correct / total))

