In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x7fcca00845d0>

In [3]:
test_sentence = """When forty winters shall besiege thy brow,
And dig deep trenches in thy beauty's field,
Thy youth's proud livery so gazed on now,
Will be a totter'd weed of small worth held:
Then being asked, where all thy beauty lies,
Where all the treasure of thy lusty days;
To say, within thine own deep sunken eyes,
Were an all-eating shame, and thriftless praise.
How much more praise deserv'd thy beauty's use,
If thou couldst answer 'This fair child of mine
Shall sum my count, and make my old excuse,'
Proving his beauty by succession thine!
This were to be new made when thou art old,
And see thy blood warm when thou feel'st it cold.""".split()

In [4]:
vocab = set(test_sentence)
word_to_ix = {word: i for i, word in enumerate(vocab)}

In [6]:
embeds = nn.Embedding(len(vocab), 5)  # 2 words in vocab, 5 dimensional embeddings
lookup_tensor = torch.tensor([ word_to_ix[w] for w in word_to_ix], dtype=torch.long)
hello_embed = embeds(lookup_tensor)
print(hello_embed)

tensor([[ 1.8197e-01, -1.2673e+00, -5.9425e-01, -1.4850e-01, -2.5285e-01],
        [-1.3820e+00,  9.3916e-01,  3.5618e-01,  4.9380e-01, -1.0002e+00],
        [-6.8304e-01, -8.3323e-01, -3.4162e-01,  7.1815e-01,  8.0668e-01],
        [-7.7834e-01, -1.2114e-01,  3.7646e-01,  2.2569e+00, -4.6711e-01],
        [ 1.3315e-01, -7.5470e-01, -1.6823e-01,  3.4665e-01, -1.1639e+00],
        [ 4.7478e-01, -5.0918e-01,  1.6730e+00, -4.5682e-01, -8.1052e-02],
        [ 8.7875e-01, -5.9558e-01, -2.3440e-01, -5.4519e-01,  2.7512e-01],
        [-1.1834e+00, -8.2016e-01,  3.0568e-01,  1.3983e-01, -6.3713e-01],
        [ 4.4416e-01,  4.8962e-01,  1.2514e+00, -1.7716e+00, -2.0931e-02],
        [ 5.8456e-01,  6.1586e-01, -4.9985e-01, -8.8783e-01, -7.8470e-01],
        [-5.2750e-01,  2.9954e-01, -3.9653e-01, -5.3095e-01,  9.3562e-01],
        [-8.8727e-01,  7.5477e-01, -1.7974e+00,  9.0870e-01, -3.3137e-01],
        [-3.4890e-01,  7.6789e-01, -3.3286e-01, -9.1728e-01, -3.3329e-01],
        [-5.2900e-01,  1.

In [10]:
lstm = nn.LSTM(3, 3)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5

# initialize the hidden state.
hidden = (torch.randn(1, 1, 3),
          torch.randn(1, 1, 3))
for i in inputs:
    # Step through the sequence one element at a time.
    # after each step, hidden contains the hidden state.
    out, hidden = lstm(i.view(1, 1, -1), hidden)

# alternatively, we can do the entire sequence all at once.
# the first value returned by LSTM is all of the hidden states throughout
# the sequence. the second is just the most recent hidden state
# (compare the last slice of "out" with "hidden" below, they are the same)
# The reason for this is that:
# "out" will give you access to all hidden states in the sequence
# "hidden" will allow you to continue the sequence and backpropagate,
# by passing it as an argument  to the lstm at a later time
# Add the extra 2nd dimension
inputs = torch.cat(inputs).view(len(inputs), 1, -1)
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))  # clean out hidden state
out, hidden = lstm(inputs, hidden)
print(out)
print(hidden)

tensor([[[ 0.0637,  0.3064,  0.0738]],

        [[ 0.0136,  0.2445,  0.0861]],

        [[ 0.2833,  0.0139, -0.0708]],

        [[ 0.1037,  0.0319, -0.1022]],

        [[-0.1818, -0.0321, -0.1054]]], grad_fn=<StackBackward>)
(tensor([[[-0.1818, -0.0321, -0.1054]]], grad_fn=<StackBackward>), tensor([[[-0.2193, -0.0922, -0.1856]]], grad_fn=<StackBackward>))


In [27]:
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    return torch.tensor(idxs, dtype=torch.long)


training_data = [
    # Tags are: DET - determiner; NN - noun; V - verb
    # For example, the word "The" is a determiner
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
]
word_to_ix = {}
# For each words-list (sentence) and tags-list in each tuple of training_data
for sent, tags in training_data:
    for word in sent:
        if word not in word_to_ix:  # word has not been assigned an index yet
            word_to_ix[word] = len(word_to_ix)  # Assign each word with a unique index
print(word_to_ix)
tag_to_ix = {"DET": 0, "NN": 1, "V": 2}  # Assign each tag with a unique index

# These will usually be more like 32 or 64 dimensional.
# We will keep them small, so we can see how the weights change as we train.
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}


In [28]:
class LSTMTagger(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

In [29]:
training_data[0]

(['The', 'dog', 'ate', 'the', 'apple'], ['DET', 'NN', 'V', 'DET', 'NN'])

In [30]:
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# See what the scores are before training
# Note that element i,j of the output is the score for tag j for word i.
# Here we don't need to train, so the code is wrapped in torch.no_grad()
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)
    print(tag_scores)

for epoch in range(300):  # again, normally you would NOT do 300 epochs, it is toy data
    for sentence, tags in training_data:
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()

        # Step 2. Get our inputs ready for the network, that is, turn them into
        # Tensors of word indices.
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = prepare_sequence(tags, tag_to_ix)

        # Step 3. Run our forward pass.
        tag_scores = model(sentence_in)

        # Step 4. Compute the loss, gradients, and update the parameters by
        #  calling optimizer.step()
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()

# See what the scores are after training
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)

    # The sentence is "the dog ate the apple".  i,j corresponds to score for tag j
    # for word i. The predicted tag is the maximum scoring tag.
    # Here, we can see the predicted sequence below is 0 1 2 0 1
    # since 0 is index of the maximum value of row 1,
    # 1 is the index of maximum value of row 2, etc.
    # Which is DET NOUN VERB DET NOUN, the correct sequence!
    print(tag_scores)

tensor([[-0.9255, -1.1150, -1.2883],
        [-0.9011, -1.0753, -1.3756],
        [-0.9077, -1.0948, -1.3396],
        [-0.8726, -1.0482, -1.4629],
        [-0.9126, -1.1718, -1.2423]])
tensor([[-0.0518, -3.0430, -5.8769],
        [-3.8572, -0.0406, -3.9820],
        [-5.7582, -2.5251, -0.0869],
        [-0.1053, -2.3963, -4.7273],
        [-3.8658, -0.0464, -3.7141]])


## Convolutional NN on MINST

In [1]:
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import random
import math
from torch.utils.tensorboard import SummaryWriter
from matplotlib import pyplot


from pathlib import Path
import requests
import pickle
import gzip

import torch
import math
import torch.nn.functional as F
from torch import nn
from torch import optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from torch.autograd import Variable
pd.options.display.float_format = "{:,.4f}".format

In [83]:
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()



In [84]:
X_train = X_train.reshape(X_train.shape[0], 784)
X_test = X_test.reshape(X_test.shape[0], 784)
X_train = X_train.astype('float32') 
X_test  = X_test.astype('float32')
X_train /= 255 # Original data is uint8 (0-255). Scale it to range [0,1].
X_test  /= 255
print("Training X matrix shape", X_train.shape)
print("Testing X matrix shape", X_test.shape) 

Training X matrix shape (60000, 784)
Testing X matrix shape (10000, 784)


In [85]:
batch_size = 32

torch_X_train = torch.from_numpy(X_train)
torch_y_train = torch.from_numpy(y_train).type(torch.LongTensor) # data type is long

# create feature and targets tensor for test set.
torch_X_test = torch.from_numpy(X_test)
torch_y_test = torch.from_numpy(y_test).type(torch.LongTensor) # data type is long

# Pytorch train and test sets
# train = torch.utils.data.TensorDataset(torch_X_train,torch_y_train)
# test = torch.utils.data.TensorDataset(torch_X_test,torch_y_test)

# data loader
# train_loader = torch.utils.data.DataLoader(train, batch_size = batch_size, shuffle = True)
# test_loader = torch.utils.data.DataLoader(test, batch_size = batch_size, shuffle = True)

In [78]:
torch_X_train = torch_X_train.view(-1, 1,28,28).float()
torch_X_test = torch_X_test.view(-1,1,28,28).float()
print(torch_X_train.shape)
print(torch_X_test.shape)

torch.Size([60000, 1, 28, 28])
torch.Size([10000, 1, 28, 28])


In [85]:
28*28

784

In [86]:
class Net2nn(nn.Module):
    def __init__(self):
        super(Net2nn, self).__init__()
        self.fc1 = nn.Linear(784, 200)
        self.fc2 = nn.Linear(200, 200)
        self.fc3 = nn.Linear(200, 10)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [4]:
class CNN(nn.Module):   
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=5)
        self.conv3 = nn.Conv2d(32,64, kernel_size=5)
        self.fc1 = nn.Linear(3*3*64, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        #x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(F.max_pool2d(self.conv3(x),2))
        x = F.dropout(x, p=0.5, training=self.training)
        x = x.view(-1,3*3*64 )
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

In [27]:
class CNN(nn.Module): 
    def __init__(self):
        super(CNN, self).__init__()
        
        # Convolution 1
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        
        # Max pool 1
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
     
        # Convolution 2
        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        
        # Max pool 2
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        
        # Fully connected 1
        self.fc1 = nn.Linear(32 * 5 * 5, 10) 
    
    def forward(self, x):
        # Set 1
        out = self.cnn1(x)
        out = self.relu1(out)
        out = self.maxpool1(out)
        
        # Set 2
        out = self.cnn2(out)
        out = self.relu2(out)
        out = self.maxpool2(out)
        
        #Flatten
        out = out.view(out.size(0), -1)

        #Dense
        out = self.fc1(out)
        
        return out


In [55]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0.0
    correct = 0

    for idx, (data, target) in enumerate(train_loader):
        output = model(data)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        prediction = output.argmax(dim=1, keepdim=True)
        correct += prediction.eq(target.view_as(prediction)).sum().item()  
        
    return train_loss / len(train_loader), correct/len(train_loader.dataset)

In [56]:
def validation(model, test_loader, criterion):
    model.eval()
    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            
            test_loss += criterion(output, Variable(target)).item()
            prediction = output.argmax(dim=1, keepdim=True)
            correct += prediction.eq(target.view_as(prediction)).sum().item()

    test_loss /= len(test_loader)
    correct /= len(test_loader.dataset)

    return (test_loss, correct)

In [137]:
centralized_model = Net2nn()
centralized_optimizer = torch.optim.SGD(centralized_model.parameters(), lr=0.01, momentum=0.9)
centralized_criterion = nn.CrossEntropyLoss()

In [41]:
centralized_model = CNN()
centralized_optimizer = torch.optim.SGD(centralized_model.parameters(), lr=0.01, momentum=0.9)
centralized_criterion = nn.CrossEntropyLoss()

In [93]:
torch_X_train.shape,torch_X_test.shape,torch_y_train.shape,torch_y_test.shape

(torch.Size([60000, 784]),
 torch.Size([10000, 784]),
 torch.Size([60000]),
 torch.Size([10000]))

In [95]:
torch_X_train.type(),torch_X_test.type(),torch_y_train.type(),torch_y_test.type()

('torch.FloatTensor',
 'torch.FloatTensor',
 'torch.LongTensor',
 'torch.LongTensor')

In [43]:
torch_X_train = torch_X_train.reshape(60000,784)*255
torch_X_test = torch_X_test.reshape(10000,784)*255

In [135]:
train_ds = TensorDataset(torch_X_train, torch_y_train)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

# valid_ds = TensorDataset(x_valid, y_valid)
# valid_dl = DataLoader(valid_ds, batch_size=batch_size)

test_ds = TensorDataset(torch_X_test, torch_y_test)
test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=True)

In [80]:
torch_X_train.type()

'torch.FloatTensor'

In [97]:
numEpoch=5

In [138]:
print("------ Centralized Model ------")
for epoch in range(numEpoch):
    central_train_loss, central_train_accuracy = train(centralized_model, train_dl, centralized_criterion, centralized_optimizer)
    central_test_loss, central_test_accuracy = validation(centralized_model, test_dl, centralized_criterion)
    
    print("epoch: {:3.0f}".format(epoch+1) + " | train accuracy: {:7.4f}".format(central_train_accuracy) + " | test accuracy: {:7.4f}".format(central_test_accuracy))

print("------ Training finished ------")

------ Centralized Model ------
epoch:   1 | train accuracy:  0.8911 | test accuracy:  0.9539
epoch:   2 | train accuracy:  0.9621 | test accuracy:  0.9669
epoch:   3 | train accuracy:  0.9747 | test accuracy:  0.9719
epoch:   4 | train accuracy:  0.9807 | test accuracy:  0.9776
epoch:   5 | train accuracy:  0.9844 | test accuracy:  0.9746
------ Training finished ------


In [109]:
torch_X_train_cnn = torch_X_train.reshape(-1, 1,28,28)
torch_X_test_cnn = torch_X_test.reshape(-1, 1,28,28)

In [110]:
torch_X_train_cnn.shape,torch_X_test_cnn.shape,torch_y_train.shape,torch_y_test.shape

(torch.Size([60000, 1, 28, 28]),
 torch.Size([10000, 1, 28, 28]),
 torch.Size([60000]),
 torch.Size([10000]))

In [111]:
torch_X_train_cnn.type(),torch_X_test_cnn.type(),torch_y_train.type(),torch_y_test.type()

('torch.FloatTensor',
 'torch.FloatTensor',
 'torch.LongTensor',
 'torch.LongTensor')

In [115]:
train_ds = TensorDataset(torch_X_train_cnn, torch_y_train)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=False)

# valid_ds = TensorDataset(x_valid, y_valid)
# valid_dl = DataLoader(valid_ds, batch_size=batch_size)

test_ds = TensorDataset(torch_X_test_cnn, torch_y_test)
test_dl = DataLoader(test_ds, batch_size=batch_size*2, shuffle=False)

In [116]:
centralized_model = CNN()
centralized_optimizer = torch.optim.SGD(centralized_model.parameters(), lr=0.01, momentum=0.9)
centralized_criterion = nn.CrossEntropyLoss()

In [117]:
print("------ Centralized Model ------")
for epoch in range(numEpoch):
    central_train_loss, central_train_accuracy = train(centralized_model, train_dl, centralized_criterion, centralized_optimizer)
    central_test_loss, central_test_accuracy = validation(centralized_model, test_dl, centralized_criterion)
    
    print("epoch: {:3.0f}".format(epoch+1) + " | train accuracy: {:7.4f}".format(central_train_accuracy) + " | test accuracy: {:7.4f}".format(central_test_accuracy))

print("------ Training finished ------")

------ Centralized Model ------
epoch:   1 | train accuracy:  0.9406 | test accuracy:  0.9700
epoch:   2 | train accuracy:  0.9788 | test accuracy:  0.9779
epoch:   3 | train accuracy:  0.9843 | test accuracy:  0.9832
epoch:   4 | train accuracy:  0.9871 | test accuracy:  0.9857
epoch:   5 | train accuracy:  0.9889 | test accuracy:  0.9864
------ Training finished ------


In [None]:
centralized_model.cnn1.weight.data

In [127]:
a.

<generator object Module.parameters at 0x7f21f13e2580>

In [134]:
centralized_model.cnn1.weight.data
centralized_model.cnn2.weight.data
centralized_model.fc1.weight.data

tensor([[-1.8593e-02, -6.3795e-02, -7.5633e-02,  ..., -1.7269e-02,
         -1.8003e-02,  9.0840e-03],
        [ 2.8413e-02,  1.5507e-01,  8.2361e-02,  ..., -8.8406e-03,
          3.2208e-02,  3.3335e-02],
        [-4.6236e-02,  3.3244e-02,  8.5386e-02,  ...,  2.8067e-02,
         -1.8551e-02, -3.5313e-02],
        ...,
        [-1.6281e-04, -1.0020e-01, -8.1097e-02,  ..., -2.3337e-02,
          3.1707e-02, -2.4676e-02],
        [-5.8024e-03, -1.8188e-01, -8.8518e-02,  ...,  7.3785e-03,
         -7.3470e-03, -1.1191e-02],
        [-7.6109e-02, -1.3122e-01, -1.4460e-01,  ..., -2.2987e-02,
         -2.4031e-02,  1.7256e-02]])