In [1]:
import torch
import numpy as np

In [2]:
from torchvision import datasets
import torchvision.transforms as transforms

# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 20

# convert data to torch.FloatTensor
transform = transforms.ToTensor()

# choose the training and test datasets
train_data = datasets.MNIST(root='data', train=True,
                                   download=False, transform=transform)
test_data = datasets.MNIST(root='data', train=False,
                                  download=False, transform=transform)

# prepare data loaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
    num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, 
    num_workers=num_workers)

In [3]:
import torch.nn as nn
import torch.nn.functional as F

## Define the NN architecture
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        self.layers = nn.Sequential(
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        # flatten image input
        x = x.view(-1, 28 * 28)
        # add hidden layer, with relu activation function
        output = F.softmax(self.layers(x))  
        return output

# initialize the NN
model = Net()
print(model)
        

Net(
  (layers): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [5]:
## Specify loss and optimization functions

# specify loss function
criterion = nn.CrossEntropyLoss()
model.load_state_dict(torch.load('mnist.pth'))
# specify optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [7]:
# number of epochs to train the model
n_epochs = 30  # suggest training between 20-50 epochs

model.train() # prep model for training

for epoch in range(n_epochs):
    # monitor training loss
    train_loss = 0.0
    
    ###################
    # train the model #
    ###################
    for data, target in train_loader:
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update running training loss
        train_loss += loss.item()*data.size(0)
        
    # print training statistics 
    # calculate average loss over an epoch
    train_loss = train_loss/len(train_loader.dataset)
    torch.save(model.state_dict(), 'mnist.pth')
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(
        epoch+1, 
        train_loss
        ))



Epoch: 1 	Training Loss: 2.278233
Epoch: 2 	Training Loss: 1.978062
Epoch: 3 	Training Loss: 1.695597
Epoch: 4 	Training Loss: 1.649341
Epoch: 5 	Training Loss: 1.635431
Epoch: 6 	Training Loss: 1.628009
Epoch: 7 	Training Loss: 1.623096
Epoch: 8 	Training Loss: 1.619445
Epoch: 9 	Training Loss: 1.616527
Epoch: 10 	Training Loss: 1.614068
Epoch: 11 	Training Loss: 1.611922
Epoch: 12 	Training Loss: 1.610011
Epoch: 13 	Training Loss: 1.608284
Epoch: 14 	Training Loss: 1.606704
Epoch: 15 	Training Loss: 1.605242
Epoch: 16 	Training Loss: 1.603865
Epoch: 17 	Training Loss: 1.602557
Epoch: 18 	Training Loss: 1.601310
Epoch: 19 	Training Loss: 1.600122
Epoch: 20 	Training Loss: 1.598989
Epoch: 21 	Training Loss: 1.597905
Epoch: 22 	Training Loss: 1.596857
Epoch: 23 	Training Loss: 1.595837
Epoch: 24 	Training Loss: 1.594825
Epoch: 25 	Training Loss: 1.587593
Epoch: 26 	Training Loss: 1.545370
Epoch: 27 	Training Loss: 1.535258
Epoch: 28 	Training Loss: 1.530003
Epoch: 29 	Training Loss: 1.5

In [6]:
test_loss = 0.0
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
model.eval() # prep model for *evaluation*

for data, target in test_loader:
    # forward pass: compute predicted outputs by passing inputs to the model
    output = model(data)
    # calculate the loss
    loss = criterion(output, target)
    # update test loss 
    test_loss += loss.item()*data.size(0)
    # convert output probabilities to predicted class
    _, pred = torch.max(output, 1)
    # compare predictions to true label
    correct = np.squeeze(pred.eq(target.data.view_as(pred)))
    # calculate test accuracy for each object class
    for i in range(batch_size):
        label = target.data[i]
        class_correct[label] += correct[i].item()
        class_total[label] += 1

# calculate and print avg test loss
test_loss = test_loss/len(test_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

for i in range(10):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            str(i), 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))



Test Loss: 1.525036

Test Accuracy of     0: 98% (967/980)
Test Accuracy of     1: 97% (1111/1135)
Test Accuracy of     2: 93% (968/1032)
Test Accuracy of     3: 93% (940/1010)
Test Accuracy of     4: 94% (930/982)
Test Accuracy of     5: 87% (780/892)
Test Accuracy of     6: 96% (920/958)
Test Accuracy of     7: 95% (977/1028)
Test Accuracy of     8: 90% (881/974)
Test Accuracy of     9: 92% (936/1009)

Test Accuracy (Overall): 94% (9410/10000)


In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn import metrics
import matplotlib.pyplot as plt
base_model_state_dict = torch.load("mnist.pth")
print(base_model_state_dict)


OrderedDict([('layers.0.weight', tensor([[-0.0257,  0.0153, -0.0149,  ...,  0.0121, -0.0327, -0.0113],
        [ 0.0036,  0.0118,  0.0057,  ..., -0.0330,  0.0328, -0.0177],
        [-0.0334,  0.0222,  0.0084,  ...,  0.0267,  0.0290,  0.0150],
        ...,
        [-0.0264,  0.0132, -0.0152,  ...,  0.0126,  0.0142,  0.0229],
        [ 0.0287,  0.0194,  0.0126,  ...,  0.0167,  0.0206,  0.0134],
        [ 0.0302, -0.0192, -0.0185,  ...,  0.0163,  0.0287, -0.0252]])), ('layers.0.bias', tensor([-1.3767e-02, -1.7466e-02, -9.4647e-03,  5.9409e-03,  1.6467e-02,
         3.9640e-02, -3.3521e-02, -1.7707e-02,  3.8793e-02,  3.3052e-02,
         4.6234e-02,  4.9658e-02,  1.1988e-02, -6.2132e-03,  3.6456e-02,
         3.1284e-02,  2.3535e-02,  5.0652e-03,  3.1584e-03, -1.0833e-02,
        -1.4996e-02, -2.8061e-02, -5.4911e-02,  5.6590e-02,  7.8679e-04,
        -5.3288e-03,  5.4412e-03, -2.9851e-02,  1.1851e-02, -1.4605e-02,
         5.0437e-02,  1.0080e-02,  6.1243e-02, -1.3783e-02,  3.2884e-02,
  

In [18]:
print(base_model_state_dict['layers.0.weight'].cpu().numpy().shape)
print(base_model_state_dict['layers.0.bias'].cpu().numpy().reshape(-1,1).shape)
a = np.hstack((base_model_state_dict['layers.0.weight'].cpu().numpy(),base_model_state_dict['layers.0.bias'].cpu().numpy().reshape(-1,1)))
b = np.hstack((base_model_state_dict['layers.2.weight'].cpu().numpy(),base_model_state_dict['layers.2.bias'].cpu().numpy().reshape(-1,1)))
c = np.hstack((base_model_state_dict['layers.4.weight'].cpu().numpy(),base_model_state_dict['layers.4.bias'].cpu().numpy().reshape(-1,1)))
print(a.shape,b.shape,c.shape)
a = np.vstack((a.reshape(-1,1),b.reshape(-1,1)))
a = np.vstack((a.reshape(-1,1),c.reshape(-1,1)))


a = a.reshape(-1,1)
print(a.shape)

(512, 784)
(512, 1)
(512, 785) (512, 513) (10, 513)
(669706, 1)


In [None]:
k_means = KMeans(n_clusters=256)

k_means.fit(a)
y_predict = k_means.predict(a)

plt.scatter(a[:,0],a[:,0],c=y_predict)
print(k_means.predict((a[:30,:])))
print(k_means.cluster_centers_)
np.save('256.npy',k_means.cluster_centers_)
print(k_means.inertia_)

[238  32 218 130  48  49  64 191 197 243  11 139 142 238 138  45 231 130
 163  11 175 139  77 194 249  96  89 123  77  32]
[[ 3.54634784e-02]
 [-1.67179406e-02]
 [ 3.45787103e-03]
 [-3.23950499e-02]
 [ 1.76184736e-02]
 [ 6.63152635e-02]
 [-1.36610374e-01]
 [-4.40531038e-02]
 [ 1.98211119e-01]
 [-1.50254369e-03]
 [ 5.43002449e-02]
 [ 3.05306874e-02]
 [-1.05894953e-02]
 [-2.38550100e-02]
 [ 1.11204451e-02]
 [-7.87975416e-02]
 [ 1.10937171e-01]
 [ 2.56222282e-02]
 [-2.17148051e-01]
 [-5.52386381e-02]
 [ 4.81988601e-02]
 [ 3.04292113e-01]
 [ 4.26697358e-02]
 [-6.71587978e-03]
 [ 1.52097419e-01]
 [ 7.75616895e-03]
 [-2.08740532e-02]
 [-3.92614529e-02]
 [-2.64439508e-02]
 [-1.07876435e-01]
 [ 2.21790802e-02]
 [ 7.39600211e-02]
 [ 1.53630590e-02]
 [-6.47041351e-02]
 [-1.30365174e-02]
 [-3.09851598e-02]
 [-2.51414955e-01]
 [ 3.88773009e-02]
 [ 8.63359943e-02]
 [-3.61812413e-02]
 [-1.73028722e-01]
 [-3.84042901e-03]
 [ 3.29592489e-02]
 [ 2.44655356e-01]
 [-4.75251153e-02]
 [ 3.07169510e-04]
 [ 