<a href="https://colab.research.google.com/github/AronnePiperno/IML-Face-Search-Engine-La-Boccia/blob/master/mnist_v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
from torchvision import datasets, transforms
import torch.nn.functional as F
import timeit

In [None]:
# Reproducibility: seed NumPy's global RNG and PyTorch's default generator with 0.
np.random.seed(0)
torch.manual_seed(0)
# cuDNN: disable benchmark mode and request deterministic algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Training-time preprocessing: geometric/photometric augmentation on the PIL
# image, followed by tensor conversion and normalisation.
transform = transforms.Compose([
    # Crop the central 26x26 region, then scale it back up to 28x28.
    transforms.CenterCrop(size=26),
    transforms.Resize(size=(28, 28)),
    # Small random photometric and geometric perturbations.
    transforms.ColorJitter(brightness=0.05, contrast=0.05, saturation=0.05, hue=0.05),
    transforms.RandomRotation(degrees=10),
    transforms.RandomAffine(degrees=5),
    # PIL image -> PyTorch tensor.
    transforms.ToTensor(),
    # Standardise with the dataset mean and standard deviation.
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

In [None]:
# MNIST training split, downloaded into ./data when missing; every sample is
# passed through `transform` on access.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)
# Shuffled mini-batches of 128 samples.
train_dataloader = Data.DataLoader(
    dataset=train_dataset,
    batch_size=128,
    shuffle=True,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 192367930.02it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 42281219.48it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 41683340.24it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 5793956.44it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [None]:
class Net(nn.Module):
    """Four-branch CNN classifier for 1x28x28 inputs with 10 output classes.

    Each branch applies three unpadded, stride-1 convolutions that share one
    kernel size (3, 5, 7 or 9 depending on the branch), a 2x2 max-pool and two
    linear layers, producing a 128-dimensional vector. The four vectors are
    concatenated and mapped to 10 log-probabilities.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 16 channels, one convolution per branch (kernel 3/5/7/9).
        self.conv11 = nn.Conv2d(1, 16, 3, 1)  # 1x28x28 -> 16x26x26
        self.conv12 = nn.Conv2d(1, 16, 5, 1)  # 1x28x28 -> 16x24x24
        self.conv13 = nn.Conv2d(1, 16, 7, 1)  # 1x28x28 -> 16x22x22
        self.conv14 = nn.Conv2d(1, 16, 9, 1)  # 1x28x28 -> 16x20x20

        # Stage 2: 16 -> 32 channels.
        self.conv21 = nn.Conv2d(16, 32, 3, 1)  # 16x26x26 -> 32x24x24
        self.conv22 = nn.Conv2d(16, 32, 5, 1)  # 16x24x24 -> 32x20x20
        self.conv23 = nn.Conv2d(16, 32, 7, 1)  # 16x22x22 -> 32x16x16
        self.conv24 = nn.Conv2d(16, 32, 9, 1)  # 16x20x20 -> 32x12x12

        # Stage 3: 32 -> 64 channels.
        self.conv31 = nn.Conv2d(32, 64, 3, 1)  # 32x24x24 -> 64x22x22
        self.conv32 = nn.Conv2d(32, 64, 5, 1)  # 32x20x20 -> 64x16x16
        self.conv33 = nn.Conv2d(32, 64, 7, 1)  # 32x16x16 -> 64x10x10
        self.conv34 = nn.Conv2d(32, 64, 9, 1)  # 32x12x12 -> 64x4x4

        # 2x2 max-pool shared by all branches; it halves each spatial side
        # (22 -> 11, 16 -> 8, 10 -> 5, 4 -> 2).
        self.maxpool = nn.MaxPool2d(2)
        # Dropout applied to the flattened conv features (p=0.25) and to the
        # hidden linear activations (p=0.5).
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)

        # Per-branch hidden layer: flattened pooled features -> 256.
        self.fc11 = nn.Linear(64*11*11, 256)
        self.fc12 = nn.Linear(64*8*8, 256)
        self.fc13 = nn.Linear(64*5*5, 256)
        self.fc14 = nn.Linear(64*2*2, 256)

        # Per-branch projection: 256 -> 128.
        self.fc21 = nn.Linear(256, 128)
        self.fc22 = nn.Linear(256, 128)
        self.fc23 = nn.Linear(256, 128)
        self.fc24 = nn.Linear(256, 128)

        # Classifier over the four concatenated 128-dimensional branch outputs.
        self.fc33 = nn.Linear(128*4, 10)

    def _branch(self, inp, conv_a, conv_b, conv_c, fc_hidden, fc_embed):
        """Run one branch: conv -> conv -> conv+pool -> flatten -> two linears."""
        feat = F.relu(conv_a(inp))
        feat = F.relu(conv_b(feat))
        feat = F.relu(self.maxpool(conv_c(feat)))
        # view(-1, n) rather than flatten(1): it also yields a (1, n) row when
        # the input carries no batch dimension.
        feat = feat.view(-1, fc_hidden.in_features)
        feat = self.dropout1(feat)
        feat = F.relu(fc_hidden(feat))
        feat = self.dropout2(feat)
        return fc_embed(feat)

    def forward(self, inp):
        """Return log-probabilities over the 10 classes, one row per sample."""
        # Branches are evaluated in a fixed order so that dropout draws from
        # the random generator in the same sequence on every call.
        x = self._branch(inp, self.conv11, self.conv21, self.conv31, self.fc11, self.fc21)
        y = self._branch(inp, self.conv12, self.conv22, self.conv32, self.fc12, self.fc22)
        z = self._branch(inp, self.conv13, self.conv23, self.conv33, self.fc13, self.fc23)
        ze = self._branch(inp, self.conv14, self.conv24, self.conv34, self.fc14, self.fc24)

        out_f = torch.cat((x, y, z, ze), dim=1)
        out = self.fc33(out_f)

        return F.log_softmax(out, dim=1)

In [None]:
# Instantiate the network and move its parameters to the selected device.
model = Net().to(device)

In [None]:
# Loss values logged by train(), one entry per logged batch.
losses_1 = []
# Position within the epoch (in percent) of each logged batch, aligned with losses_1.
losses_2 = []


def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over `train_loader`.

    For each batch the inputs and targets are moved to `device`, the NLL loss
    of the model output is back-propagated and the optimizer takes one step.
    Every 100th batch (starting with the first) a progress line is printed and
    the loss / epoch percentage are appended to `losses_1` / `losses_2`.
    `epoch` is only used in the printed message.
    """
    model.train()

    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        # Forward pass; the loss expects log-probabilities from the model.
        loss = F.nll_loss(model(inputs), labels)
        # Backward pass followed by the parameter update.
        loss.backward()
        optimizer.step()

        # Only every 100th batch is reported and recorded.
        if batch_idx % 100 != 0:
            continue

        loss_value = loss.item()
        percent_done = 100. * batch_idx / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(inputs), len(train_loader.dataset),
            percent_done, loss_value))
        losses_1.append(loss_value)
        losses_2.append(percent_done)
            
    

In [None]:
# Build a new Net on `device`; this rebinds `model`, so the training loop in
# this cell optimises this newly constructed instance.
model = Net().to(device)
# Learning rate returned by adjust_learning_rate for each epoch, in order.
learning_rate = []
def adjust_learning_rate(optimizer, iter, each, base_lr=0.001, gamma=0.95):
    """Apply a step-decayed learning rate to every parameter group.

    The rate is ``base_lr * gamma ** (iter // each)``: it starts at `base_lr`
    and is multiplied by `gamma` each time `iter` crosses another multiple of
    `each` (which may be fractional).

    Args:
        optimizer: optimizer whose ``param_groups`` receive the new rate.
        iter: current iteration/epoch counter.
        each: number of iterations between two decay steps.
        base_lr: rate used before any decay (default 0.001).
        gamma: multiplicative decay factor (default 0.95).

    Returns:
        The learning rate that was set.
    """
    lr = base_lr * (gamma ** (iter // each))
    # Write the rate straight into the live parameter groups; there is no need
    # to serialise and reload the whole optimizer state just to change `lr`.
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    print("Learning rate = ",lr)
    return lr


# Adam optimiser over all model parameters, created with a learning rate of
# 0.001; the rate is overwritten at the start of every epoch below.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train for epochs 1..99, timing the whole run.
start = timeit.default_timer()
for epoch in range(1, 100):
    # Set this epoch's learning rate and keep a record of it.
    lr = adjust_learning_rate(optimizer, epoch, 1.616)
    learning_rate.append(lr)
    train(model, device, train_dataloader, optimizer, epoch)
stop = timeit.default_timer()
print('Total time taken: {} seconds'.format(int(stop - start)))



Learning rate =  0.001
Learning rate =  0.00095
Learning rate =  0.00095
Learning rate =  0.0009025
Learning rate =  0.000857375
Learning rate =  0.000857375
Learning rate =  0.0008145062499999999
Learning rate =  0.0008145062499999999
Learning rate =  0.0007737809374999998
Learning rate =  0.0007350918906249999
Learning rate =  0.0007350918906249999
Learning rate =  0.0006983372960937497
Learning rate =  0.0006634204312890623
Learning rate =  0.0006634204312890623
Learning rate =  0.0006302494097246091
Learning rate =  0.0006302494097246091
Learning rate =  0.0005987369392383787
Learning rate =  0.0005688000922764596
Learning rate =  0.0005688000922764596
Learning rate =  0.0005403600876626366
Learning rate =  0.0005403600876626366
Learning rate =  0.0005133420832795048
Learning rate =  0.00048767497911552955
Learning rate =  0.00048767497911552955
Learning rate =  0.000463291230159753
Learning rate =  0.00044012666865176535
Learning rate =  0.00044012666865176535
Learning rate =  0.0

In [None]:
# Save the model's state_dict (not the module object itself) to "mod.pt"
# in the current working directory.
torch.save(model.state_dict(), "mod.pt")

In [None]:
import os
from torchvision import transforms
import cv2
import PIL

# Put the model in evaluation mode so the dropout layers are disabled.
model.eval()

# Directory holding the images to classify.
IMAGES_DIR = '/content/drive/MyDrive/mnist/images/'

# Deterministic preprocessing for inference. It keeps the fixed steps of the
# training pipeline (centre crop, resize, tensor conversion, normalisation)
# and drops the random augmentations, so the same image always yields the
# same prediction.
eval_transform = transforms.Compose([
    transforms.CenterCrop(26),
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Maps the file name up to its first '.' to the predicted class index.
results = {}
# No gradients are needed for prediction.
with torch.no_grad():
    # Sorted so the processing and output order do not depend on the filesystem.
    for file in sorted(os.listdir(IMAGES_DIR)):
        img_name = file.split('.')[0]
        # The context manager closes the underlying file once it is decoded.
        with PIL.Image.open(os.path.join(IMAGES_DIR, file)) as raw_image:
            image = eval_transform(raw_image.convert('L'))
        # Add an explicit batch dimension: (1, H, W) -> (1, 1, H, W).
        image = image.unsqueeze(0).to(device)
        output = model(image)
        results[img_name] = output.argmax(dim=1).item()

print(results)

{'8797': 4, '9162': 0, '9134': 0, '8653': 5, '9244': 0, '8752': 0, '9182': 3, '9884': 7, '8778': 8, '9349': 5, '8856': 3, '9792': 4, '9747': 5, '9164': 7, '8530': 8, '9227': 3, '9513': 5, '9778': 6, '9577': 2, '8983': 0, '9053': 6, '9670': 3, '9842': 6, '9591': 1, '9793': 6, '9531': 6, '9111': 2, '8708': 1, '8740': 1, '8565': 7, '9922': 4, '8745': 1, '9913': 2, '8690': 9, '8676': 5, '9478': 5, '9135': 2, '8728': 8, '8807': 9, '9886': 6, '9641': 1, '9959': 8, '9535': 2, '9229': 3, '9374': 7, '8705': 9, '9753': 0, '8687': 6, '9242': 7, '9687': 0, '9930': 0, '9085': 5, '9599': 1, '9304': 2, '9693': 6, '8636': 1, '8881': 0, '9665': 6, '9353': 4, '9133': 5, '9973': 9, '9311': 0, '9875': 8, '8601': 5, '9544': 9, '9579': 3, '9824': 8, '8716': 3, '8809': 1, '9915': 4, '9038': 0, '9493': 5, '9061': 1, '9076': 6, '8641': 7, '9479': 3, '9436': 0, '9145': 2, '9064': 4, '9642': 9, '9848': 1, '9235': 3, '9491': 3, '8541': 7, '8593': 9, '9829': 9, '8753': 3, '8957': 3, '9905': 3, '9239': 0, '9438': 1

In [None]:
# Payload to be written out: the group name plus the per-image predictions.
query = {
    'groupname': "La Boccia",
    "images": results,
}

print(query)

{'groupname': 'La Boccia', 'images': {'8797': 4, '9162': 0, '9134': 0, '8653': 5, '9244': 0, '8752': 0, '9182': 3, '9884': 7, '8778': 8, '9349': 5, '8856': 3, '9792': 4, '9747': 5, '9164': 7, '8530': 8, '9227': 3, '9513': 5, '9778': 6, '9577': 2, '8983': 0, '9053': 6, '9670': 3, '9842': 6, '9591': 1, '9793': 6, '9531': 6, '9111': 2, '8708': 1, '8740': 1, '8565': 7, '9922': 4, '8745': 1, '9913': 2, '8690': 9, '8676': 5, '9478': 5, '9135': 2, '8728': 8, '8807': 9, '9886': 6, '9641': 1, '9959': 8, '9535': 2, '9229': 3, '9374': 7, '8705': 9, '9753': 0, '8687': 6, '9242': 7, '9687': 0, '9930': 0, '9085': 5, '9599': 1, '9304': 2, '9693': 6, '8636': 1, '8881': 0, '9665': 6, '9353': 4, '9133': 5, '9973': 9, '9311': 0, '9875': 8, '8601': 5, '9544': 9, '9579': 3, '9824': 8, '8716': 3, '8809': 1, '9915': 4, '9038': 0, '9493': 5, '9061': 1, '9076': 6, '8641': 7, '9479': 3, '9436': 0, '9145': 2, '9064': 4, '9642': 9, '9848': 1, '9235': 3, '9491': 3, '8541': 7, '8593': 9, '9829': 9, '8753': 3, '8957

In [None]:
import json
# Serialise the payload as JSON into query.json in the working directory.
with open("query.json", "w") as write_file:
    write_file.write(json.dumps(query))