<a href="https://colab.research.google.com/github/MQSchleich/PytorchTransferLearning/blob/master/ComputerVision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Goal
The aim is to get a general understanding of transfer learning and to check its performance on MNIST.
# Requirements
For training and prediction you should use a GPU. Only the final layer of the model was trained, to see whether this alone is enough to achieve good results. If you run into memory problems, try running the training and the test prediction in separate runs.

# Model 
Import model, set parameters and import required packages

In [0]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import numpy as np
import matplotlib.pyplot as plt
from torch.utils import data
from PIL import Image
# Put matplotlib into interactive mode.
plt.ion()
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")



## model
# Pretrained ResNet-18 used as a fixed feature extractor: gradients are
# switched off for every parameter that comes with the pretrained network.
model_conv = models.resnet18(pretrained=True)
for param in model_conv.parameters():
    param.requires_grad = False

# Swap the classifier head for a fresh 10-class linear layer. Parameters of
# newly constructed modules have requires_grad=True by default, so this layer
# is the only trainable part of the network.
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, 10)
model_conv = model_conv.to(device)

criterion = nn.CrossEntropyLoss()

# Only the parameters of the final layer are handed to the optimizer.
optimizer_conv = optim.SGD(
    model_conv.fc.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer_conv,
    step_size=7,
    gamma=0.1,
)


# Data Loading and Preprocessing 

In [0]:
# Read the training images, training labels and test images from the archive.
with np.load('prediction-challenge-01-data.npz') as fh:
    data_x, data_y, test_x = fh['data_x'], fh['data_y'], fh['test_x']

# Resize, crop to 224x224, convert to a tensor and normalize per channel.
preprocess_train = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

transformed_x = []
for sample in data_x:
    # `i` keeps the most recent PIL image so that it can be displayed afterwards.
    i = Image.fromarray(np.squeeze(sample)).convert("RGB")
    transformed_x.append(preprocess_train(i).squeeze(0))

# From here on data_x is a list of preprocessed image tensors.
data_x = transformed_x



## Check the transformation 

In [0]:
# `i` still holds the last PIL image produced by the preprocessing loop above.
display(i)

# PyTorch Pipeline 

In [0]:

# Stack the per-image tensors into one tensor and one-hot encode the labels.
tensor_x = torch.stack(data_x)
tensor_y = torch.nn.functional.one_hot(torch.Tensor(data_y).long(), num_classes=10)
image_datasets = data.TensorDataset(tensor_x, tensor_y)

# Random split: 90% of the samples (rounded down) for training, the rest for validation.
train_len = int(0.9 * len(data_x))
val_len = len(data_y) - train_len
train, val = data.random_split(image_datasets, lengths=[train_len, val_len])

# One shuffled loader and one size entry per split.
data_img = {"train": train, "val": val}
dataloaders = {}
dataset_sizes = {}
for split_name in ['train', 'val']:
    split = data_img[split_name]
    dataloaders[split_name] = data.DataLoader(
        split, batch_size=4, shuffle=True, num_workers=4
    )
    dataset_sizes[split_name] = len(split)

# Set-Up Image Viewer

In [0]:
def imshow(inp, title=None, mean=None, std=None):
    """Show an image tensor with matplotlib.

    Args:
        inp: Image tensor, either 2-D ``(H, W)`` or 3-D. A 3-D tensor whose
            first axis has 1, 3 or 4 entries is treated as channel-first
            ``(C, H, W)`` and converted to the channel-last layout that
            ``plt.imshow`` accepts; a single channel is shown as a 2-D image.
        title: Optional title drawn above the image.
        mean: Optional per-channel mean of a preceding normalization. When
            given together with ``std``, the normalization is undone and the
            values are clipped to ``[0, 1]`` before plotting.
        std: Optional per-channel standard deviation (see ``mean``).
    """
    # detach()/cpu() make this work for GPU tensors and tensors that track gradients.
    inp = inp.detach().cpu().numpy()
    if inp.ndim == 3 and inp.shape[0] in (1, 3, 4):
        # plt.imshow expects (H, W) or (H, W, C), not channel-first data.
        inp = inp.transpose((1, 2, 0))
        if inp.shape[2] == 1:
            inp = inp[:, :, 0]
    if mean is not None and std is not None:
        inp = np.clip(np.asarray(std) * inp + np.asarray(mean), 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated


# Pull one mini-batch from the training loader; `classes` holds its one-hot labels.
first_batch = next(iter(dataloaders['train']))
inputs, classes = first_batch


# Training the Model 
The output of the model can be adjusted; however, when using cross-entropy as the loss function, one has to take care of the label dimensions in the training loop.

In [0]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a training pass followed by a validation pass over the
    notebook-level ``dataloaders`` (sample counts come from ``dataset_sizes``);
    batches are moved to the notebook-level ``device``. Whenever the validation
    accuracy improves, the weights are snapshotted in memory and the whole
    model is saved to ``modelres50.pt``.

    Args:
        model: Network to optimize; it is modified in place.
        criterion: Loss called as ``criterion(outputs, class_indices)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once per epoch, after the training pass.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, carrying the weights of the epoch with the
        highest validation accuracy.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0  # plain int, so an empty loader cannot break the accuracy computation

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                # Labels may be one-hot (N, C) or plain class indices (N,);
                # the loss and the accuracy both need class indices.
                lab = labels.argmax(dim=1) if labels.dim() > 1 else labels

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, lab)
                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics (loss is a batch mean, so weight it by the batch size)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == lab).item()
            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                # The file name is kept as is because the prediction cell
                # loads the checkpoint under exactly this name.
                torch.save(model, "modelres50.pt")
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [0]:
# Run 25 epochs of training; the returned model carries the best validation weights.
model_conv = train_model(
    model_conv,
    criterion,
    optimizer_conv,
    exp_lr_scheduler,
    num_epochs=25,
)

# Evaluation of the Model
## Data Processing 


In [0]:
import numpy as np
import torchvision
import torchvision.transforms as transforms

#load test_set
with np.load('prediction-challenge-01-data.npz') as fh:
    test_x = fh['test_x']

# Same resize / crop / normalization pipeline as used for the training images.
preprocess_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transformed_x = []
for sample in test_x:
    img = Image.fromarray(np.squeeze(sample)).convert("RGB")
    # No per-image display here: rendering every test image floods the output.
    transformed_x.append(preprocess_test(img))

# From here on test_x is a list of preprocessed image tensors.
test_x = transformed_x

# Show only the last image as a sanity check of the conversion.
if transformed_x:
    display(img)

## Predictions

In [0]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
predictions = []
# The checkpoint is a fully pickled model, so only load files you created
# yourself. map_location lets a checkpoint written on a GPU be opened on a
# CPU-only machine.
# NOTE(review): PyTorch >= 2.6 defaults torch.load to weights_only=True, which
# rejects fully pickled models; pass weights_only=False there if needed.
model_conv = torch.load("modelres50.pt", map_location=device)
model_conv = model_conv.to(device)
print(torch.cuda.is_available())
model_conv.eval()

# Inference only: disabling autograd avoids keeping graphs alive and saves memory.
with torch.no_grad():
    for start in range(0, len(test_x), 4):
        batch = torch.stack(test_x[start:start + 4]).to(device)
        logits = model_conv(batch)
        _, preds = torch.max(logits, 1)
        predictions.append(preds.cpu().numpy())

# concatenate (unlike np.array(...).flatten()) also handles a shorter last batch.
prediction = np.concatenate(predictions)
# THAT'S YOUR JOB

# MAKE SURE THAT YOU HAVE THE RIGHT FORMAT
assert prediction.ndim == 1
assert prediction.shape[0] == 2000

# AND SAVE EXACTLY AS SHOWN BELOW
np.save('prediction.npy', prediction)