# 102Flowers Image Classifier

This is the main notebook for the project. See the associated report (WIP) for more information.

### Imports

In [None]:
import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torchvision import datasets, transforms, models

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.io

import ssl
# NOTE(review): this replaces the default HTTPS context factory with the
# unverified one, so HTTPS requests that rely on the default context skip
# certificate verification. Presumably a workaround for certificate errors
# during the dataset download - confirm it is still needed before keeping it.
ssl._create_default_https_context = ssl._create_unverified_context

### Hyperparameters

In [None]:
# Hyperparameters used by the data loaders and the training loop.
# TODO: Tune these values.
training_batch_size = 64
validation_batch_size = 64
test_batch_size = 64
epochs = 5
# A learning rate of 0 would leave the weights unchanged on every optimiser
# step, so start from a small positive value instead.
learning_rate = 1e-3

### Device

In [None]:
# Choose the compute device: the first CUDA GPU when one is available,
# otherwise fall back to the CPU.
if not torch.cuda.is_available():
    print("No GPUs found, using CPU.")
    device = torch.device("cpu")
else:
    print(f"Found {torch.cuda.device_count()} GPUs. Using cuda:0.")
    device = torch.device("cuda:0")

### Load Dataset

The raw images are too large to train on efficiently. A CNN can accept images of any size, but it is common practice to resize the inputs to 224x224. This is not essential; the size is chosen mainly for speed. A larger image size can make classification more accurate, but it slows down training. We should experiment with different input sizes.

In [None]:
# Every sample must come out with the same shape so that a DataLoader can
# stack samples into a batch. Resize(256) only fixes the shorter edge, so a
# centre crop to 224x224 is applied afterwards.
# Targets are left as the integer class indices the dataset returns, which is
# the form nn.CrossEntropyLoss accepts, so no one-hot target_transform is used.
image_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
])

# Training split.
training_data = datasets.Flowers102(
    root="data",
    split="train",
    transform=image_transform,
    download=True,
)

# Validation split.
validation_data = datasets.Flowers102(
    root="data",
    split="val",
    transform=image_transform,
    download=True,
)

# Held-out test split.
testing_data = datasets.Flowers102(
    root="data",
    split="test",
    transform=image_transform,
    download=True,
)

### DataLoaders

In [None]:
# Only the training loader shuffles: a fresh sample order each epoch helps
# training, while a fixed order keeps validation and test runs reproducible.
train_dataloader = DataLoader(training_data, batch_size=training_batch_size, shuffle=True)
validation_dataloader = DataLoader(validation_data, batch_size=validation_batch_size, shuffle=False)
test_dataloader = DataLoader(testing_data, batch_size=test_batch_size, shuffle=False)

### Model

In [None]:
# Lookup table of one-hot vectors: row i encodes class index i.
class_indices = torch.arange(102)
onehot = F.one_hot(class_indices, num_classes=102)
onehot

In [None]:
# Images are resized by the dataset transforms before they reach the model.
# The adaptive pooling layer below means the network does not depend on one
# fixed spatial size.

class NeuralNetwork(nn.Module):
    """Small convolutional baseline that maps RGB images to class logits.

    The previous version wrapped an empty ``nn.Sequential``, so it returned
    its input unchanged and had no trainable parameters. This version stacks
    three convolution stages, pools the feature map down to a single value per
    channel, and finishes with a linear classifier.

    Args:
        num_classes: Number of output logits. Defaults to 102.
    """

    def __init__(self, num_classes=102):
        super().__init__()
        self.layers = nn.Sequential(
            # Stage 1: 3 input channels (RGB) -> 16 feature maps.
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            # Stage 2: 16 -> 32 feature maps.
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            # Stage 3: 32 -> 64 feature maps.
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            # Collapse whatever spatial size remains to 1x1 so the linear
            # layer always sees exactly 64 features.
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(64, num_classes),
        )

    def forward(self, x):
        """Return raw (unnormalised) logits of shape ``(batch, num_classes)``.

        Args:
            x: Float tensor of shape ``(batch, 3, height, width)``.
        """
        return self.layers(x)
  
# Instantiate the network, then move it onto the selected device.
model = NeuralNetwork()
model = model.to(device)

### Labels

In [None]:
# Load the label file from the dataset directory and print the distinct
# label values it contains.
mat = scipy.io.loadmat('data/flowers-102/imagelabels.mat')
distinct_labels = set(mat['labels'][0])
print(distinct_labels)

### Loss Function

There are many different loss functions that we can use:
- Mean Absolute Error: torch.nn.L1Loss
    - This should be used for regression when the target contains outliers (it is robust to noise).
- Smooth L1 Loss: torch.nn.SmoothL1Loss
    - This should be used for regression when the features have large values (well suited to most problems).
- Mean Squared Error Loss (MSE): torch.nn.MSELoss
    - This should be used for regression when the numerical values are not large and the problem is not very high-dimensional.
- Cross Entropy Loss: torch.nn.CrossEntropyLoss
    - This should be used for classification tasks where we want a confident model and require high precision and recall.
- Negative Log-Likelihood Loss: torch.nn.NLLLoss
    - This is useful for classification, for smaller and quicker training runs, and for simple tasks.
- There are various other loss functions available, but they are not appropriate for our project.

In [None]:
# Based on the comparison above, cross-entropy is the most appropriate loss
# function for this classification task.
loss_fn = torch.nn.CrossEntropyLoss()

### Optimiser

In [None]:
# Placeholder: no optimiser has been chosen yet, so None is passed to train().
# TODO: construct one (e.g. from torch.optim) over model.parameters() with
# learning_rate once the model defines trainable layers.
optimiser = None

### Training

In [None]:
def train(dataloader, model, loss_fn, optimiser):
    """Placeholder for one training pass; currently performs no work.

    Planned steps (not yet implemented):
      1. Train the model on the batches from ``dataloader``.
      2. Compute the error of the prediction with ``loss_fn``.
      3. Backpropagate, using ``optimiser`` to update the model.

    Args:
        dataloader: Source of training batches (unused for now).
        model: Network to train (unused for now).
        loss_fn: Loss function (unused for now).
        optimiser: Optimiser (unused for now).

    Returns:
        None.
    """
    return None

### Conduct Training Process

In [None]:
# Call train() once per epoch over the training loader.
for t in range(epochs):
    train(
        dataloader=train_dataloader,
        model=model,
        loss_fn=loss_fn,
        optimiser=optimiser,
    )