# Public Image Dataset

In [None]:
import torch
import torch.nn as nn
from torchvision import transforms, datasets
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from torch.autograd import Variable
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, KFold
from torch.utils.data import random_split, DataLoader, SubsetRandomSampler, ConcatDataset
from sklearn.metrics import *
import pandas as pd
import seaborn as sb

### Variables

In [None]:
# Settings shared by every experiment in this notebook.
LR = 1e-4  # learning rate
# Every image is resized to 224x224 and converted to a tensor.
TRANS = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
# True when a CUDA device is available.
use_gpu = torch.cuda.is_available()

### Data input & preprocessing

In [None]:
# Build the image dataset from the weather folder, applying TRANS to each image.
data = datasets.ImageFolder(
    'drive/MyDrive/weather',
    transform=TRANS,
)
# Show the dataset's class_to_idx mapping.
print(data.class_to_idx)

### Model Construction

In [None]:
class CNN(nn.Module):
  """Two-stage convolutional classifier for 3-channel 224x224 images.

  Each stage is Conv2d(kernel 5, stride 1, padding 2) -> ReLU -> MaxPool2d(2).
  The convolution keeps the spatial size and the pooling halves it, so a
  224x224 input becomes 112x112 after `conv1` and 56x56 after `conv2`, while
  the channel count grows 3 -> 16 -> 32.  The flattened 32*56*56 features feed
  a single linear layer that returns unnormalised class scores.

  Args:
    num_classes: number of output scores.  Defaults to 4, the value that was
      previously hard-coded, so `CNN()` builds the same network as before.
  """

  def __init__(self, num_classes=4):
    super().__init__()
    self.conv1 = nn.Sequential(
        nn.Conv2d(3, 16, kernel_size=5, stride=1, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(2),
    )
    self.conv2 = nn.Sequential(
        nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(2),
    )
    # 32 channels * 56 * 56 spatial positions; only valid for 224x224 inputs.
    self.out = nn.Linear(32 * 56 * 56, num_classes)

  def forward(self, x):
    """Return class scores of shape (batch, num_classes) for image batch `x`."""
    x = self.conv1(x)
    x = self.conv2(x)
    # Flatten everything except the batch dimension for the linear layer.
    x = x.view(x.size(0), -1)
    return self.out(x)

def reset_weights(m):
  """Call `m.reset_parameters()` when `m` has that attribute; otherwise do nothing."""
  if not hasattr(m, 'reset_parameters'):
    return
  m.reset_parameters()

### Function Definitions

In [None]:
def ML(cnn,data,EPOCH,BATCH,log):
  """Train and evaluate `cnn` on `data` with shuffled 3-fold cross-validation.

  For every fold the layer parameters are re-initialised, the network is
  trained for `EPOCH` passes over the fold's training indices with Adam
  (learning rate `LR`) and cross-entropy loss, and the held-out indices are
  then classified one sample at a time.  All metrics are averaged over folds.

  Args:
    cnn: network to train.  It is modified in place and is left holding the
      weights learned in the last fold.
    data: indexable dataset yielding (image, label) pairs.  Assumes four
      classes labelled 0..3, matching the hard-coded metric size.
    EPOCH: number of training passes per fold.
    BATCH: mini-batch size used for training.
    log: pass 1 to print fold/epoch progress; any other value is silent.

  Returns:
    A pair `(scores, matrix)`.  `scores` is `[accuracy, recall, precision,
    f1]`, where accuracy is a single number and the other three are
    per-class arrays of length 4.  `matrix` is the fold-averaged 4x4
    confusion matrix as nested lists of ints (rows are real classes,
    columns are predicted classes).
  """
  n_splits = 3
  n_classes = 4
  # Fixed label list so every fold yields arrays of the same shape, even when
  # a class is missing from that fold's held-out samples or predictions.
  labels = list(range(n_classes))

  kfold = KFold(n_splits=n_splits, shuffle=True)
  loss_func = nn.CrossEntropyLoss()

  acc = 0.0
  rec = np.zeros(n_classes)
  prec = np.zeros(n_classes)
  f1 = np.zeros(n_classes)
  matrix = np.zeros((n_classes, n_classes))

  for fold, (train_idx, test_idx) in enumerate(kfold.split(data)):
    cnn.apply(reset_weights)
    # Adam keeps running statistics per parameter.  Building the optimizer
    # once outside this loop would carry them from one fold into the next
    # even though the weights themselves are reset.
    optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
    if log == 1:
      print('-------Fold :',fold,'start--------')

    train_data = DataLoader(data, batch_size=BATCH,
                            sampler=SubsetRandomSampler(train_idx))
    test_data = DataLoader(data, batch_size=1,
                           sampler=SubsetRandomSampler(test_idx))

    for epoch in range(EPOCH):
      if log == 1:
        print('-- Epoch :',epoch,'--')
      for x, y in train_data:
        loss = loss_func(cnn(x), y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    pred_y = []
    real_y = []
    # No gradients are needed for evaluation; skip building autograd graphs.
    with torch.no_grad():
      for x, y in test_data:
        valid_output = cnn(x)
        # batch_size is 1, so both tensors hold exactly one element.
        pred_y.append(torch.max(valid_output, 1)[1].item())
        real_y.append(y.item())

    acc += accuracy_score(real_y, pred_y)
    rec += recall_score(real_y, pred_y, labels=labels, average=None)
    prec += precision_score(real_y, pred_y, labels=labels, average=None)
    f1 += f1_score(real_y, pred_y, labels=labels, average=None)
    matrix += confusion_matrix(real_y, pred_y, labels=labels)

  acc /= n_splits
  rec /= n_splits
  prec /= n_splits
  f1 /= n_splits
  scores = [acc, rec, prec, f1]

  # Averaged counts are rounded to whole numbers so they can be shown as ints.
  matrix = np.rint(matrix / n_splits).astype(int).tolist()

  if log == 1:
    print('------------ end -------------')
  return scores, matrix

def testing(test_data,cnn):
  """Evaluate `cnn` on every sample of `test_data`, one sample per step.

  Args:
    test_data: dataset yielding (image, label) pairs; it is wrapped in a
      DataLoader with batch size 1.
    cnn: model called on each image; the predicted class is the index of
      the largest output score.

  Returns:
    A pair `(scores, matrix)`.  `scores` is `[accuracy, recall, precision,
    f1]`, with recall, precision and f1 computed per class
    (`average=None`).  `matrix` is the confusion matrix as nested lists.
  """
  loader = DataLoader(test_data, batch_size=1)
  pred_y = []
  real_y = []
  # Inference only: skip building autograd graphs for each forward pass.
  with torch.no_grad():
    for x, y in loader:
      valid_output = cnn(x)
      # batch_size is 1, so both tensors hold exactly one element.
      pred_y.append(torch.max(valid_output, 1)[1].item())
      real_y.append(y.item())

  scores = [
      accuracy_score(real_y, pred_y),
      recall_score(real_y, pred_y, average=None),
      precision_score(real_y, pred_y, average=None),
      f1_score(real_y, pred_y, average=None),
  ]
  matrix = confusion_matrix(real_y, pred_y).tolist()
  return scores, matrix

def result(data,scores,matrix):
  """Report one evaluation: accuracy, a per-class metric table and a heatmap.

  Args:
    data: object whose `class_to_idx` keys are used as the class names.
    scores: sequence whose first item is the accuracy and whose remaining
      three items are the recall, precision and f1 values per class.
    matrix: confusion matrix with integer entries (the heatmap annotations
      use the 'd' format).
  """
  class_names = data.class_to_idx
  print('accrucy score :',scores[0])

  metric_table = pd.DataFrame(
      scores[1:],
      columns=list(class_names),
      index=['recall score', 'precision score', 'f1 score'],
  )
  display(metric_table)

  # Rows are labelled 'real' and columns 'predicted' on the plot below.
  confusion = pd.DataFrame(matrix, index=class_names, columns=class_names)
  sb.heatmap(confusion, annot=True, fmt='d', linewidth=5, cmap='YlGnBu')
  plt.title('confusion matrix')
  plt.xlabel('predicted')
  plt.ylabel('real')
  plt.show()

### Training with different hyper-parameters

In [None]:
# Experiment: 1 epoch per fold, batch size 64, progress output off.
cnn = CNN()

scores, matrix = ML(cnn, data, EPOCH=1, BATCH=64, log=0)
result(data, scores, matrix)

In [None]:
# Experiment: 5 epochs per fold, batch size 64, progress output off.
cnn = CNN()

scores, matrix = ML(cnn, data, EPOCH=5, BATCH=64, log=0)
result(data, scores, matrix)

In [None]:
# Experiment: 20 epochs per fold, batch size 64, progress output off.
cnn = CNN()

scores, matrix = ML(cnn, data, EPOCH=20, BATCH=64, log=0)
result(data, scores, matrix)

In [None]:
# Experiment: 5 epochs per fold, batch size 8, progress output off.
cnn = CNN()

scores, matrix = ML(cnn, data, EPOCH=5, BATCH=8, log=0)
result(data, scores, matrix)

In [None]:
# Experiment: 5 epochs per fold, batch size 256, progress output off.
cnn = CNN()

scores, matrix = ML(cnn, data, EPOCH=5, BATCH=256, log=0)
result(data, scores, matrix)

### Data Augmentation (flip)

In [None]:
# p=1 means every image is flipped horizontally before the resize and
# tensor conversion.
FLIP = transforms.Compose([
    transforms.RandomHorizontalFlip(p=1),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
flip_data = datasets.ImageFolder('drive/MyDrive/weather', transform=FLIP)

# Original images followed by their flipped counterparts.
flip_data = ConcatDataset([data, flip_data])

In [None]:
# Cross-validate on the flip-augmented dataset: 20 epochs, batch size 64.
cnn = CNN()

scores, matrix = ML(cnn, flip_data, EPOCH=20, BATCH=64, log=0)
# `data` is passed to result() only to supply the class names.
result(data, scores, matrix)

print('--- testing ---')
# NOTE(review): `data` is part of `flip_data`, which the model was just
# trained on, so this looks like it is not a held-out score - confirm intent.
scores, matrix = testing(test_data=data, cnn=cnn)
result(data, scores, matrix)

In [None]:
# Evaluate the current `cnn` on the un-augmented dataset and show the result.
scores, matrix = testing(test_data=data, cnn=cnn)
result(data, scores, matrix)

### Data Augmentation (a variety of methods)

In [None]:
# One augmentation per entry; each is applied on its own, ahead of the
# shared resize and tensor conversion.
AUG = [
    transforms.RandomHorizontalFlip(p=1),
    transforms.RandomCrop(size=300, pad_if_needed=True),
    transforms.ColorJitter(brightness=0.5, contrast=0, saturation=0, hue=0),
    transforms.ColorJitter(brightness=0, contrast=0.5, saturation=0, hue=0),
    transforms.ColorJitter(brightness=0, contrast=0, saturation=0.5, hue=0),
    transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0.5),
    transforms.RandomRotation(degrees=5, expand=False, fill=None),
]

# Start from the un-augmented images and prepend one augmented view of the
# whole folder per entry in AUG.
big_data = data
for aug in AUG:
  BIG = transforms.Compose([aug, transforms.Resize((224, 224)), transforms.ToTensor()])
  aug_data = datasets.ImageFolder('drive/MyDrive/weather', transform=BIG)

  big_data = ConcatDataset([aug_data, big_data])

In [None]:
# Cross-validate on the multi-augmentation dataset: 20 epochs, batch size 64.
cnn = CNN()

scores, matrix = ML(cnn, big_data, EPOCH=20, BATCH=64, log=0)
# `data` is passed to result() only to supply the class names.
result(data, scores, matrix)

print('--- testing ---')
# NOTE(review): `data` is part of `big_data`, which the model was just
# trained on, so this looks like it is not a held-out score - confirm intent.
scores, matrix = testing(test_data=data, cnn=cnn)
result(data, scores, matrix)