# Machine Perception Assignment
Please note: full references for all code adapted or used in this notebook are listed at the bottom of the notebook.

## Task 1

In [None]:
# Mount Google Drive so the dataset folders under /content/gdrive are reachable from Colab
from google.colab import drive
drive.mount('/content/gdrive')

# Packages used throughout Task 1

import numpy as np # Array maths (min / max over the points of each detected region)
import cv2 as cv # OpenCV: image reading, filtering, thresholding, MSER, connected components
import os # Used together with glob to build the dataset file patterns
import glob # Expands the '*.jpg' patterns into lists of image paths
from matplotlib import pyplot as plt
from google.colab.patches import cv2_imshow # Image display helper provided by Colab

Image processing

In [None]:
# Pre-processing used for sign detection: grayscale -> Gaussian blur -> adaptive threshold
def image_process_set_1(img):
  """Return an inverted binary image of `img` for region detection.

  The BGR input is converted to grayscale, smoothed with a 9x9 Gaussian kernel
  and binarised with an inverted Gaussian adaptive threshold
  (block size 25, constant 9, maximum value 255).
  """
  blur_kernel = (9, 9)
  block_size, offset = 25, 9
  smoothed = cv.GaussianBlur(cv.cvtColor(img, cv.COLOR_BGR2GRAY), blur_kernel, 0)
  return cv.adaptiveThreshold(smoothed, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C,
                              cv.THRESH_BINARY_INV, block_size, offset)

In [None]:
# Pre-processing used for digit extraction: grayscale -> invert -> Gaussian blur -> Otsu threshold
def image_process_set_2(img):
  """Return a binary image of `img` for connected-component analysis.

  The BGR input is converted to grayscale and inverted, smoothed with a 3x3
  Gaussian kernel, then binarised with an inverted Otsu threshold.
  """
  inverted = cv.bitwise_not(cv.cvtColor(img, cv.COLOR_BGR2GRAY))
  smoothed = cv.GaussianBlur(inverted, (3, 3), 0)
  # cv.threshold returns (threshold value chosen, binary image); only the image is needed
  _, binary = cv.threshold(smoothed, 0, 255, cv.THRESH_BINARY_INV | cv.THRESH_OTSU)
  return binary

Sign detection

In [None]:
# Locates the sign (the dark box around the numbers) with MSER
# This method has been modified and retrieved from Practical 3 (Retrieve Blobs)
def detect_number_sign(processed_img, og_img):
  """Return the crop of `og_img` that best matches the expected sign shape.

  MSER regions are detected on `processed_img` (area limited to 1000..15000),
  the height/width ratio closest to the expected sign ratio is selected by
  find_closest_sign_ratio, and return_number_sign crops that region out of `og_img`.
  """
  detector = cv.MSER_create(max_area=15000, min_area=1000) # Area limits cut down the number of candidates
  candidate_regions = detector.detectRegions(processed_img)[0] # First element holds the region point lists
  chosen_ratio = find_closest_sign_ratio(candidate_regions)
  return return_number_sign(chosen_ratio, candidate_regions, og_img)

#Finds the height/width ratio, among all regions, that is closest to the expected sign ratio
def find_closest_sign_ratio(regions, number_sign_ratio=0.566):
  """Return the bounding-box height/width ratio closest to `number_sign_ratio`.

  Args:
    regions: iterable of point arrays of shape (n_points, 2) holding (x, y)
      coordinates, as produced by MSER region detection.
    number_sign_ratio: target height/width ratio. The default 0.566 was obtained
      by trial and error on the MSER regions of the training set.

  Returns:
    The ratio (h / w) of the region whose ratio has the smallest absolute
    difference to `number_sign_ratio`.

  Raises:
    ValueError: if no region has a non-zero bounding-box width (this includes
      an empty `regions`).
  """
  ratios = []
  for p in regions: #Collect the ratio of every detected region
    xmax, ymax = np.amax(p, axis=0)
    xmin, ymin = np.amin(p, axis=0)
    h = ymax - ymin #Height distance
    w = xmax - xmin #Width distance
    if w == 0:
      # A zero-width box would give inf / nan, and a nan could be picked by min() below
      continue
    ratios.append(h / w)

  if not ratios:
    raise ValueError("No MSER region with a non-zero width was found")

  #Pick the ratio with the smallest absolute difference to the target ratio
  return min(ratios, key=lambda cur_ratio: abs(cur_ratio - number_sign_ratio))

#Goes through all the regions and crops the one whose ratio equals best_ratio
def return_number_sign(best_ratio, regions, og_img):
  """Outline and return the region of `og_img` whose ratio equals `best_ratio`.

  Args:
    best_ratio: height/width ratio selected by find_closest_sign_ratio.
    regions: iterable of point arrays of shape (n_points, 2) holding (x, y) coordinates.
    og_img: original image; a green rectangle is drawn on it for every matching region.

  Returns:
    A slice of `og_img` (not a copy) covering the last matching region.

  Raises:
    ValueError: if no region has a ratio equal to `best_ratio`.
  """
  img_region = None
  for p in regions:
    xmax, ymax = np.amax(p, axis=0)
    xmin, ymin = np.amin(p, axis=0)
    h = ymax - ymin #Height distance
    w = xmax - xmin #Width distance
    if w == 0:
      continue #A zero-width box has no usable ratio

    # Exact comparison is intended: the ratio is recomputed from the same points
    # with the same arithmetic that produced best_ratio
    if h / w == best_ratio:
      cv.rectangle(og_img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 255, 0), 1)
      img_region = og_img[ymin:ymax, xmin:xmax] #Crop only to region

  if img_region is None:
    raise ValueError("No region matches the requested sign ratio")
  return img_region #Return the "best" region that we think is the sign

Digits detection

In [None]:
# Retrieves digits / numbers from obtained number sign area from previous step
def get_digits(sign_img, min_area=152, max_area=1000, min_ratio=0.9):
  """Box and return the digit-like connected components found in `sign_img`.

  Args:
    sign_img: BGR crop of the sign; a red rectangle is drawn on it around every
      accepted component.
    min_area: smallest accepted component area in pixels (inclusive).
    max_area: largest accepted component area in pixels (exclusive).
    min_ratio: smallest accepted height/width ratio of the bounding box.

  Returns:
    A list with one image crop (copied before any rectangle is drawn) per
    accepted component, in label order.
  """
  threshold = image_process_set_2(sign_img)
  numLabels, _, stats, _ = cv.connectedComponentsWithStats(threshold, 8, cv.CV_32S)

  boxes = []
  # Label 0 is the background component, so start from label 1
  for i in range(1, numLabels):
    x = stats[i, cv.CC_STAT_LEFT]
    y = stats[i, cv.CC_STAT_TOP]
    w = stats[i, cv.CC_STAT_WIDTH]
    h = stats[i, cv.CC_STAT_HEIGHT]
    area = stats[i, cv.CC_STAT_AREA]
    if min_area <= area < max_area and h / w >= min_ratio:
      boxes.append((int(x), int(y), int(w), int(h)))

  # Crop everything first so the rectangles drawn below do not end up in the crops
  digits = [sign_img[y:y + h, x:x + w].copy() for (x, y, w, h) in boxes]

  for (x, y, w, h) in boxes:
    # Rectangle from the top-left corner (x, y) to (x + w, y + h)
    cv.rectangle(sign_img, (x, y), (x + w, y + h), (0, 0, 255), 1)

  return digits

Task 1 Main Program

In [None]:
# Main Task 1 program that runs all required components
def task_1(filenames):
  """Run sign detection and digit boxing on every image path in `filenames`.

  For each image: read it, pre-process it, detect the sign region, box the
  digits inside that region and display the annotated image. Files that
  cannot be read as an image are reported and skipped.
  """
  for idx, i in enumerate(filenames):
    img = cv.imread(i)
    if img is None: # cv.imread returns None when the file cannot be read / decoded
      print(f"Image {idx+1}: could not read '{i}', skipping")
      continue

    processed_img = image_process_set_1(img)
    detected_sign = detect_number_sign(processed_img, img)

    print(f"Image {idx+1}:")
    # detected_sign is a slice of img, so the digit boxes show up in img as well
    get_digits(detected_sign)
    cv2_imshow(img)

Task 1 Training Results:

In [None]:
train_path = '/content/gdrive/MyDrive/MP/Assignment/BuildingSignageDetection/train' # Folder holding the training images
# All .jpg images in the folder; sorted because glob returns paths in arbitrary order
train_filenames = sorted(glob.glob(os.path.join(train_path, '*.jpg')))
task_1(train_filenames)

Task 1 Validation Results:

In [None]:
val_path = '/content/gdrive/MyDrive/MP/Assignment/BuildingSignageDetection/val' # Folder holding the validation images
# All .jpg images in the folder; sorted because glob returns paths in arbitrary order
val_filenames = sorted(glob.glob(os.path.join(val_path, '*.jpg')))
task_1(val_filenames)

Task 1 Test Results: 

In [None]:
test_path = '/content/gdrive/MyDrive/MP/Assignment/BuildingSignageDetection/test' # Folder holding the test images
# All .jpg images in the folder; sorted because glob returns paths in arbitrary order
test_filenames = sorted(glob.glob(os.path.join(test_path, '*.jpg')))
task_1(test_filenames)

## Task 2

Note: Please do not forget to run the 3 code blocks below to be able to run Task 2 code

In [None]:
!pip install d2l==0.17.2

In [None]:
#Code to connect your google drive with google colaboratory
from google.colab import drive
drive.mount('/content/gdrive')

import torch
import torchvision
from torch import nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from d2l import torch as d2l

import numpy as np 
from google.colab.patches import cv2_imshow 
import matplotlib.pyplot as plt

from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

Common methods used by both approaches to train / evaluate models

In [None]:
"""The following code has been retrived from https://d2l.ai/_modules/d2l/torch.html and has been modified"""
"""Also taken and modified from my Machine Learning (COMP3010) assignment from last semester"""

#Builds the SGD optimiser used by train_model
def create_trainer(net, learning_rate, param_group):
  """Return an SGD optimiser (weight decay 0.001) suited to the class of `net`.

  Args:
    net: the model; its class name selects the architecture branch
      ("ResNet", "VGG", "AlexNet" or "NewModel").
    learning_rate: base learning rate.
    param_group: for ResNet / VGG / AlexNet, when truthy the final layer
      (`fc` or `classifier[6]`) is trained with 10x the base learning rate.
      Ignored for NewModel, which has no final classification layer.

  Returns:
    A torch.optim.SGD instance, or None when the class name is not recognised.
  """
  arch = net.__class__.__name__

  # Per architecture: parameter names of the final layer and how to reach that layer
  if arch == "ResNet":
    head_names = ("fc.weight", "fc.bias")
    head_of = lambda model: model.fc
  elif arch in ("VGG", "AlexNet"):
    head_names = ("classifier.6.weight", "classifier.6.bias")
    head_of = lambda model: model.classifier[6]
  elif arch == "NewModel":
    return torch.optim.SGD(net.parameters(), lr=learning_rate, weight_decay=0.001)
  else:
    return None

  if not param_group: # One learning rate for every parameter
    return torch.optim.SGD(net.parameters(), lr=learning_rate, weight_decay=0.001)

  # Everything except the final layer keeps the base learning rate
  backbone_params = [param for name, param in net.named_parameters()
                     if name not in head_names]
  groups = [{'params': backbone_params},
            {'params': head_of(net).parameters(), 'lr': learning_rate * 10}]
  return torch.optim.SGD(groups, lr=learning_rate, weight_decay=0.001)


# The purpose of the following method is to train the model and also evaluate val/test set
def train_model(net, train_iter, val_iter, test_iter, loss, num_epochs, lr, 
                param_group, devices=d2l.try_all_gpus()):
  """Train `net` and plot / print its training, validation and test metrics.

  Args:
    net: model to train; it is wrapped in nn.DataParallel and moved to devices[0].
    train_iter: iterable of (features, labels) training batches; must support len().
    val_iter: validation batches, or None to skip validation.
    test_iter: test batches, or None to skip testing. When given, `val_iter`
      is evaluated as well, so it must be given too.
    loss: loss function handed to d2l.train_batch_ch13.
    num_epochs: number of passes over `train_iter`.
    lr: base learning rate passed to create_trainer.
    param_group: passed to create_trainer (10x learning rate for the final layer).
    devices: devices used for nn.DataParallel; the default is evaluated once,
      when this function is defined.
  """
  trainer = create_trainer(net, lr, param_group)

  timer, num_batches = d2l.Timer(), len(train_iter)

  # One curve per tracked metric; the evaluation curves depend on which sets were given
  legend = ['train loss', 'train acc']
  if test_iter: #If there is a test set
    legend += ['val acc', 'test acc']
  elif val_iter: #If there is only validation set
    legend += ['val acc']
  animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0, 1],
                          legend=legend)
  # Placeholders for the evaluation curves while only training points are added
  eval_placeholders = (None,) * (len(legend) - 2)

  # Plot about five points per epoch; max() keeps the modulus below from being 0
  # when there are fewer than five batches
  plot_every = max(1, num_batches // 5)

  net = nn.DataParallel(net, device_ids=devices).to(devices[0])
  for epoch in range(num_epochs):
      # Sum of training loss, sum of training accuracy, no. of examples,
      # no. of predictions
      metric = d2l.Accumulator(4)
      for i, (features, labels) in enumerate(train_iter):
          timer.start()
          l, acc = d2l.train_batch_ch13(
              net, features, labels, loss, trainer, devices)
          metric.add(l, acc, labels.shape[0], labels.numel())
          timer.stop()
          if (i + 1) % plot_every == 0 or i == num_batches - 1:
              animator.add(epoch + (i + 1) / num_batches,
                           (metric[0] / metric[2], metric[1] / metric[3])
                           + eval_placeholders)

      val_acc = None
      test_acc = None

      if test_iter: #If we are also doing evaluation on test set
        val_acc = d2l.evaluate_accuracy_gpu(net, val_iter)
        animator.add(epoch + 1, (None, None, val_acc, None))
        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        animator.add(epoch + 1, (None, None, None, test_acc))
      elif val_iter: #If we are doing evaluation only on validation set
        val_acc = d2l.evaluate_accuracy_gpu(net, val_iter)
        animator.add(epoch + 1, (None, None, val_acc))

  # Summary of the last epoch
  if test_iter: #If we are also doing evaluation on test set
    print(f'loss {metric[0] / metric[2]:.3f}, train acc '
          f'{metric[1] / metric[3]:.3f}, val acc {val_acc:.3f}, test acc {test_acc:.3f}')
  elif val_iter: #If no test set, but validation set
    print(f'loss {metric[0] / metric[2]:.3f}, train acc '
          f'{metric[1] / metric[3]:.3f}, val acc {val_acc:.3f}')
  else: #If only training set
    print(f'loss {metric[0] / metric[2]:.3f}, train acc ' f'{metric[1] / metric[3]:.3f}')

  print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec on '
          f'{str(devices)}')

Data loading:

In [None]:
#Params
bs = 64 #Batch size
device = ('cuda' if torch.cuda.is_available() else 'cpu')

# Train / validation preprocessing: resize, centre crop, convert to tensor, normalise per channel
transformations = transforms.Compose([
    transforms.Resize(150),
    transforms.CenterCrop(149),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

# Test preprocessing: same steps as above, including the resize, so test images
# are cropped at the same scale as the train / validation images
test_transformations = transforms.Compose([
    transforms.Resize(150),
    transforms.CenterCrop(149),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

# training data loading (shuffled)
train_dataset = datasets.ImageFolder(root='/content/gdrive/MyDrive/MP/Assignment/coral image classification/train', transform=transformations)  
train_iter = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=2)

# validation data loading (fixed order)
valid_dataset = datasets.ImageFolder(root='/content/gdrive/MyDrive/MP/Assignment/coral image classification/val', transform=transformations)
val_iter = DataLoader(valid_dataset, batch_size=bs, shuffle=False, num_workers=2)

# testing data loading (fixed order)
test_dataset = datasets.ImageFolder(root='/content/gdrive/MyDrive/MP/Assignment/coral image classification/test', transform=test_transformations)
test_iter = DataLoader(test_dataset, batch_size=bs, shuffle=False, num_workers=2)

Common parameters

In [None]:
# Devices returned by d2l.try_all_gpus(); passed to train_model as `devices`
devices = d2l.try_all_gpus()
# Cross-entropy loss (applies softmax internally), averaged over each batch.
# NOTE(review): train_model reports accumulated loss / number of examples; with
# reduction='mean' the printed loss may be scaled down by the batch size -
# confirm against d2l.train_batch_ch13 before comparing loss values.
criterion = nn.CrossEntropyLoss(reduction='mean')

### Approach 1 (Modern ML [Cross Entropy Loss + Softmax])

##### Determine which approach is better - Fine-tune pre-trained vs train from scratch (Using AlexNet as base)

In [None]:
# Model Creation - AlexNet without pretrained weights
alexnet = torchvision.models.alexnet(pretrained=False)
# Replace the last classifier layer with a 2-output layer and Xavier-initialise its weights
alexnet.classifier[6] = nn.Linear(alexnet.classifier[6].in_features, 2)
nn.init.xavier_uniform_(alexnet.classifier[6].weight);

""" Train Model From Scratch """
# Single learning rate for all layers (param_group=False); validation only, no test set
train_model(alexnet, train_iter, val_iter, test_iter=None, loss=criterion, 
            num_epochs=5, lr=5e-2, param_group=False, devices=devices)

In [None]:
# Model Creation - AlexNet with pretrained weights
alexnet = torchvision.models.alexnet(pretrained=True)
# Replace the last classifier layer with a 2-output layer and Xavier-initialise its weights
alexnet.classifier[6] = nn.Linear(alexnet.classifier[6].in_features, 2)
nn.init.xavier_uniform_(alexnet.classifier[6].weight);

""" Finetune Pretrained Model """
# param_group=True: the new last layer is trained with 10x the base learning rate
train_model(alexnet, train_iter, val_iter, test_iter=None, loss=criterion, 
            num_epochs=5, lr=6e-4, param_group=True, devices=devices) #5e-4

##### Determine which model is the best out of AlexNet, VGG16 and ResNet18 (Using Pre-trained which is best from previous test)

In [None]:
# Model Creation - VGG16 with pretrained weights
vgg16 = torchvision.models.vgg16(pretrained=True)
# VGG has no `fc` attribute that is used in its forward pass: the output layer is
# classifier[6], so that is the layer to replace with the 2-output layer
vgg16.classifier[6] = nn.Linear(vgg16.classifier[6].in_features, 2)
nn.init.xavier_uniform_(vgg16.classifier[6].weight);

""" Finetune Pretrained Model """
# param_group=True: the new last layer is trained with 10x the base learning rate
train_model(vgg16, train_iter, val_iter, test_iter=None, loss=criterion, 
            num_epochs=5, lr=3e-3, param_group=True, devices=devices)

In [None]:
# Model Creation - ResNet18 with pretrained weights
resnet18 = torchvision.models.resnet18(pretrained=True)
# Replace the fc layer with a 2-output layer and Xavier-initialise its weights
resnet18.fc = nn.Linear(resnet18.fc.in_features, 2)
nn.init.xavier_uniform_(resnet18.fc.weight);

""" Finetune Pretrained Model """
# param_group=True: the new fc layer is trained with 10x the base learning rate
train_model(resnet18, train_iter, val_iter, test_iter=None, loss=criterion, 
            num_epochs=5, lr=1e-2, param_group=True, devices=devices)

##### Best model train / evaluation (Pre-trained ResNet18) with test set (Once available)

In [None]:
# Model Creation - ResNet18 with pretrained weights (re-created so training starts from the pretrained weights again)
resnet18 = torchvision.models.resnet18(pretrained=True)
# Replace the fc layer with a 2-output layer and Xavier-initialise its weights
resnet18.fc = nn.Linear(resnet18.fc.in_features, 2)
nn.init.xavier_uniform_(resnet18.fc.weight);

""" Finetune Pretrained Model """
# Same settings as the previous ResNet18 run, but the test set is evaluated as well
train_model(resnet18, train_iter, val_iter, test_iter=test_iter, loss=criterion, 
            num_epochs=5, lr=1e-2, param_group=True, devices=devices)

### Approach 2 (SVM)

##### Class to make new model from ResNet-18 (Feature extraction)

In [None]:
"""The following code has been retrieved and slightly modified from my Machine Learning (COMP3010) Assignment"""

# Feature extractor: a pretrained network with its last child module removed
class NewModel(nn.Module):
  """Wraps `pretrained_model` without its final child and returns flattened features.

  Attributes:
    flatten: nn.Flatten applied to the output of the truncated network.
    new_model: nn.Sequential of every child of `pretrained_model` except the last one.
  """

  def __init__(self, pretrained_model):
    super().__init__()
    self.flatten = nn.Flatten()
    # Drop the last child (the classifier / fc layer) and chain the remaining ones
    kept_layers = list(pretrained_model.children())[:-1]
    self.new_model = nn.Sequential(*kept_layers)

  def forward(self, x):
    """Run `x` through the truncated network and flatten the result per sample."""
    return self.flatten(self.new_model(x))

##### Method to load entire dataset into memory to be used for SVM

In [None]:
#Loads every (image, label) pair of a dataset into two tensors, keeping the dataset order
def load_dataset_into_memory(dataset): 
  """Return (images, labels) for the whole `dataset`.

  Args:
    dataset: indexable object with len() whose items are (image tensor, label) pairs.

  Returns:
    A tuple (set_X, set_Y): the images stacked along a new first dimension and
    the labels as a torch.IntTensor, both in dataset order.
  """
  samples = [dataset[idx] for idx in range(len(dataset))]
  stacked_images = torch.stack([image for image, _ in samples])
  label_tensor = torch.IntTensor([label for _, label in samples])
  return stacked_images, label_tensor

##### SVM Methods for fitting and classification

In [None]:
# Method to fit the SVM classifier on the extracted training dataset features
def SVM_fitting():
  """Fit the global `svm_classifier` on features of the global `train_dataset`.

  The whole training set is loaded into memory, passed through the global
  feature extractor `new_net` in a single forward pass, and the resulting
  features are used to fit the SVM.
  """
  # Run on whichever device the feature extractor lives on instead of assuming CUDA
  feature_device = next(new_net.parameters()).device

  train_X, train_Y = load_dataset_into_memory(train_dataset)

  was_training = new_net.training
  new_net.eval() # Inference mode for layers that behave differently while training
  try:
    # No gradients are needed for feature extraction, so do not build the autograd graph
    with torch.no_grad():
      train_features = new_net(train_X.to(feature_device)).cpu().numpy()
  finally:
    new_net.train(was_training) # Leave the model in the mode it was found in
  del train_X # Free the image tensor before fitting

  #Fitting SVM classifier on extracted features from training dataset
  svm_classifier.fit(train_features, train_Y.numpy())

#Method to use SVM as a classifier on extracted validation / testing set features
def SVM_classification(test_flag):
  """Print the accuracy of the global `svm_classifier` on the validation (and test) set.

  Args:
    test_flag: when truthy, the global `test_dataset` is evaluated in addition
      to the global `valid_dataset`.

  Each dataset is loaded into memory, passed through the global feature
  extractor `new_net` in a single forward pass, and classified by the SVM.
  """
  # Run on whichever device the feature extractor lives on instead of assuming CUDA
  feature_device = next(new_net.parameters()).device

  was_training = new_net.training
  new_net.eval() # Inference mode for layers that behave differently while training
  try:
    #Validation dataset loading and features extraction
    val_X, val_Y = load_dataset_into_memory(valid_dataset)
    # No gradients are needed for feature extraction, so do not build the autograd graph
    with torch.no_grad():
      val_features = new_net(val_X.to(feature_device)).cpu().numpy()
    del val_X # Free the image tensor before the next dataset is loaded

    #Prediction using SVM classifier
    val_predictions = svm_classifier.predict(val_features)
    del val_features

    # Accuracy on Validation
    print("Validation Accuracy:", accuracy_score(val_Y.numpy(), val_predictions))

    if test_flag:
      test_X, test_Y = load_dataset_into_memory(test_dataset)
      with torch.no_grad():
        test_features = new_net(test_X.to(feature_device)).cpu().numpy()
      del test_X

      #Prediction using SVM classifier
      test_predictions = svm_classifier.predict(test_features)

      # Accuracy on Test
      print("Test Accuracy:", accuracy_score(test_Y.numpy(), test_predictions))
  finally:
    new_net.train(was_training) # Leave the model in the mode it was found in

##### SVM Classification on fine-tuned pre-trained ResNet18



In [None]:
res_net = torchvision.models.resnet18(pretrained=True)

new_net = NewModel(res_net).to(device) # Drops the last child module (the fc layer) of ResNet18
# No validation / test evaluation here: the network has no classification layer.
# NOTE(review): NewModel returns flattened features, so the cross-entropy loss is
# applied directly to those features - confirm this is the intended fine-tuning objective.
train_model(new_net, train_iter, val_iter=None, test_iter=None, loss=criterion, 
            num_epochs=5, lr=1e-1, param_group=None, devices=devices) 

In [None]:
# Linear-kernel SVM on features from the current global `new_net`; validation accuracy only
svm_classifier = SVC(kernel='linear')
SVM_fitting()
SVM_classification(test_flag=False)

In [None]:
# RBF-kernel SVM on features from the current global `new_net`; validation accuracy only
svm_classifier = SVC(kernel='rbf')
SVM_fitting()
SVM_classification(test_flag=False)

In [None]:
# Polynomial-kernel SVM on features from the current global `new_net`; validation accuracy only
svm_classifier = SVC(kernel='poly')
SVM_fitting()
SVM_classification(test_flag=False)

##### SVM Classification on non-fine-tuned pre-trained ResNet18


In [None]:
# Fresh pretrained ResNet18 with no further training in this cell
res_net = torchvision.models.resnet18(pretrained=True)

new_net = NewModel(res_net).to(device) # Drops the last child module (the fc layer) of ResNet18
# Linear-kernel SVM on the extracted features; validation accuracy only
svm_classifier = SVC(kernel='linear')
SVM_fitting()
SVM_classification(test_flag=False)

In [None]:
# Fresh pretrained ResNet18 with no further training in this cell
res_net = torchvision.models.resnet18(pretrained=True)

new_net = NewModel(res_net).to(device) # Drops the last child module (the fc layer) of ResNet18
# RBF-kernel SVM on the extracted features; validation accuracy only
svm_classifier = SVC(kernel='rbf')
SVM_fitting()
SVM_classification(test_flag=False)

In [None]:
# Fresh pretrained ResNet18 with no further training in this cell
res_net = torchvision.models.resnet18(pretrained=True)

new_net = NewModel(res_net).to(device) # Drops the last child module (the fc layer) of ResNet18
# Polynomial-kernel SVM on the extracted features; validation accuracy only
svm_classifier = SVC(kernel='poly')
SVM_fitting()
SVM_classification(test_flag=False)

##### Evaluation of best method with validation and test set (Once available)



In [None]:
res_net = torchvision.models.resnet18(pretrained=True)

new_net = NewModel(res_net).to(device) # Drops the last child module (the fc layer) of ResNet18
# No validation / test evaluation during training: the network has no classification layer
train_model(new_net, train_iter, val_iter=None, test_iter=None, loss=criterion, 
            num_epochs=5, lr=1e-1, param_group=None, devices=devices) 

# Linear-kernel SVM on the extracted features; test_flag=True also reports test accuracy
svm_classifier = SVC(kernel='linear')
SVM_fitting()
SVM_classification(test_flag=True)

### References for all code adapted / used in this notebook 


Meganha, Felipe. 2022. “Detecting handwriting regions with openCV and Python”. Accessed September 21, 2022. https://felipemeganha.medium.com/detecting-handwriting-regions-with-opencv-and-python-ff0b1050aa4e <br>
 
OpenCV. n.d. “OpenCV: Image Thresholding”. Accessed September 20, 2022. https://docs.opencv.org/4.x/d7/d4d/tutorial_py_thresholding.html. <br>
 
OpenCV. n.d. “OpenCV: cv::MSER Class Reference”. Accessed September 21, 2022. https://docs.opencv.org/4.x/d3/d28/classcv_1_1MSER.html. <br>
 
Outram Harrison. 2022. COMP3010 - Tutorial 7. Youtube video, 53:27. https://www.youtube.com/watch?v=NVcJhMtRErE.<br>
 
Outram Harrison. 2022. “Tutorial 7 – Extracting features out of networks” Curtin University. Accessed May 6, 2022. https://colab.research.google.com/drive/1WHbHe9I_AQo1W_bdZOr9fP4cGb6543d7. <br>
 
PyTorch Team. n.d. “RESNET” PyTorch. Accessed May 3, 2022. https://pytorch.org/hub/pytorch_vision_resnet/. <br>
 
PyTorch Team. 2017. “TORCHVISION.MODELS” PyTorch. Accessed May 3, 2022. https://pytorch.org/vision/0.8/models.html. <br>
 
PyTorch Team. 2022. “TRAINING A CLASSIFIER” PyTorch. Accessed May 3, 2022. https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html. <br>
 
Zhang, Aston, Zachary C. Lipton, Mu Li, and Alexander J. Smola. 2020. “Source code for d2l.torch” Dive into Deep Learning. Accessed April 30, 2022. https://d2l.ai/_modules/d2l/torch.html. <br>
 
Zhang, Aston, Zachary C. Lipton, Mu Li, and Alexander J. Smola. 2020. “13.11. Fully Convolutional Networks” Dive into Deep Learning. Accessed May 2, 2022. https://d2l.ai/chapter_computer-vision/fcn.html.<br>

### Extra Testing Code (Not used)

In [None]:
#Code to connect your google drive with google colaboratory
from google.colab import drive
drive.mount('/content/gdrive')

import cv2 as cv #openCV is a key library that provides various useful functions for computer vision
import os
from skimage import io, transform, data
import numpy as np #Numpy library provides various useful functions and operators for scientific computing
from google.colab.patches import cv2_imshow 
from sklearn import svm

#Image transformations (resize to a square, then crop a square from the centre)
# NOTE(review): this name is also used for the torchvision.transforms import in the
# Task 2 imports cell; running this cell rebinds it - confirm that is acceptable.
def transforms(img, dim_resize=150, dim_crop=149):
  """Resize `img` to dim_resize x dim_resize and return its central dim_crop x dim_crop part.

  Args:
    img: image array accepted by cv.resize.
    dim_resize: side length of the square the image is resized to.
    dim_crop: side length of the square cropped from the centre.
  """
  resized = cv.resize(img, (dim_resize, dim_resize), interpolation = cv.INTER_AREA)

  #Center Crop [Retrieved from: https://stackoverflow.com/questions/61927877/how-to-crop-opencv-image-from-center]
  rows, cols = resized.shape[0], resized.shape[1]
  top = rows/2 - dim_crop/2
  left = cols/2 - dim_crop/2
  # int() truncates the possibly fractional offsets to pixel indices
  return resized[int(top):int(top+dim_crop), int(left):int(left+dim_crop)]

# Data loading retrieved and modified from https://kapernikov.com/tutorial-image-classification-with-scikit-learn/
def load_with_transforms(val_flag, dim_resize=150, dim_crop=149):
    """Load every jpg / png image of the train or val folder as arrays with labels.

    Args:
      val_flag: truthy to read the 'val' folder, otherwise the 'train' folder.
      dim_resize: resize side length passed to transforms().
      dim_crop: crop side length passed to transforms().

    Returns:
      (X, Y): np.array of the transformed images and np.array of their labels,
      where a label is the sub-folder name without its last four characters.
    """
    if val_flag:
      path = '/content/gdrive/MyDrive/MP/Assignment/coral image classification/val'
    else:
      path = '/content/gdrive/MyDrive/MP/Assignment/coral image classification/train'

    images = []
    labels = []
    # One sub-folder per class; every image inside it is read, converted and transformed
    for subdir in os.listdir(path):
      current_path = os.path.join(path, subdir)

      for file in os.listdir(current_path):
        if file[-3:] not in {'jpg', 'png'}: # Only lower-case jpg / png extensions are accepted
          continue
        raw = cv.imread(os.path.join(current_path, file))
        with_alpha = cv.cvtColor(raw, cv.COLOR_RGB2RGBA) # Adds an alpha channel to the 3-channel image
        images.append(transforms(with_alpha, dim_resize, dim_crop))
        labels.append(subdir[:-4])

    return np.array(images), np.array(labels)

In [None]:
# Load the training images and fit an SVM with default settings directly on them.
# NOTE(review): load_with_transforms returns an array of image arrays; SVC.fit
# presumably needs 2-D (n_samples, n_features) input - confirm whether the images
# must be flattened first.
train_X, train_Y = load_with_transforms(val_flag=False, dim_resize=150, dim_crop=149)
svm_model = svm.SVC()
svm_model.fit(train_X, train_Y)

In [None]:
# Validation split: val_flag=True makes load_with_transforms read the 'val' folder
val_X, val_Y = load_with_transforms(val_flag=True, dim_resize=150, dim_crop=149)