# Deep Learning 
## Image Classification using Shallow ML Algorithms 

### 1. Datasets:
1. CIFAR-10 - a dataset of 60,000 32x32 colour images in 10 classes, with 6,000 images per class. There are 50,000 training images and 10,000 test images. 

2. Fashion MNIST - a dataset of Zalando's article images—consisting of a training set of 60,000 examples and a test set of 10,000 examples. Each example is a 28x28 grayscale image, associated with a label from 10 classes.

### 2. Loading Libraries



In [3]:
import numpy as np
import pandas as pd
import cv2
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from sklearn.cluster import KMeans
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
import dataframe_image as dfi
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

ModuleNotFoundError: No module named 'numpy'

### 3. Configuration & Function Definitions

In [3]:
# Central settings for the notebook: dataset choice, clustering size,
# hyper-parameter candidates per classifier, and class-name lookups.
config = dict(
    # Dataset selector; the loading cell recognises "cifar10" and "fashionmnist".
    dataset="fashionmnist",
    # presumably the number of KMeans clusters - usage is not shown here, confirm
    n_clusters=100,
    # Candidate values for the tree-ensemble search.
    param_decision_tree=dict(
        n_estimators=[50, 150, 200, 500],  # number of trees
        max_depth=[5, 10, 7, 25, 50],  # depth of the trees
        min_samples_split=[2, 4, 7, 9, 10],  # min. number of samples needed to split
        min_samples_leaf=[1, 3, 5],  # min. number of samples in a leaf node
    ),
    # Candidate values for the support-vector classifier.
    param_svc=dict(
        kernel=["rbf", "linear"],  # hyperplane type
    ),
    # Candidate values for the k-nearest-neighbours classifier.
    knearest_neighbor_param=dict(
        n_neighbors=[3, 5, 7],  # number of neighbours
        p=[1, 2],  # power parameter: 1 = Manhattan (L1), 2 = Euclidean (L2)
    ),
    # Class names, in label-index order (based on the datasets' documentation).
    cifar10_labels=[
        "Airplane", "automobile", "bird", "cat", "deer",
        "dog", "frog", "horse", "ship", "truck",
    ],
    fashionmnist_labels=[
        "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
        "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot",
    ],
)

In [4]:
def process_torch_datasets(dataiterator):
    """Collect every batch of an image loader into two NumPy arrays.

    Args:
        dataiterator: Iterable yielding ``(batch, label_batch)`` pairs, where
            ``batch`` is a tensor laid out as (N, C, H, W) and ``label_batch``
            holds one label per image. Pixel values are expected in [0, 1].

    Returns:
        A tuple ``(images, labels)``. ``images`` is a ``uint8`` array laid out
        as (total, H, W, C) with values rescaled to [0, 255]; ``labels`` is the
        matching 1-D array of labels. Both are empty arrays when the iterable
        yields nothing.
    """
    image_chunks = []
    label_chunks = []
    for batch, label_batch in dataiterator:
        # Convert a whole batch at once: (N, C, H, W) -> (N, H, W, C).
        pixels = batch.numpy().transpose(0, 2, 3, 1)
        # Rescale [0, 1] to [0, 255]; the cast truncates the fractional part.
        image_chunks.append((pixels * 255).astype(np.uint8))
        label_chunks.append(np.asarray(label_batch))

    if not image_chunks:
        # np.concatenate rejects an empty list; keep the empty-array result.
        return np.array([]), np.array([])

    return np.concatenate(image_chunks), np.concatenate(label_chunks)

In [5]:
def extract_color_histogram(image_array):
    """Return a flattened, normalised intensity histogram of an image.

    Args:
        image_array: Image as a NumPy array, either 2-D (H, W) or 3-D
            (H, W, C).

    Returns:
        A 1-D array. Three-channel images give a joint 16x16x16 histogram over
        all channels; any other input gives a 256-bin histogram of the first
        channel. The histogram is normalised with ``cv2.normalize``'s default
        settings before flattening.
    """
    # A 2-D array has no channel axis, so check the rank before indexing it.
    has_three_channels = image_array.ndim == 3 and image_array.shape[2] == 3

    if has_three_channels:
        # Joint 3-D histogram, 16 bins per channel over the range [0, 256).
        hist = cv2.calcHist(
            [image_array], [0, 1, 2], None, [16, 16, 16], [0, 256, 0, 256, 0, 256]
        )
    else:
        # 1-D histogram of the first (or only) channel for greyscale input.
        hist = cv2.calcHist([image_array], [0], None, [256], [0, 256])

    # Normalise in place, then collapse to a 1-D feature vector.
    return cv2.normalize(hist, hist).flatten()


In [6]:

def extract_features(image_array):
    """Detect SIFT keypoints in an image and compute their descriptors.

    Args:
        image_array: Image as a NumPy array, either 2-D (H, W) or 3-D
            (H, W, C). Three-channel input is converted to greyscale first.

    Returns:
        A tuple ``(kp, des)`` exactly as returned by
        ``cv2.SIFT.detectAndCompute``: the keypoints and their descriptors.
        NOTE(review): ``des`` is presumably ``None`` when no keypoints are
        found - confirm against the OpenCV documentation before relying on it.
    """
    # A 2-D array has no channel axis, so check the rank before indexing it.
    if image_array.ndim == 3 and image_array.shape[2] == 3:
        # NOTE(review): images built by process_torch_datasets come from
        # torchvision in RGB order, so the RGB conversion is used. Switch to
        # COLOR_BGR2GRAY if BGR images (e.g. from cv2.imread) are passed in.
        image_array = cv2.cvtColor(image_array, cv2.COLOR_RGB2GRAY)

    # Detect distinct features with SIFT.
    sift = cv2.SIFT_create()

    # SIFT keypoints and descriptors; None means no mask is applied.
    kp, des = sift.detectAndCompute(image_array, None)

    return kp, des

### 4. Loading & Processing Datasets

In [7]:
# Preprocessing applied to every image as it is read from the dataset.
transform = transforms.Compose([
    transforms.ToTensor()  # convert each image to a tensor
])

# Dataset names are matched case-insensitively.
dataset_name = config["dataset"].lower()

# Download (if needed) and open the train and test splits under ./data.
if dataset_name == "cifar10":
    trainset = datasets.CIFAR10("./data", download=True, train=True, transform=transform)
    testset = datasets.CIFAR10("./data", download=True, train=False, transform=transform)
elif dataset_name == "fashionmnist":
    trainset = datasets.FashionMNIST("./data", download=True, train=True, transform=transform)
    testset = datasets.FashionMNIST("./data", download=True, train=False, transform=transform)
else:
    # Fail here rather than with a NameError on trainset/testset further down.
    raise ValueError(
        f"Unsupported dataset {config['dataset']!r}; "
        "expected 'cifar10' or 'fashionmnist'"
    )



In [8]:
# Batch both splits in groups of 64 images.
train_loader = DataLoader(trainset, batch_size=64, shuffle=True)
# Keep the test split in dataset order so the extracted test arrays, and any
# per-sample results derived from them, are identical from run to run.
test_loader = DataLoader(testset, batch_size=64, shuffle=False)

In [9]:
# Drain both loaders (train first, then test) into (images, labels) array pairs.
(train_images, train_labels), (test_images, test_labels) = (
    process_torch_datasets(split_loader)
    for split_loader in (train_loader, test_loader)
)