In [2]:
# IMPORT PACKAGES
# ---------------
!pip install opencv-python


import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB 
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, f1_score
from skimage.io import imread
from skimage.transform import resize
from skimage.util import random_noise
from skimage.filters import gaussian
import cv2



In [33]:
img_data = []       # Non-flattened images
img_flattened = []  # Flattened images (needed for noise generation)
img_target = []     # Class belonging to each image
classes = []
num_img = 100
dir_init = './caltech-101/'

n = 0
# Iterate through all folders in caltech101 file
for category in os.listdir(dir_init):
    
    if(category == '.DS_Store'):
        continue
        
    dir_img = os.path.join(dir_init, category)
    
    total_imgs = os.listdir(dir_img)
    if len(total_imgs) >= 100:       # If there are at least 100 images in the class
        
        classes.append(category)
        n_i = 0
        
        for img in os.listdir(dir_img):
            if(img == '.DS_Store'):
                continue
                
            img = imread(os.path.join(dir_img, img))   # Read the image as a vector 
            img_resized = resize(img, (150, 150, 3))
            img_data.append(img_resized)
            img_flattened.append(img_resized.flatten())
            img_target.append(classes.index(category))
            
            n_i += 1
            if (n_i == num_img):
                break


gaussian_images = {}
gaussian_images[0] = img_flattened

for std in range(1,11):
    temp = []
    for img in img_data:
        temp.append(random_noise(img,var= (std/100),mode='gaussian').flatten())
        
    gaussian_images[std/100] = temp
    

# Lens blur image generation

blur_images = {}
blur_images[1] = img_flattened

for k in range(2,11):
    temp = []
    for img in img_data:
        temp.append(cv2.blur(img,(k,k)).flatten())
        
    blur_images[k] = temp
    
    
# Salt and pepper image generation
    
sp_images = {}
sp_images[0] = img_flattened
for variance in [0.05,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5]:

    temp = []

    for img in img_data:
        temp.append(random_noise(img, amount = variance, mode='s&p').flatten())
        
    sp_images[variance] = temp


In [34]:
# K-NEAREST NEIGHBOURS (KNN)
# --------------------------


# 0% NOISE
# --------
# Move images to dataframe
flat = np.array(img_flattened)
target = np.array(img_target)
df_0 = pd.DataFrame(flat)
df_0['Target'] = target

# Separate input features and targets
x_0 = df_0.iloc[:,:-1]
y_0 = df_0.iloc[:,-1]
x_0_train, x_0_test, y_0_train, y_0_test = train_test_split(x_0, y_0, test_size = 0.20, random_state = 1, stratify = y_0)

# Create and train the model
'''
knn_temp = KNeighborsClassifier()
k_range = list(range(1, 15))
param_grid = dict(n_neighbors=k_range)
knn = GridSearchCV(knn_temp, param_grid, scoring = 'accuracy', cv = 5, verbose = 10)
knn.fit(X_train, y_train)
'''
knn = KNeighborsClassifier(n_neighbors = 8)
knn.fit(x_0_train, y_0_train)


# GAUSSIAN NOISE
# --------------
knn_gaussian_accuracy_test = {}
knn_gaussian_accuracy_train = {}
knn_gaussian_f_test = {}
knn_gaussian_f_train = {}

y_0_pred_test = knn.predict(x_0_test.values)
y_0_pred_train = knn.predict(x_0_train.values)
knn_gaussian_accuracy_test[0] = accuracy_score(y_0_pred_test, y_0_test)
knn_gaussian_accuracy_train[0] = accuracy_score(y_0_pred_train, y_0_train)
knn_gaussian_f_test[0] = f1_score(y_0_test, y_0_pred_test, average = 'micro')
knn_gaussian_f_train[0] = f1_score(y_0_train, y_0_pred_train, average = 'micro')

for key, value in gaussian_images.items():
    flat = np.array(value)
    target = np.array(img_target)
    df = pd.DataFrame(flat)
    df['Target'] = target

    # Separate input features and targets
    x = df.iloc[:,:-1]
    y = df.iloc[:,-1]

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20,  random_state = 1, stratify = y)
    y_pred_test = knn.predict(x_test.values)
    y_pred_train = knn.predict(x_train.values)
    knn_gaussian_accuracy_test[key] = accuracy_score(y_pred_test, y_test)
    knn_gaussian_accuracy_train[key] = accuracy_score(y_pred_train, y_train)
    knn_gaussian_f_test[key] = f1_score(y_test, y_pred_test, average = 'macro')
    knn_gaussian_f_train[key] = f1_score(y_train, y_pred_train, average = 'macro')


# BLUR NOISE
# ----------
knn_blur_accuracy_test = {}
knn_blur_accuracy_train = {}
knn_blur_f_test = {}
knn_blur_f_train = {}

knn_blur_accuracy_test[1] = knn_gaussian_accuracy_test[0]
knn_blur_accuracy_train[1] = knn_gaussian_accuracy_train[0]
knn_blur_f_test[1] = knn_gaussian_f_test[0]
knn_blur_f_train[1] = knn_gaussian_f_train[0]

for key, value in blur_images.items():
    flat = np.array(value)
    target = np.array(img_target)
    df = pd.DataFrame(flat)
    df['Target'] = target

    # Separate input features and targets
    x = df.iloc[:,:-1]
    y = df.iloc[:,-1]

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20,  random_state = 1, stratify = y) # try also with random_state = 1
    y_pred_test = knn.predict(x_test.values)
    y_pred_train = knn.predict(x_train.values)
    knn_blur_accuracy_test[key] = accuracy_score(y_pred_test, y_test)
    knn_blur_accuracy_train[key] = accuracy_score(y_pred_train, y_train)
    knn_blur_f_test[key] = f1_score(y_test, y_pred_test, average = 'micro')
    knn_blur_f_train[key] = f1_score(y_train, y_pred_train, average = 'micro')
    
    
# IMPULSE NOISE
# -------------
knn_sp_accuracy_test = {}
knn_sp_accuracy_train = {}
knn_sp_f_test = {}
knn_sp_f_train = {}

knn_sp_accuracy_test[0] = knn_gaussian_accuracy_test[0]
knn_sp_accuracy_train[0] = knn_gaussian_accuracy_train[0]
knn_sp_f_test[0] = knn_gaussian_f_test[0]
knn_sp_f_train[0] = knn_gaussian_f_train[0]

for key, value in sp_images.items():
    flat = np.array(value)
    target = np.array(img_target)
    df = pd.DataFrame(flat)
    df['Target'] = target

    # Separate input features and targets
    x = df.iloc[:,:-1]
    y = df.iloc[:,-1]

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20,  random_state = 1, stratify = y) # try also with random_state = 1
    y_pred_test = knn.predict(x_test.values)
    y_pred_train = knn.predict(x_train.values)
    knn_sp_accuracy_test[key] = accuracy_score(y_pred_test, y_test)
    knn_sp_accuracy_train[key] = accuracy_score(y_pred_train, y_train)
    knn_sp_f_test[key] = f1_score(y_test, y_pred_test, average = 'micro')
    knn_sp_f_train[key] = f1_score(y_train, y_pred_train, average = 'micro')

In [35]:
print("knn_test_acc_gaussian= ", knn_gaussian_accuracy_test)
print("knn_train_acc_gaussian= ", knn_gaussian_accuracy_train)
  
print("knn_test_f_gaussian= ", knn_gaussian_f_test)
print("knn_train_f_gaussian= ", knn_gaussian_f_train)

knn_test_acc_gaussian=  {0: 0.625, 0.01: 0.6083333333333333, 0.02: 0.6041666666666666, 0.03: 0.5875, 0.04: 0.5708333333333333, 0.05: 0.5541666666666667, 0.06: 0.5583333333333333, 0.07: 0.5583333333333333, 0.08: 0.5291666666666667, 0.09: 0.5333333333333333, 0.1: 0.5291666666666667}
knn_train_acc_gaussian=  {0: 0.684375, 0.01: 0.6677083333333333, 0.02: 0.6666666666666666, 0.03: 0.6677083333333333, 0.04: 0.6541666666666667, 0.05: 0.6447916666666667, 0.06: 0.6333333333333333, 0.07: 0.621875, 0.08: 0.6135416666666667, 0.09: 0.6041666666666666, 0.1: 0.596875}
knn_test_f_gaussian=  {0: 0.6144555876972096, 0.01: 0.5979508769337987, 0.02: 0.5983777962920165, 0.03: 0.5844453112327123, 0.04: 0.5691741196096024, 0.05: 0.5482098357197682, 0.06: 0.5598902445095032, 0.07: 0.5512811090378339, 0.08: 0.530492102918655, 0.09: 0.5343505575419224, 0.1: 0.5281706664771181}
knn_train_f_gaussian=  {0: 0.6830097371035708, 0.01: 0.6643748150856049, 0.02: 0.6630901096092547, 0.03: 0.6670788856267392, 0.04: 0.652

In [36]:
print("knn_test_acc_blur= ", knn_blur_accuracy_test)
print("knn_train_acc_blur= ", knn_blur_accuracy_train)
  
print("knn_test_f_blur= ", knn_blur_f_test)
print("knn_train_f_blur= ", knn_blur_f_train)

knn_test_acc_blur=  {1: 0.625, 2: 0.6125, 3: 0.625, 4: 0.6166666666666667, 5: 0.6166666666666667, 6: 0.6125, 7: 0.6083333333333333, 8: 0.6041666666666666, 9: 0.6041666666666666, 10: 0.6}
knn_train_acc_blur=  {1: 0.684375, 2: 0.6802083333333333, 3: 0.6802083333333333, 4: 0.6791666666666667, 5: 0.678125, 6: 0.6770833333333334, 7: 0.675, 8: 0.6739583333333333, 9: 0.6677083333333333, 10: 0.665625}
knn_test_f_blur=  {1: 0.625, 2: 0.6125, 3: 0.625, 4: 0.6166666666666667, 5: 0.6166666666666667, 6: 0.6125, 7: 0.6083333333333333, 8: 0.6041666666666666, 9: 0.6041666666666666, 10: 0.6}
knn_train_f_blur=  {1: 0.684375, 2: 0.6802083333333333, 3: 0.6802083333333333, 4: 0.6791666666666667, 5: 0.678125, 6: 0.6770833333333334, 7: 0.675, 8: 0.6739583333333333, 9: 0.6677083333333333, 10: 0.665625}


In [37]:
print("knn_test_acc_sp= ", knn_sp_accuracy_test)
print("knn_train_acc_sp= ", knn_sp_accuracy_train)
  
print("knn_test_f_sp= ", knn_sp_f_test)
print("knn_train_f_sp= ", knn_sp_f_train)

knn_test_acc_sp=  {0: 0.625, 0.05: 0.5958333333333333, 0.1: 0.5708333333333333, 0.15: 0.5541666666666667, 0.2: 0.5458333333333333, 0.25: 0.49583333333333335, 0.3: 0.4875, 0.35: 0.42083333333333334, 0.4: 0.31666666666666665, 0.45: 0.2708333333333333, 0.5: 0.22083333333333333}
knn_train_acc_sp=  {0: 0.684375, 0.05: 0.6739583333333333, 0.1: 0.6510416666666666, 0.15: 0.634375, 0.2: 0.5989583333333334, 0.25: 0.5760416666666667, 0.3: 0.5395833333333333, 0.35: 0.4979166666666667, 0.4: 0.46041666666666664, 0.45: 0.39166666666666666, 0.5: 0.328125}
knn_test_f_sp=  {0: 0.625, 0.05: 0.5958333333333333, 0.1: 0.5708333333333333, 0.15: 0.5541666666666667, 0.2: 0.5458333333333333, 0.25: 0.49583333333333335, 0.3: 0.4875, 0.35: 0.42083333333333334, 0.4: 0.31666666666666665, 0.45: 0.2708333333333333, 0.5: 0.22083333333333333}
knn_train_f_sp=  {0: 0.684375, 0.05: 0.6739583333333333, 0.1: 0.6510416666666666, 0.15: 0.634375, 0.2: 0.5989583333333334, 0.25: 0.5760416666666667, 0.3: 0.5395833333333333, 0.35: 

In [38]:
print(blur_images.keys())
print(sp_images.keys())
print(gaussian_images.keys())

dict_keys([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
dict_keys([0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5])
dict_keys([0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1])
