# Imports

In [1]:
import pandas as pd
from sklearn import svm
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import GridSearchCV, HalvingGridSearchCV
import os
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,accuracy_score,confusion_matrix
from glob import glob
from PIL import Image
from sklearn.decomposition import PCA
import seaborn as sns

# Dataset Preprocessing

In [2]:
train_path = '../car-damage-dataset/data1a/training'
test_path = '../car-damage-dataset/data1a/validation'

train_files = glob(train_path + '/*/*.jp*g')
test_files = glob(test_path + '/*/*.jp*g')

In [3]:
os.listdir(test_path)

['00-damage', '01-whole']

In [4]:
def load_images_as_vectors(images):
    """
    helper function to convert glob to vector
    """
    
    number_of_images = len(images)
    w = 256
    h = 256
    channels = 3

    X = np.empty(shape= (number_of_images, w*h*channels))

    labels = np.empty(number_of_images)

    for i, sample in enumerate(images):

        image_vector = Image.open(sample) 
        
        # Resize to 256 x 256
        resized_image_vector = np.array(image_vector.resize((256, 256)))

        # Scale to [0,1]
        norm_image_vector = resized_image_vector / 255

        flattened_image_vector = norm_image_vector.flatten()

        X[i] = np.pad(flattened_image_vector, (0, (w*h*channels) - len(flattened_image_vector) ))

        if sample.split("\\")[-2] == "00-damage":
            labels[i] = 0

        else:
            labels[i] = 1

    return X, labels


In [5]:
np.pad([1,2,3], (0,0))

array([1, 2, 3])

### Dimensionality Reduction

In [6]:

X1, y1 = load_images_as_vectors(train_files)

X2, y2 = load_images_as_vectors(test_files)

X = np.append(X1, X2, axis=0)

y = np.append(y1, y2, axis=0)

# Set the n_components=3
principal=PCA(n_components=30)

principal.fit(X)

X=principal.transform(X)


## Test/Train splits

In [7]:


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

In [8]:
len(y)

2300

## Model Development

In [9]:
# param_grid = {'C':[0.1,1,10,100],'gamma':[0.0001,0.001,0.1,1],'kernel':['rbf','poly']}

param_grid = { 
    'C':[0.01,0.1,1,100,1000,10000],
    'kernel':['rbf','poly','sigmoid','linear'],
    'degree':[2,3,4,5,6],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001]
    }

svc = svm.SVC()

# model = GridSearchCV(svc,param_grid, n_jobs=-1, cv=5, verbose=10)

model = HalvingGridSearchCV(svc, param_grid, n_jobs=-1, cv=5, verbose=1)


model.fit(X_train, y_train)


print(model.best_params_)

n_iterations: 5
n_required_iterations: 7
n_possible_iterations: 5
min_resources_: 20
max_resources_: 1840
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 840
n_resources: 20
Fitting 5 folds for each of 840 candidates, totalling 4200 fits
----------
iter: 1
n_candidates: 280
n_resources: 60
Fitting 5 folds for each of 280 candidates, totalling 1400 fits
----------
iter: 2
n_candidates: 94
n_resources: 180
Fitting 5 folds for each of 94 candidates, totalling 470 fits


In [None]:
y_pred = model.predict(X_test)

# y_pred

## Model Performance Review

In [None]:
accuracy_score(y_pred, y_test)

In [None]:
print(classification_report(y_pred, y_test))


In [None]:
confusion_matrix(y_test, y_pred)

sns.heatmap(
   pd.DataFrame(
    confusion_matrix(y_test, y_pred),
    index = ['Damaged', 'Non-damaged'],
    columns = ['Damaged', 'Non-damaged']
), 
    annot=True, 
    fmt='g', 
    cmap='Blues'
    )

In [None]:
# TODO: AUC/ROC
# TODO: Precision
# TODO: Recall
# TODO: F1-score
# TODO: Confusion Matrix
# TODO: Highlight pixels