# Imports

In [1]:
import pandas as pd
from sklearn import svm
from sklearn.model_selection import GridSearchCV
import os
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.preprocessing import image
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,accuracy_score,confusion_matrix
from glob import glob
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler


# Dataset Preprocessing

In [2]:
train_path = '../car-damage-dataset/data1a/training'
test_path = '../car-damage-dataset/data1a/validation'

train_files = glob(train_path + '/*/*.jp*g')
test_files = glob(test_path + '/*/*.jp*g')

In [3]:
os.listdir(test_path)

['00-damage', '01-whole']

In [14]:
def load_images_as_vectors(images):
    """
    helper function to convert glob to vector
    """
    
    number_of_images = len(images)
    w = 256
    h = 256
    channels = 3

    X = np.empty(shape= (number_of_images, w*h*channels))

    labels = np.empty(number_of_images)

    for i, sample in enumerate(images):

        image_vector = Image.open(sample) 
        
        # Resize to 256 x 256
        resized_image_vector = np.array(image_vector.resize((256, 256)))

        # Scale to [0,1]
        norm_image_vector = resized_image_vector / 255

        flattened_image_vector = norm_image_vector.flatten()

        X[i] = np.pad(flattened_image_vector, (0, (w*h*channels) - len(flattened_image_vector) ))

        if sample.split("\\")[-2] == "00-damage":
            labels[i] = 0

        else:
            labels[i] = 1

    return X, labels


In [13]:
np.pad([1,2,3], (0,0))

array([1, 2, 3])

### Dimensionality Reduction

In [15]:
# TODO: PCA
X, y = load_images_as_vectors(train_files)

# Set the n_components=3
principal=PCA(n_components=30)

principal.fit(X)

X=principal.transform(X)


## Test/Train splits

In [16]:


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

## Model Development

In [17]:
param_grid = {'C':[0.1,1,10,100],'gamma':[0.0001,0.001,0.1,1],'kernel':['rbf','poly']}


svc = svm.SVC()

model = GridSearchCV(svc,param_grid)


model.fit(X_train, y_train)


print(model.best_params_)

{'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}


In [18]:
y_pred = model.predict(X_test)

y_pred

array([1., 0., 1., 1., 1., 0., 1., 0., 1., 0., 0., 0., 1., 1., 0., 1., 0.,
       0., 1., 0., 1., 0., 1., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 0.,
       1., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 0., 0., 1., 0.,
       0., 0., 1., 0., 1., 0., 1., 0., 0., 1., 1., 1., 0., 1., 1., 0., 0.,
       1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1.,
       0., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 0., 1., 1., 1., 1.,
       1., 1., 1., 0., 0., 0., 0., 1., 0., 1., 1., 1., 0., 0., 0., 1., 1.,
       0., 1., 1., 1., 1., 0., 0., 1., 1., 1., 0., 0., 1., 1., 0., 0., 1.,
       0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0.,
       1., 1., 1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1., 0.,
       1., 0., 0., 0., 1., 1., 1., 0., 1., 1., 0., 1., 0., 0., 1., 1., 0.,
       0., 0., 1., 1., 0., 1., 1., 1., 0., 0., 1., 0., 0., 0., 0., 1., 1.,
       0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0.,
       0., 1., 0., 0., 0.

## Model Performance Review

In [19]:
accuracy_score(y_pred, y_test)

0.6793478260869565

In [None]:
# TODO: AUC/ROC
# TODO: Precision
# TODO: Recall
# TODO: F1-score
# TODO: Confusion Matrix
# TODO: Highlight pixels