# Model generation (using a support vector machine)
Heavily inspired by: https://github.com/aditi-govindu/Image-Classsification-using-sklearn/blob/main/Image_Classification_using_SVM.ipynb

In [None]:
%pip install numpy
%pip install scikit-image
%pip install scikit-learn
%pip install matplotlib

In [None]:
# Directory containing one sub-folder of .jpg images per class label.
DATA_DIR = "../Kamera_Image_res/z3_classified/"
# Base name (without the ".p" extension) of the pickled model file.
MODEL_NAME = "model"
# Seed value for randomized steps; the train/test split uses the same value (101).
RANDOM_SEED = 101

In [None]:
from skimage.io import imread
from skimage.transform import resize
import numpy as np
from pathlib import Path

# Load every labelled image, resize it to a common shape and collect the
# resized image, its flattened pixel vector and its class index.
DATA_DIR = Path(DATA_DIR)

# Class names; a sample's label is the index of its class in this list.
features = ["clean", "dirty"]
images = []
flat = []
target = []

for feature_idx, feature in enumerate(features):
    path = DATA_DIR / feature
    # Sort the paths: glob order depends on the filesystem, and a stable
    # sample order is required for a seeded train/test split to be repeatable.
    for img in sorted(path.glob("./*.jpg")):
        img_arr = imread(img)
        # Bring every image to 150x150x3 so all feature vectors have equal length.
        img_resize = resize(img_arr, (150, 150, 3))

        flat.append(img_resize.flatten())
        images.append(img_resize)
        target.append(feature_idx)

flat = np.array(flat)
# Build the image array from `images`; wrapping `flat` here (as before) would
# discard the (150, 150, 3) layout of each image.
images = np.array(images)
target = np.array(target)

In [None]:
import pandas as pd

# Tabulate the flattened pixel vectors (one row per image) and append the
# class index as the trailing "Target" column.
df = pd.DataFrame(flat)
df.insert(loc=df.shape[1], column="Target", value=target)
# Bare expression so the notebook renders the table.
df

In [None]:
from sklearn.model_selection import train_test_split

# Inputs are every column except the trailing "Target" column; the labels
# are taken directly from the target array.
x = df.iloc[:, :-1].values
y = target

print("Input data dimensions:", x.shape)
print("Output data dimensions:", y.shape)

# train / test split: 30 % of the samples are held out for testing, stratified
# on the labels. The seed comes from RANDOM_SEED instead of a duplicated
# literal so the configuration cell actually controls the split.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    shuffle=True,
    test_size=0.3,
    random_state=RANDOM_SEED,
    stratify=y,
)
print("# input training data:", x_train.shape)
print("# input testing data:", x_test.shape)
print("# output training data:", y_train.shape)
print("# output testing data:", y_test.shape)

In [None]:


# Sanity-check the split: print the distinct labels and how often each one
# occurs, first for the training labels and then for the testing labels.
print("Labels\t\t   Image index considered")
for _split_labels in (y_train, y_test):
    print(np.unique(_split_labels, return_counts=True))

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Hyper-parameter grid explored by cross-validation: an RBF kernel combined
# with two gamma values and four values of C.
tuned_parameters = [
    {
        "kernel": ["rbf"],
        "gamma": [1e-3, 1e-4],
        "C": [1, 10, 100, 1000],
    }
]

# Grid search over the grid above on the training data only.
# refit=True retrains the best configuration afterwards; verbose=3 makes the
# search log its progress.
cv = GridSearchCV(estimator=SVC(), param_grid=tuned_parameters, refit=True, verbose=3)
cv.fit(x_train, y_train)

In [None]:


# Keep the refitted best estimator from the grid search as the final model and
# report both the winning hyper-parameters and the resulting estimator.
# NOTE(review): an earlier comment recorded {'C': 10, 'gamma': 0.0001,
# 'kernel': 'rbf'} for "3 classes"; this notebook has two classes, so confirm
# that result against an actual run.
svm = cv.best_estimator_
print("Best parameters to apply are:", cv.best_params_)
print("Model after tuning is:\n", svm)

In [None]:
# Classify the held-out test samples with the tuned model; y_prediction holds
# the predicted label for each row of x_test.
y_prediction = svm.predict(x_test)

In [None]:
# Evaluate the model using confusion matrix, classification report and accuracy

from sklearn.metrics import confusion_matrix,classification_report,accuracy_score

# scikit-learn metrics expect (y_true, y_pred) in that order. With the
# arguments swapped the confusion matrix is transposed and precision/recall
# are exchanged in the report, so the ground truth is passed first.
print("Confusion matrix results:\n", confusion_matrix(y_test, y_prediction))
print("\nClassification report of model:\n", classification_report(y_test, y_prediction))
# Accuracy is symmetric in its arguments; the order is kept consistent anyway.
print("Accuracy score:", 100 * accuracy_score(y_test, y_prediction))

In [None]:
import pickle
# Save SVM model in pickle file. The context manager guarantees the file is
# flushed and closed even if pickling fails part-way.
with open(f"{MODEL_NAME}.p", "wb") as model_file:
    pickle.dump(svm, model_file)

In [None]:
# Read the model back from the pickle file, closing the handle afterwards.
# NOTE: unpickling can execute arbitrary code; only load model files that
# were produced by a trusted source such as this notebook.
with open(f"{MODEL_NAME}.p", "rb") as model_file:
    test_model = pickle.load(model_file)

In [None]:
import matplotlib.pyplot as plt

# Testing for a new image
def test_img(path):
    """Classify the image at *path* with ``test_model`` and print the result.

    The image is read, resized to 150x150x3 and displayed with matplotlib.
    Its flattened pixels are passed to the model as a single sample, and the
    predicted index is translated to a class name through ``features``.
    """
    original = imread(path)
    scaled = resize(original, (150, 150, 3))
    print("Dimensions of original image are:", original.shape)
    plt.imshow(scaled)
    # The model expects a 2-D array of samples, so wrap the single vector.
    sample = np.array([scaled.flatten()])
    predicted_index = test_model.predict(sample)[0]
    print("PREDICTED OUTPUT IS:", features[predicted_index])

In [None]:
# Run the classifier on a sample image (presumably a "dirty" example, judging
# by the file name).
test_img("./test_data/dirty.jpg")

In [None]:
# Run the classifier on a sample image (presumably a "clean" example, judging
# by the file name).
test_img("./test_data/clean.jpg")