#### Importing the required libraries

In [1]:
import os
import zipfile
import pickle
from skimage.io import imread
from skimage.transform import resize
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


#### Extracting data from drive


In [2]:
# Unpack the dataset archive into a local working folder.
# NOTE(review): no visible cell mounts Drive; the /content/drive path
# presumably requires that to have happened first - confirm.
zipped_folder = "/content/drive/MyDrive/clf-data.zip"
target_folder = "/content/clf_data"

# exist_ok=True keeps this cell re-runnable once the folder already exists.
os.makedirs(target_folder, exist_ok=True)

with zipfile.ZipFile(zipped_folder, mode="r") as archive:
    archive.extractall(path=target_folder)

#### Data Preparation

In [3]:
# Root folder of the extracted dataset; it is read as one sub-folder per class.
input_dir = '/content/clf_data/clf-data'

# Class names double as sub-folder names; a class's position in this list
# is the integer label assigned to its images.
categories = [
    'empty',
    'not_empty',
]

In [4]:
# Build the flattened feature vectors (`data`) and integer class labels
# (`labels`) by reading every file found in each category sub-folder.
data = []
labels = []
for category_idx, category in enumerate(categories):
    category_dir = os.path.join(input_dir, category)
    # os.listdir returns entries in arbitrary order; sorting makes the
    # sample order (and anything derived from it) reproducible across runs.
    for file in sorted(os.listdir(category_dir)):
        img_path = os.path.join(category_dir, file)
        img = imread(img_path)
        # Shrink every image to a fixed 15x15 grid so all flattened feature
        # vectors share one length and can be stacked into a single array.
        img = resize(img, (15, 15))
        data.append(img.flatten())
        labels.append(category_idx)

In [5]:
# Sanity check: there should be exactly one label per loaded image.
n_images, n_labels = len(data), len(labels)
print("Total number of images:-", n_images)
print("Total number of labels:-", n_labels)

Total number of images:- 6090
Total number of labels:- 6090


In [6]:
# Convert the Python lists into NumPy arrays for the scikit-learn calls below.
data, labels = np.asarray(data), np.asarray(labels)

#### Performing Train test split

In [7]:
# Hold out 20% of the samples for testing. stratify=labels keeps the class
# proportions the same in both splits. random_state pins the shuffle so the
# split, and every score computed from it, is reproducible on re-run.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    shuffle=True,
    stratify=labels,
    random_state=42,
)

In [8]:
# Display the shapes of the four split arrays (train/test features, then labels).
tuple(split.shape for split in (x_train, x_test, y_train, y_test))

((4872, 675), (1218, 675), (4872,), (1218,))

#### Training the classifier model and tuning hyperparameters at the same time

In [9]:
# Base estimator; the grid below supplies the hyperparameters to try.
classifier = SVC()

# 3 gamma values x 4 C values = 12 candidate settings.
parameters = [
    dict(gamma=[0.01, 0.001, 0.0001], C=[1, 10, 100, 1000]),
]

grid_search = GridSearchCV(estimator=classifier, param_grid=parameters)

# Kept as the cell's final expression so the fitted search object is displayed.
grid_search.fit(x_train, y_train)

#### Testing performance of the model

In [10]:
# Evaluate the best model found by the grid search on the held-out test set.
best_estimator = grid_search.best_estimator_
y_prediction = best_estimator.predict(x_test)

# accuracy_score expects (y_true, y_pred). Accuracy itself is symmetric, but
# keeping the documented order avoids a silent error if this line is later
# adapted to an asymmetric metric such as precision or recall.
score = accuracy_score(y_test, y_prediction)

print('{}% of samples were correctly classified'.format(score * 100))

100.0% of samples were correctly classified


#### Exporting the model as a pickle file

In [11]:
# Serialize the selected model to disk. The with-block guarantees the file
# handle is flushed and closed even if pickling raises.
with open('/content/model.p', 'wb') as model_file:
    pickle.dump(best_estimator, model_file)