# Image Classification Model Development

In [None]:
import pandas as pd 
from glob import glob
from sklearn.linear_model import LogisticRegression, Perceptron
from lazypredict.Supervised import LazyClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from PIL import Image
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from lightgbm import LGBMClassifier
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Column layout: 36*36 = 1296 pixel features followed by the class label.
columns = [f'pixel_{i}' for i in range(1296)]
columns.append('label')
df = pd.DataFrame(columns=columns)

# glob() yields entries in arbitrary filesystem order; sorting keeps the
# dataset row order (and therefore any seeded split) reproducible.
car_folders = sorted(glob('/home/anuraaga/Documents/Projects/Project-PredthePrice/train_image/test_image/*'))
car_folders

In [None]:
# Build the dataset: one row per image, 1296 normalized grayscale pixels
# plus the class label taken from the image's parent folder name.
#
# Rows are collected in plain lists and the frame is built once at the end;
# growing a DataFrame with pd.concat inside the loop copies every existing
# row on every iteration. Pixels stay numeric throughout (no float -> str
# round trip caused by concatenating them with the label string).
pixel_rows = []
labels = []
for folder in sorted(car_folders):
    label = folder.split('/')[-1]
    for img_path in sorted(glob(folder+'/*.jpg')):
        # Context manager closes the file; convert() returns an in-memory copy.
        with Image.open(img_path) as source_image:
            gray = source_image.convert("L")
        # Image.ADAPTIVE is a palette constant sharing the integer value of
        # the LANCZOS resampling filter; name the filter that is really used.
        small = gray.resize((36,36), Image.LANCZOS)
        pixel_rows.append(np.array(small).reshape(-1) / 255)
        labels.append(label)

# reshape keeps the frame well-formed (0 x 1296) even when no image is found.
pixel_matrix = np.array(pixel_rows, dtype='float64').reshape(-1, len(columns) - 1)
# Rebuilding df from scratch means re-running this cell does not duplicate rows.
df = pd.DataFrame(pixel_matrix, columns=columns[:-1])
df['label'] = labels

In [None]:
# Enforce the final schema in one pass: every pixel column as float64 and
# the label column as str.
dtype_by_column = {pixel_col: 'float64' for pixel_col in columns[:-1]}
dtype_by_column['label'] = str
df = df.astype(dtype_by_column)

In [None]:
# Preview the first rows of the freshly built dataset.
df.head()

In [None]:
# Show column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Persist the dataset (without the index) so the image-loading cells above
# do not have to be re-run.
df.to_parquet('new_image_dataset_36pixels.parquet', index=False, engine='fastparquet')

Run from here: the cells above only build and save the dataset; the cells below reload it from the parquet file.

In [None]:
# Reload the dataset written by the to_parquet cell.
df = pd.read_parquet('new_image_dataset_36pixels.parquet')

In [None]:
# Sanity-check the reloaded dataset.
df.head()

In [None]:
# Model comparison table; lazypredict was used to choose the preferred model.
models = pd.read_csv('newmodels_choice.csv')
models.head()

In [None]:
# Number of images per class.
df['label'].value_counts()

The classes are quite imbalanced, so we need oversampling.

In [None]:
# Separate the target column from the pixel features.
y = df['label']
X = df.drop(columns='label')

In [None]:
# Plot the class distribution of the original labels.
import seaborn as sns
sns.histplot(y)

In [None]:
# Balance the classes by randomly duplicating minority-class rows.
# A fixed random_state makes the resampled dataset reproducible, in line
# with the seeded train/test split used later in the notebook.
from imblearn.over_sampling import RandomOverSampler
roversamp = RandomOverSampler(random_state=42)
newX, newY = roversamp.fit_resample(X, y)

In [None]:
# Class counts after oversampling.
newY.value_counts()

In [None]:
# Plot the class distribution after oversampling
# (the trailing semicolon suppresses the returned Axes repr).
import seaborn as sns
sns.histplot(newY);

In [None]:
# Encode the class names as integer codes.
# ('Seden' is spelled this way on purpose: it must match the folder name.)
label_codes = {'Hatchback':0, 'Pickup':1, 'Seden':2, 'SUV':3}

# Series.replace would silently keep any unexpected label and relies on a
# deprecated implicit downcast to end up with an integer dtype. Validate
# explicitly instead; already-encoded values are accepted so the cell can
# be re-run safely.
unexpected_labels = set(newY.unique()) - set(label_codes) - set(label_codes.values())
if unexpected_labels:
    raise ValueError(f'Labels without an integer code: {sorted(unexpected_labels, key=str)}')
newY = newY.map(lambda label: label_codes.get(label, label)).astype('int64')
newY.value_counts()

In [None]:
# Split FIRST, oversample the training part only.
# Splitting the already-oversampled data puts exact copies of the same image
# in both train and test, so the test score mostly measures memorisation.
# Here the test set contains only original, never-duplicated images.
from imblearn.over_sampling import RandomOverSampler

label_codes = {'Hatchback':0, 'Pickup':1, 'Seden':2, 'SUV':3}
encoded_y = y.map(label_codes)
if encoded_y.isna().any():
    raise ValueError(f'Labels without an integer code: {sorted(set(y[encoded_y.isna()]))}')
encoded_y = encoded_y.astype('int64')

# stratify keeps every class represented in both parts of the split.
X_train_raw, nX_test, y_train_raw, ny_test = train_test_split(
    X, encoded_y, test_size=0.3, random_state=42, stratify=encoded_y)
nX_train, ny_train = RandomOverSampler(random_state=42).fit_resample(X_train_raw, y_train_raw)

In [None]:
# Train a support-vector classifier and predict the held-out set.
from sklearn.svm import SVC
# probability=True fits an internal cross-validated calibration that shuffles
# the data; random_state pins that shuffle so the saved model is reproducible.
clf = SVC(decision_function_shape='ovo', probability=True, random_state=42)
clf.fit(nX_train, ny_train)
ny_pred = clf.predict(nX_test)

In [None]:
# Report mean accuracy on both splits, as percentages with two decimals.
for split_name, features, targets in (
    ('Training', nX_train, ny_train),
    ('Testing', nX_test, ny_test),
):
    print(f'{split_name} Accuracy: {clf.score(features, targets)*100:.2f}%')

In [None]:
# Confusion matrix of true vs. predicted class codes on the test split.
test_confusion = confusion_matrix(ny_test, ny_pred)
ConfusionMatrixDisplay(test_confusion).plot()

In [None]:
# Per-class precision / recall / F1 on the test split.
test_report = classification_report(ny_test, ny_pred)
print(test_report)

In [None]:
# Save the fitted classifier in skops format.
# NOTE(review): a later cell loads 'car_detection_model_svc.skops' from a
# different directory, not this file — confirm which model is intended there.
import skops.io as sio 
sio.dump(clf, 'car_detection_model_svc_balanced.skops')

Testing the model on individual images

In [None]:
# Load a sample image as 36x36 grayscale (same steps as the dataset-building
# loop) and preview it.
imagepath = 'test_image/Hatchback/PHOTO_342.jpg'
# The context manager closes the file handle; convert() returns an
# independent in-memory copy, so `image` stays usable afterwards.
with Image.open(imagepath) as source_image:
    image = source_image.convert("L")
# Image.ADAPTIVE is a palette constant that shares the integer value of the
# LANCZOS resampling filter; naming LANCZOS keeps the pixels identical.
resized_image = image.resize((36,36), Image.LANCZOS)
pixel_values = np.array(resized_image)
plt.imshow(pixel_values, cmap='gray')

In [None]:
# Feature names only (no 'label'): one column per pixel of a 36x36 image.
columns = ['pixel_' + str(pixel_index) for pixel_index in range(36 * 36)]

In [None]:
# Flatten the image to a 1-D vector, divide by 255, and show its length.
normalized_pixel_values = pixel_values.ravel() / 255
normalized_pixel_values.shape[0]

In [None]:
# One-row frame carrying the pixel column names.
ndf = pd.DataFrame(normalized_pixel_values.reshape(1, -1), columns=columns)

'Hatchback':0, 'Pickup':1, 'Seden':2, 'SUV':3

In [None]:
# Predicted class code for the image loaded above.
clf.predict(ndf)

In [None]:
# Load a sample image as 36x36 grayscale and preview it.
imagepath = 'test_image/Pickup/PHOTO_342.jpg'
# Close the file handle deterministically; convert() returns an in-memory copy.
with Image.open(imagepath) as source_image:
    image = source_image.convert("L")
# Image.ADAPTIVE is a palette constant with the same integer value as the
# LANCZOS filter; naming LANCZOS keeps the pixels identical.
resized_image = image.resize((36,36), Image.LANCZOS)
pixel_values = np.array(resized_image)
plt.imshow(pixel_values, cmap='gray')

In [None]:
# Flatten, divide by 255, and wrap as a one-row frame with the pixel column names.
normalized_pixel_values = pixel_values.ravel() / 255
ndf = pd.DataFrame(normalized_pixel_values.reshape(1, -1), columns=columns)

'Hatchback':0, 'Pickup':1, 'Seden':2, 'SUV':3

In [None]:
# Predicted class code for the image loaded above.
clf.predict(ndf)

In [None]:
# Load a sample image as 36x36 grayscale and preview it.
imagepath = 'test_image/Seden/PHOTO_548.jpg'
# Close the file handle deterministically; convert() returns an in-memory copy.
with Image.open(imagepath) as source_image:
    image = source_image.convert("L")
# Image.ADAPTIVE is a palette constant with the same integer value as the
# LANCZOS filter; naming LANCZOS keeps the pixels identical.
resized_image = image.resize((36,36), Image.LANCZOS)
pixel_values = np.array(resized_image)
plt.imshow(pixel_values, cmap='gray')

In [None]:
# Flatten, divide by 255, and wrap as a one-row frame with the pixel column names.
normalized_pixel_values = pixel_values.ravel() / 255
ndf = pd.DataFrame(normalized_pixel_values.reshape(1, -1), columns=columns)

'Hatchback':0, 'Pickup':1, 'Seden':2, 'SUV':3

In [None]:
# Predicted class code for the image loaded above.
clf.predict(ndf)

In [None]:
# Load a sample image as 36x36 grayscale and preview it.
imagepath = 'test_image/SUV/PHOTO_123.jpg'
# Close the file handle deterministically; convert() returns an in-memory copy.
with Image.open(imagepath) as source_image:
    image = source_image.convert("L")
# Image.ADAPTIVE is a palette constant with the same integer value as the
# LANCZOS filter; naming LANCZOS keeps the pixels identical.
resized_image = image.resize((36,36), Image.LANCZOS)
pixel_values = np.array(resized_image)
plt.imshow(pixel_values, cmap='gray')

In [None]:
# Flatten, divide by 255, and wrap as a one-row frame with the pixel column names.
normalized_pixel_values = pixel_values.ravel() / 255
ndf = pd.DataFrame(normalized_pixel_values.reshape(1, -1), columns=columns)

In [None]:
# Predicted class code for the image loaded above.
clf.predict(ndf)

In [None]:
# Load a sample image as 36x36 grayscale and preview it.
imagepath = 'checks/pickup.jpg'
# Close the file handle deterministically; convert() returns an in-memory copy.
with Image.open(imagepath) as source_image:
    image = source_image.convert("L")
# Image.ADAPTIVE is a palette constant with the same integer value as the
# LANCZOS filter; naming LANCZOS keeps the pixels identical.
resized_image = image.resize((36,36), Image.LANCZOS)
pixel_values = np.array(resized_image)
plt.imshow(pixel_values, cmap='gray')

In [None]:
# Flatten, divide by 255, and wrap as a one-row frame with the pixel column names.
normalized_pixel_values = pixel_values.ravel() / 255
ndf = pd.DataFrame(normalized_pixel_values.reshape(1, -1), columns=columns)

'Hatchback':0, 'Pickup':1, 'Seden':2, 'SUV':3

In [None]:
# Predicted class code for the image loaded above.
clf.predict(ndf)

In [None]:
# Load a sample image as 36x36 grayscale and preview it.
imagepath = 'pickup2.jpg'
# Close the file handle deterministically; convert() returns an in-memory copy.
with Image.open(imagepath) as source_image:
    image = source_image.convert("L")
# Image.ADAPTIVE is a palette constant with the same integer value as the
# LANCZOS filter; naming LANCZOS keeps the pixels identical.
resized_image = image.resize((36,36), Image.LANCZOS)
pixel_values = np.array(resized_image)
plt.imshow(pixel_values, cmap='gray')

In [None]:
# Flatten, divide by 255, and wrap as a one-row frame with the pixel column names.
normalized_pixel_values = pixel_values.ravel() / 255
ndf = pd.DataFrame(normalized_pixel_values.reshape(1, -1), columns=columns)

'Hatchback':0, 'Pickup':1, 'Seden':2, 'SUV':3

In [None]:
# Predicted class code for the image loaded above.
clf.predict(ndf)

In [None]:
# Load a sample image as 36x36 grayscale and preview it.
imagepath = 'checks/sedan.jpg'
# Close the file handle deterministically; convert() returns an in-memory copy.
with Image.open(imagepath) as source_image:
    image = source_image.convert("L")
# Image.ADAPTIVE is a palette constant with the same integer value as the
# LANCZOS filter; naming LANCZOS keeps the pixels identical.
resized_image = image.resize((36,36), Image.LANCZOS)
pixel_values = np.array(resized_image)
plt.imshow(pixel_values, cmap='gray')

In [None]:
# Flatten, divide by 255, and wrap as a one-row frame with the pixel column names.
normalized_pixel_values = pixel_values.ravel() / 255
ndf = pd.DataFrame(normalized_pixel_values.reshape(1, -1), columns=columns)

'Hatchback':0, 'Pickup':1, 'Seden':2, 'SUV':3

In [None]:
# Predicted class code for the image loaded above.
clf.predict(ndf)

In [None]:
# Load a sample image as 36x36 grayscale and preview it.
imagepath = 'checks/sedan2.jpg'
# Close the file handle deterministically; convert() returns an in-memory copy.
with Image.open(imagepath) as source_image:
    image = source_image.convert("L")
# Image.ADAPTIVE is a palette constant with the same integer value as the
# LANCZOS filter; naming LANCZOS keeps the pixels identical.
resized_image = image.resize((36,36), Image.LANCZOS)
pixel_values = np.array(resized_image)
plt.imshow(pixel_values, cmap='gray')

In [None]:
# Flatten, divide by 255, and wrap as a one-row frame with the pixel column names.
normalized_pixel_values = pixel_values.ravel() / 255
ndf = pd.DataFrame(normalized_pixel_values.reshape(1, -1), columns=columns)

'Hatchback':0, 'Pickup':1, 'Seden':2, 'SUV':3

In [None]:
# Predicted class code for the image loaded above.
clf.predict(ndf)

In [None]:
# Load a sample image as 36x36 grayscale and preview it.
imagepath = 'suv.jpg'
# Close the file handle deterministically; convert() returns an in-memory copy.
with Image.open(imagepath) as source_image:
    image = source_image.convert("L")
# Image.ADAPTIVE is a palette constant with the same integer value as the
# LANCZOS filter; naming LANCZOS keeps the pixels identical.
resized_image = image.resize((36,36), Image.LANCZOS)
pixel_values = np.array(resized_image)
plt.imshow(pixel_values, cmap='gray')

In [None]:
# Flatten, divide by 255, and wrap as a one-row frame with the pixel column names.
normalized_pixel_values = pixel_values.ravel() / 255
ndf = pd.DataFrame(normalized_pixel_values.reshape(1, -1), columns=columns)

'Hatchback':0, 'Pickup':1, 'Seden':2, 'SUV':3

In [None]:
# Predicted class code for the image loaded above.
clf.predict(ndf)

In [None]:
# Load a sample image as 36x36 grayscale and preview it.
imagepath = 'unknown.jpg'
# Close the file handle deterministically; convert() returns an in-memory copy.
with Image.open(imagepath) as source_image:
    image = source_image.convert("L")
# Image.ADAPTIVE is a palette constant with the same integer value as the
# LANCZOS filter; naming LANCZOS keeps the pixels identical.
resized_image = image.resize((36,36), Image.LANCZOS)
pixel_values = np.array(resized_image)
plt.imshow(pixel_values, cmap='gray')

In [None]:
# Flatten, divide by 255, and wrap as a one-row frame with the pixel column names.
normalized_pixel_values = pixel_values.ravel() / 255
ndf = pd.DataFrame(normalized_pixel_values.reshape(1, -1), columns=columns)

'Hatchback':0, 'Pickup':1, 'Seden':2, 'SUV':3

In [None]:
# Predicted class code for the image loaded above.
clf.predict(ndf)

In [None]:
import skops.io as sio 
from PIL import Image
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd
# Feature names expected by the classifier: one column per pixel (36*36).
columns = [f'pixel_{i}' for i in range(1296)]

# Load the persisted classifier. Recent skops releases reject `trusted=True`;
# the supported flow is to list the types in the file that skops does not
# trust by default, review them, and pass that list explicitly.
model_path = '/home/anuraaga/Documents/Projects/Project-PredthePrice/docker/models/car_detection_model_svc.skops'
untrusted_types = sio.get_untrusted_types(file=model_path)
# Review this output before relying on the model: only load files you created.
print('Types requiring explicit trust:', untrusted_types)
clf = sio.load(model_path, trusted=untrusted_types)

In [None]:
# Load a sample image as 36x36 grayscale and preview it.
imagepath = 'sedan3.jpg'
# Close the file handle deterministically; convert() returns an in-memory copy.
with Image.open(imagepath) as source_image:
    image = source_image.convert("L")
# Image.ADAPTIVE is a palette constant with the same integer value as the
# LANCZOS filter; naming LANCZOS keeps the pixels identical.
resized_image = image.resize((36,36), Image.LANCZOS)
pixel_values = np.array(resized_image)
plt.imshow(pixel_values, cmap='gray')

In [None]:
# Flatten, divide by 255, and wrap as a one-row frame with the pixel column names.
normalized_pixel_values = pixel_values.ravel() / 255
ndf = pd.DataFrame(normalized_pixel_values.reshape(1, -1), columns=columns)

'Hatchback':0, 'Pickup':1, 'Seden':2, 'SUV':3

In [None]:
# Per-class probability estimates for the image loaded above.
clf.predict_proba(ndf)

In [None]:
# Load a sample image as 36x36 grayscale and preview it.
imagepath = 'unknown2.jpg'
# Close the file handle deterministically; convert() returns an in-memory copy.
with Image.open(imagepath) as source_image:
    image = source_image.convert("L")
# Image.ADAPTIVE is a palette constant with the same integer value as the
# LANCZOS filter; naming LANCZOS keeps the pixels identical.
resized_image = image.resize((36,36), Image.LANCZOS)
pixel_values = np.array(resized_image)
plt.imshow(pixel_values, cmap='gray')

In [None]:
# Flatten, divide by 255, and wrap as a one-row frame with the pixel column names.
normalized_pixel_values = pixel_values.ravel() / 255
ndf = pd.DataFrame(normalized_pixel_values.reshape(1, -1), columns=columns)

'Hatchback':0, 'Pickup':1, 'Seden':2, 'SUV':3

In [None]:
# Predicted class code for the image loaded above.
clf.predict(ndf)

In [None]:
# Per-class probability estimates for the same image.
clf.predict_proba(ndf)

In [None]:
# Load a sample image as 36x36 grayscale and preview it.
imagepath = 'unknown3.jpg'
# Close the file handle deterministically; convert() returns an in-memory copy.
with Image.open(imagepath) as source_image:
    image = source_image.convert("L")
# Image.ADAPTIVE is a palette constant with the same integer value as the
# LANCZOS filter; naming LANCZOS keeps the pixels identical.
resized_image = image.resize((36,36), Image.LANCZOS)
pixel_values = np.array(resized_image)
plt.imshow(pixel_values, cmap='gray')

In [None]:
# Flatten, divide by 255, and wrap as a one-row frame with the pixel column names.
normalized_pixel_values = pixel_values.ravel() / 255
ndf = pd.DataFrame(normalized_pixel_values.reshape(1, -1), columns=columns)

'Hatchback':0, 'Pickup':1, 'Seden':2, 'SUV':3

In [None]:
# Predicted class code for the image loaded above.
clf.predict(ndf)

In [None]:
# Per-class probability estimates for the same image.
clf.predict_proba(ndf)

In [None]:
# Load a sample image as 36x36 grayscale and preview it.
imagepath = 'unknown4.jpg'
# Close the file handle deterministically; convert() returns an in-memory copy.
with Image.open(imagepath) as source_image:
    image = source_image.convert("L")
# Image.ADAPTIVE is a palette constant with the same integer value as the
# LANCZOS filter; naming LANCZOS keeps the pixels identical.
resized_image = image.resize((36,36), Image.LANCZOS)
pixel_values = np.array(resized_image)
plt.imshow(pixel_values, cmap='gray')

In [None]:
# Flatten, divide by 255, and wrap as a one-row frame with the pixel column names.
normalized_pixel_values = pixel_values.ravel() / 255
ndf = pd.DataFrame(normalized_pixel_values.reshape(1, -1), columns=columns)

'Hatchback':0, 'Pickup':1, 'Seden':2, 'SUV':3

In [None]:
# Predicted class code for the image loaded above.
clf.predict(ndf)

In [None]:
# Per-class probability estimates for the same image.
clf.predict_proba(ndf)

In [None]:
# Load a sample image as 36x36 grayscale and preview it.
imagepath = 'unknown6.jpg'
# Close the file handle deterministically; convert() returns an in-memory copy.
with Image.open(imagepath) as source_image:
    image = source_image.convert("L")
# Image.ADAPTIVE is a palette constant with the same integer value as the
# LANCZOS filter; naming LANCZOS keeps the pixels identical.
resized_image = image.resize((36,36), Image.LANCZOS)
pixel_values = np.array(resized_image)
plt.imshow(pixel_values, cmap='gray')

In [None]:
# Flatten, divide by 255, and wrap as a one-row frame with the pixel column names.
normalized_pixel_values = pixel_values.ravel() / 255
ndf = pd.DataFrame(normalized_pixel_values.reshape(1, -1), columns=columns)

'Hatchback':0, 'Pickup':1, 'Seden':2, 'SUV':3

In [None]:
# Predicted class code for the image loaded above.
clf.predict(ndf)