## Mount Google Drive

In [None]:
from google.colab import drive
# Mount the user's Google Drive at /content/drive (Colab-only API).
# NOTE(review): later cells use relative './drive/...' paths, which presumably
# rely on the working directory being /content — confirm.
drive.mount('/content/drive')

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import csv
import joblib

In [None]:
# Locations on the mounted Drive: project root, raw images (one sub-folder per
# class) and the destination folder for the resized copies.
root_path = './drive/MyDrive/ImageClassification/'
data_path = './drive/MyDrive/ImageClassification/data/'
resized_data_path = './drive/MyDrive/ImageClassification/resized_data/'

# Class names; each one is also the name of its image sub-folder.
vehicle_list = ['car', 'bus', 'motorcycle']
# One row per image: [label, pixel_0, pixel_1, ...].
data_array = []

# Target (width, height) every image is resized to before flattening.
new_dimensions = (256, 256)

for vehicle in vehicle_list:
    source_dir = os.path.join(data_path, vehicle)
    output_dir = os.path.join(resized_data_path, vehicle)
    # cv2.imwrite does not create missing folders; it only returns False.
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir order is arbitrary; sorting keeps the row order (and thus the
    # seeded train/test split further down) stable between runs.
    for img_name in sorted(os.listdir(source_dir)):
        img_path = os.path.join(source_dir, img_name)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # imread signals an unreadable / non-image file by returning None
            # instead of raising, which would crash cv2.resize below.
            print(f'Skipping unreadable image: {img_path}')
            continue

        resized_image = cv2.resize(img, new_dimensions, interpolation=cv2.INTER_AREA)
        output_path = os.path.join(output_dir, img_name)
        if not cv2.imwrite(output_path, resized_image):
            print(f'Could not write resized image: {output_path}')

        # tolist() converts the pixels in one C-level pass instead of boxing
        # 65,536 numpy scalars one by one.
        data_array.append([vehicle, *resized_image.flatten().tolist()])

In [None]:
# Turn the collected rows into a table: column 0 holds the class label and is
# renamed accordingly; the remaining columns keep their integer names.
data = pd.DataFrame(data_array).rename(columns={0: 'Label'})

# Keep a CSV copy of the table in the project root.
data.to_csv(os.path.join(root_path, 'img_data.csv'))

# Preview the first rows (shown as the cell output).
data.head()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Separate the pixel features (X) from the class labels (y).
X = data.drop(columns=['Label'])
y = data['Label']

# Hold out 20% of the samples for testing. stratify=y keeps the class
# proportions the same in both parts, so the reported accuracy is not skewed
# by a class being over- or under-represented in the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, stratify=y)
print("Số  dữ liệu tập train: %i và tập test: %i" % (X_train.shape[0], X_test.shape[0]))

In [None]:
# Baseline classifiers with default hyper-parameters. The randomised tree
# models get a fixed seed so their scores are repeatable between runs.
# NOTE(review): LogisticRegression's default iteration limit may not be enough
# to converge on unscaled pixel features — confirm and raise max_iter if needed.
models = {
    "DecisionTree": DecisionTreeClassifier(random_state=0),
    "RandomForest": RandomForestClassifier(random_state=0),
    "SVC": SVC(),
    "LogReg": LogisticRegression(),
}

# Test-set accuracy per model name, in percent.
accuracy = {}
for key, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Scale to percent first, then round: rounding before multiplying by 100
    # reintroduces float artefacts such as 56.99999999999999.
    accuracy[key] = round(accuracy_score(y_test, y_pred) * 100, 2)
    # Persist every fitted model in the project root as <name>.pkl.
    joblib.dump(model, root_path + f'{key}.pkl')

# Build the table straight from the dict so model names and scores stay
# paired by key instead of relying on positional assignment.
report = pd.DataFrame({'Accuracy (%)': pd.Series(accuracy)})
print("\nĐộ chính xác các mô hình phân loại")
display(report)

In [None]:
svc_model = SVC()

# Parameter grid for SVC
svc_para_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf', 'poly']
}

# Stratified folds keep the class proportions in every fold, and the fixed
# seed makes the selected parameters and best_score_ repeatable between runs.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
grid_search = GridSearchCV(
    svc_model,
    svc_para_grid,
    cv=kf,
    scoring='accuracy')
# Exhaustively evaluates every parameter combination on the training split.
grid_search.fit(X_train, y_train)

In [None]:
# Show the winning SVC configuration together with the score the grid search
# recorded for it (scoring was set to accuracy).
best_svc = grid_search.best_estimator_
best_cv_accuracy = grid_search.best_score_

print('Mô hình SVC tốt nhất')
display(best_svc)
print('Độ chính xác của mô hình', best_cv_accuracy)

In [None]:
# Persist the best estimator found by the grid search in the project root.
best_model_path = os.path.join(root_path, 'SVC_best_score.pkl')
joblib.dump(grid_search.best_estimator_, best_model_path)