In [12]:
# pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.3.1-cp38-cp38-win_amd64.whl (9.3 MB)
     ---------------------------------------- 9.3/9.3 MB 7.0 MB/s eta 0:00:00
Collecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-3.2.0-py3-none-any.whl (15 kB)
Installing collected packages: threadpoolctl, scikit-learn
Successfully installed scikit-learn-1.3.1 threadpoolctl-3.2.0
Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import numpy as np
import cv2
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
directory = "TImages/"

In [3]:
image_size = (244, 244)
classes = ["0", "1", "2"]

In [4]:
# Create an ImageDataGenerator instance with data augmentation settings
datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
    samplewise_center=True,
    samplewise_std_normalization=True,
)

In [5]:
training_data = []

def create_training_data():
    for category in classes:
        path = os.path.join(directory, category)
        class_num = classes.index(category)
        for img in os.listdir(path):
            try:
                img_array = cv2.imread(os.path.join(path, img))
                new_array = cv2.resize(img_array, image_size)

                # Generate and store augmented images
                augmented_images = []
                augmented_images.append(new_array)  # Original image
                img_array_aug = new_array.reshape((1,) + new_array.shape)
                i = 0
                for batch in datagen.flow(img_array_aug, batch_size=1):
                    augmented_images.append(batch[0])
                    i += 1
                    if i >= 5:  # Generate 5 augmented images per input image
                        break

                for augmented_image in augmented_images:
                    image_hsv = cv2.cvtColor(augmented_image, cv2.COLOR_BGR2HSV)
                    training_data.append([image_hsv, class_num])

            except Exception as e:
                pass

In [6]:
create_training_data()

In [7]:
lenofimage = len(training_data)
print(lenofimage)

5850


In [10]:
X=[]
y=[]

for categories, label in training_data:
    X.append(categories)
    y.append(label)
X= np.array(X).reshape(lenofimage,-1)

In [13]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, )

In [15]:
## SVM MODEL

In [16]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [17]:
# Feature Scaling
sc = StandardScaler()

X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [18]:
# Hyperparameter Tuning using GridSearchCV
# param_grid = {'C': [0.1, 1, 10],
#               'kernel': ['linear', 'rbf', 'poly'],
#               'gamma': ['scale', 'auto', 0.1, 1, 10]
#               }
param_grid = {'C': [0.1 ,1, 5],
              'kernel': ['rbf', 'linear'],
              'gamma': ['scale']
#               'gamma': ['auto', 0.1, 1]
              }

In [19]:
# Train the SVM model
svm = SVC()
grid_search = GridSearchCV(svm, param_grid, cv=5, verbose=2)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=18.5min
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=22.3min
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=20.2min
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=19.1min
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=20.5min
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time= 7.0min
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time= 7.4min
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time= 7.9min
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time= 7.4min
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time= 7.8min
[CV] END .......................C=1, gamma=scale, kernel=rbf; total time=22.3min
[CV] END .......................C=1, gamma=scale,

In [20]:
# Get the best estimator and evaluate it
best_svm = grid_search.best_estimator_
y_test_pred = best_svm.predict(X_test)
accuracy = accuracy_score(y_test, y_test_pred)
print("Best SVM Parameters:", grid_search.best_params_)
print("Accuracy: {:.2f}%".format(accuracy * 100))

Best SVM Parameters: {'C': 5, 'gamma': 'scale', 'kernel': 'rbf'}
Accuracy: 94.70%


In [None]:
# work progressing...........


In [21]:
grid_search.cv_results_

{'mean_fit_time': array([552.6403183 , 377.35660434, 547.61807342, 367.47596931,
        557.7248826 , 659.78211789]),
 'std_fit_time': array([ 56.06954681,  15.05308075,  20.95909106,  18.79557445,
         42.06761957, 457.44229012]),
 'mean_score_time': array([654.92776756,  72.74788489, 745.93940368,  72.1356226 ,
        715.85854254, 137.85519862]),
 'std_score_time': array([32.14167496,  5.73342063, 56.43992296,  9.60856922, 51.61624563,
        83.43203638]),
 'param_C': masked_array(data=[0.1, 0.1, 1, 1, 5, 5],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_gamma': masked_array(data=['scale', 'scale', 'scale', 'scale', 'scale', 'scale'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
   

In [22]:
df = pd.DataFrame(grid_search.cv_results_)
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,552.640318,56.069547,654.927768,32.141675,0.1,scale,rbf,"{'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}",0.935897,0.935897,0.935897,0.935897,0.934829,0.935684,0.000427,6
1,377.356604,15.053081,72.747885,5.733421,0.1,scale,linear,"{'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}",0.938034,0.938034,0.944444,0.945513,0.952991,0.943803,0.005556,2
2,547.618073,20.959091,745.939404,56.439923,1.0,scale,rbf,"{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}",0.935897,0.936966,0.936966,0.940171,0.934829,0.936966,0.001788,5
3,367.475969,18.795574,72.135623,9.608569,1.0,scale,linear,"{'C': 1, 'gamma': 'scale', 'kernel': 'linear'}",0.938034,0.938034,0.944444,0.945513,0.952991,0.943803,0.005556,2
4,557.724883,42.06762,715.858543,51.616246,5.0,scale,rbf,"{'C': 5, 'gamma': 'scale', 'kernel': 'rbf'}",0.948718,0.945513,0.955128,0.951923,0.955128,0.951282,0.003738,1
5,659.782118,457.44229,137.855199,83.432036,5.0,scale,linear,"{'C': 5, 'gamma': 'scale', 'kernel': 'linear'}",0.938034,0.938034,0.944444,0.945513,0.952991,0.943803,0.005556,2


In [23]:
df[['param_C', 'param_kernel','params',  'mean_test_score', 'rank_test_score']]

Unnamed: 0,param_C,param_kernel,params,mean_test_score,rank_test_score
0,0.1,rbf,"{'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}",0.935684,6
1,0.1,linear,"{'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}",0.943803,2
2,1.0,rbf,"{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}",0.936966,5
3,1.0,linear,"{'C': 1, 'gamma': 'scale', 'kernel': 'linear'}",0.943803,2
4,5.0,rbf,"{'C': 5, 'gamma': 'scale', 'kernel': 'rbf'}",0.951282,1
5,5.0,linear,"{'C': 5, 'gamma': 'scale', 'kernel': 'linear'}",0.943803,2


In [24]:
grid_search.best_score_

0.9512820512820512

In [25]:
grid_search.best_params_

{'C': 5, 'gamma': 'scale', 'kernel': 'rbf'}