In [1]:
import os
import numpy as np
import cv2
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
directory = "TImages/"

In [3]:
image_size = (244, 244)
classes = ["0", "1", "2"]

In [4]:
# Create an ImageDataGenerator instance with data augmentation settings
datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

In [5]:
training_data = []

def create_training_data():
    for category in classes:
        path = os.path.join(directory, category)
        class_num = classes.index(category)
        for img in os.listdir(path):
            try:
                img_array = cv2.imread(os.path.join(path, img))
                new_array = cv2.resize(img_array, image_size)

                # Generate and store augmented images
                augmented_images = []
                augmented_images.append(new_array)  # Original image
                img_array_aug = new_array.reshape((1,) + new_array.shape)
                i = 0
                for batch in datagen.flow(img_array_aug, batch_size=1):
                    augmented_images.append(batch[0])
                    i += 1
                    if i >= 3:  # Generate 3 augmented images per input image
                        break

                for augmented_image in augmented_images:
                    image_hsv = cv2.cvtColor(augmented_image, cv2.COLOR_BGR2HSV)
                    training_data.append([image_hsv, class_num])

            except Exception as e:
                pass

In [6]:
create_training_data()

In [7]:
lenofimage = len(training_data)
print(lenofimage)

3900


In [8]:
# Shuffle the training data to ensure randomness
np.random.shuffle(training_data)

In [9]:
# Separate images (X) and labels (y)
X = []
y = []

for features, label in training_data:
    X.append(features)
    y.append(label)

# Convert lists to numpy arrays
X = np.array(X)
y = np.array(y)

In [10]:
# Print the shapes to verify
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)

Shape of X: (3900, 244, 244, 3)
Shape of y: (3900,)


In [11]:
from sklearn.model_selection import train_test_split

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [13]:
X_train_reshaped = X_train.reshape(len(X_train), -1)  # Flatten each image
X_test_reshaped = X_test.reshape(len(X_test), -1)
X_train_reshaped.shape

(3120, 178608)

In [14]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

In [15]:
# Feature Scaling
sc = StandardScaler()

X_train_fs = sc.fit_transform(X_train_reshaped)
X_test_fs = sc.transform(X_test_reshaped)


In [16]:
## NAIVE BAYES

In [17]:
from sklearn.naive_bayes import GaussianNB

In [18]:
# Define the parameter grid for hyperparameter tuning
param_grid = {
    'var_smoothing': [1e-5, 1e-9] # You can adjust this range
    
}

In [19]:
nb = GaussianNB()

In [20]:
# Create the GridSearch object without cross-validation
grid_search = GridSearchCV(nb, param_grid, cv=5, verbose=2)  # Set cv=None for no cross-validation

In [21]:
grid_search.class_prior_ = [0.5, 0.5]

In [22]:
# Fit the GridSearch object to your data
grid_search.fit(X_train_fs, y_train)


Fitting 5 folds for each of 2 candidates, totalling 10 fits
[CV] END ................................var_smoothing=1e-05; total time=   5.8s
[CV] END ................................var_smoothing=1e-05; total time=   6.1s
[CV] END ................................var_smoothing=1e-05; total time=   5.9s
[CV] END ................................var_smoothing=1e-05; total time=   7.0s
[CV] END ................................var_smoothing=1e-05; total time=   6.3s
[CV] END ................................var_smoothing=1e-09; total time=   6.6s
[CV] END ................................var_smoothing=1e-09; total time=   6.5s
[CV] END ................................var_smoothing=1e-09; total time=   6.2s
[CV] END ................................var_smoothing=1e-09; total time=   6.6s
[CV] END ................................var_smoothing=1e-09; total time=   6.9s


In [23]:
# Get the best estimator and evaluate it
best_nb = grid_search.best_estimator_
y_test_pred_nb = best_nb.predict(X_test_fs)
accuracy_nb = accuracy_score(y_test, y_test_pred_nb)
print("Best Gaussian Naive Bayes Parameters:", grid_search.best_params_)
print("Accuracy: {:.2f}%".format(accuracy_nb * 100))

Best Gaussian Naive Bayes Parameters: {'var_smoothing': 1e-05}
Accuracy: 24.10%


In [24]:
## for SVM

In [25]:
from sklearn.svm import SVC

In [26]:
# Hyperparameter Tuning using GridSearchCV
param_grid_svm = {'C': [5], 'gamma': ['auto'], 'kernel': ['rbf']}

In [27]:
# Train the SVM model
svm = SVC()
grid_search = GridSearchCV(svm, param_grid_svm, cv=5, verbose=2)
grid_search.fit(X_train_fs, y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV] END ........................C=5, gamma=auto, kernel=rbf; total time=27.1min
[CV] END ........................C=5, gamma=auto, kernel=rbf; total time=16.8min
[CV] END ........................C=5, gamma=auto, kernel=rbf; total time=11.8min
[CV] END ........................C=5, gamma=auto, kernel=rbf; total time=17.0min
[CV] END ........................C=5, gamma=auto, kernel=rbf; total time=12.5min


In [28]:
# # Get the best estimator and evaluate it
best_svm = grid_search.best_estimator_
y_test_pred_svm = best_svm.predict(X_test_fs)
accuracy_svm = accuracy_score(y_test, y_test_pred_svm)
print("Best SVM Parameters:", grid_search.best_params_)
print("Accuracy: {:.2f}%".format(accuracy_svm * 100))

Best SVM Parameters: {'C': 5, 'gamma': 'auto', 'kernel': 'rbf'}
Accuracy: 95.26%


In [29]:
## for KNN

In [30]:
from sklearn.neighbors import KNeighborsClassifier

In [31]:
# Define the parameter grid for hyperparameter tuning
param_grid = {
    'n_neighbors': [3, 5]  # You can adjust this range
}

In [32]:
# Create the KNN model
knn = KNeighborsClassifier(n_neighbors = 5, weights='uniform', p = 2, algorithm='auto') # metric = 'minkowski',

In [33]:
# Create the GridSearchCV object
grid_search = GridSearchCV(knn, param_grid, cv=5, verbose=2)

# Fit the GridSearchCV object to your data
grid_search.fit(X_train_fs, y_train)

Fitting 5 folds for each of 2 candidates, totalling 10 fits
[CV] END ......................................n_neighbors=3; total time=  12.1s
[CV] END ......................................n_neighbors=3; total time=  12.0s
[CV] END ......................................n_neighbors=3; total time=  11.5s
[CV] END ......................................n_neighbors=3; total time=  12.2s
[CV] END ......................................n_neighbors=3; total time=  11.3s
[CV] END ......................................n_neighbors=5; total time=  11.4s
[CV] END ......................................n_neighbors=5; total time=  11.3s
[CV] END ......................................n_neighbors=5; total time=  11.2s
[CV] END ......................................n_neighbors=5; total time=  10.8s
[CV] END ......................................n_neighbors=5; total time=  10.8s


In [34]:
# Get the best estimator and evaluate it
best_knn = grid_search.best_estimator_
y_test_pred_knn = best_knn.predict(X_test_fs)
accuracy_knn = accuracy_score(y_test, y_test_pred_knn)
print("Best KNN Parameters:", grid_search.best_params_)
print("Accuracy: {:.2f}%".format(accuracy_knn * 100))

Best KNN Parameters: {'n_neighbors': 5}
Accuracy: 94.49%


In [35]:
# GRADIENT BOOSTING

In [36]:
from sklearn.ensemble import GradientBoostingClassifier

In [37]:
# Hyperparameter Tuning using GridSearchCV
param_grid = {'n_estimators': [100, 150],
              'learning_rate': [0.1],
              'max_depth': [5],
              'max_features': ['log2'],
              'loss': ['log_loss'],
              'subsample': [0.5, 1]
            }

In [38]:
gb = GradientBoostingClassifier()

In [39]:
# Create the GridSearch object without cross-validation
grid_search = GridSearchCV(gb, param_grid, cv=5, verbose=2, error_score='raise')  # Set cv=None for no cross-validation

# Fit the GridSearch object to your data
grid_search.fit(X_train_fs, y_train)

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV] END learning_rate=0.1, loss=log_loss, max_depth=5, max_features=log2, n_estimators=100, subsample=0.5; total time=   7.4s
[CV] END learning_rate=0.1, loss=log_loss, max_depth=5, max_features=log2, n_estimators=100, subsample=0.5; total time=   7.0s
[CV] END learning_rate=0.1, loss=log_loss, max_depth=5, max_features=log2, n_estimators=100, subsample=0.5; total time=   6.7s
[CV] END learning_rate=0.1, loss=log_loss, max_depth=5, max_features=log2, n_estimators=100, subsample=0.5; total time=   7.3s
[CV] END learning_rate=0.1, loss=log_loss, max_depth=5, max_features=log2, n_estimators=100, subsample=0.5; total time=   6.8s
[CV] END learning_rate=0.1, loss=log_loss, max_depth=5, max_features=log2, n_estimators=100, subsample=1; total time=  10.7s
[CV] END learning_rate=0.1, loss=log_loss, max_depth=5, max_features=log2, n_estimators=100, subsample=1; total time=  10.4s
[CV] END learning_rate=0.1, loss=log_loss, max_depth=5,

In [40]:
# Get the best estimator and evaluate it
best_gb = grid_search.best_estimator_
y_test_pred_gb = best_gb.predict(X_test_fs)
accuracy_gb = accuracy_score(y_test, y_test_pred_gb)
print("Best Gradient Boosting Parameters:", grid_search.best_params_)
print("Accuracy: {:.2f}%".format(accuracy_gb * 100))

Best Gradient Boosting Parameters: {'learning_rate': 0.1, 'loss': 'log_loss', 'max_depth': 5, 'max_features': 'log2', 'n_estimators': 150, 'subsample': 1}
Accuracy: 96.54%


In [41]:
## RANDOM FOREST

In [42]:
from sklearn.ensemble import RandomForestClassifier

In [43]:
param_grid = {
    'n_estimators': [80, 100, 120],
    'max_depth': [None, 20, 25],
    'min_samples_split': [5, 10],
    'min_samples_leaf': [2, 4],
    'max_features': [ 'sqrt', 'log2'],
    'bootstrap': [False]
}


In [44]:
# Create the Random Forest model
rf = RandomForestClassifier()

In [45]:
# Create the GridSearchCV object
grid_search = GridSearchCV(rf, param_grid, cv=5, verbose=2, error_score='raise')

In [46]:
# Fit the GridSearchCV object to your data
grid_search.fit(X_train_fs, y_train)

Fitting 5 folds for each of 72 candidates, totalling 360 fits
[CV] END bootstrap=False, max_depth=None, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time= 6.2min
[CV] END bootstrap=False, max_depth=None, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time= 4.2min
[CV] END bootstrap=False, max_depth=None, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time= 3.9min
[CV] END bootstrap=False, max_depth=None, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time= 7.6min
[CV] END bootstrap=False, max_depth=None, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time= 4.0min
[CV] END bootstrap=False, max_depth=None, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time= 4.4min
[CV] END bootstrap=False, max_depth=None, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estim

[CV] END bootstrap=False, max_depth=None, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=120; total time= 3.3min
[CV] END bootstrap=False, max_depth=None, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time=   4.6s
[CV] END bootstrap=False, max_depth=None, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time=   4.9s
[CV] END bootstrap=False, max_depth=None, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time=   4.6s
[CV] END bootstrap=False, max_depth=None, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time=   4.6s
[CV] END bootstrap=False, max_depth=None, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time=   4.3s
[CV] END bootstrap=False, max_depth=None, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   5.2s
[CV] END bootstrap=False, max_d

[CV] END bootstrap=False, max_depth=None, max_features=log2, min_samples_leaf=4, min_samples_split=10, n_estimators=120; total time=   6.0s
[CV] END bootstrap=False, max_depth=None, max_features=log2, min_samples_leaf=4, min_samples_split=10, n_estimators=120; total time=   5.9s
[CV] END bootstrap=False, max_depth=20, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time= 1.5min
[CV] END bootstrap=False, max_depth=20, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time= 1.5min
[CV] END bootstrap=False, max_depth=20, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time= 1.5min
[CV] END bootstrap=False, max_depth=20, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time= 1.5min
[CV] END bootstrap=False, max_depth=20, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time= 1.4min
[CV] END bootstrap=False, max_depth=20, 

[CV] END bootstrap=False, max_depth=20, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=120; total time= 2.1min
[CV] END bootstrap=False, max_depth=20, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=120; total time= 2.1min
[CV] END bootstrap=False, max_depth=20, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time=   4.2s
[CV] END bootstrap=False, max_depth=20, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time=   4.3s
[CV] END bootstrap=False, max_depth=20, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time=   4.3s
[CV] END bootstrap=False, max_depth=20, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time=   4.3s
[CV] END bootstrap=False, max_depth=20, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time=   4.4s
[CV] END bootstrap=False, max_depth=20, max_

[CV] END bootstrap=False, max_depth=20, max_features=log2, min_samples_leaf=4, min_samples_split=10, n_estimators=120; total time=   5.8s
[CV] END bootstrap=False, max_depth=20, max_features=log2, min_samples_leaf=4, min_samples_split=10, n_estimators=120; total time=   5.8s
[CV] END bootstrap=False, max_depth=25, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time= 1.7min
[CV] END bootstrap=False, max_depth=25, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time= 1.8min
[CV] END bootstrap=False, max_depth=25, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time= 1.8min
[CV] END bootstrap=False, max_depth=25, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time= 1.8min
[CV] END bootstrap=False, max_depth=25, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time= 1.8min
[CV] END bootstrap=False, max_depth=25, max_

[CV] END bootstrap=False, max_depth=25, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=120; total time= 2.7min
[CV] END bootstrap=False, max_depth=25, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=120; total time= 2.5min
[CV] END bootstrap=False, max_depth=25, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time=   4.2s
[CV] END bootstrap=False, max_depth=25, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time=   4.4s
[CV] END bootstrap=False, max_depth=25, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time=   4.2s
[CV] END bootstrap=False, max_depth=25, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time=   4.2s
[CV] END bootstrap=False, max_depth=25, max_features=log2, min_samples_leaf=2, min_samples_split=5, n_estimators=80; total time=   4.3s
[CV] END bootstrap=False, max_depth=25, max_

[CV] END bootstrap=False, max_depth=25, max_features=log2, min_samples_leaf=4, min_samples_split=10, n_estimators=120; total time=   6.1s
[CV] END bootstrap=False, max_depth=25, max_features=log2, min_samples_leaf=4, min_samples_split=10, n_estimators=120; total time=   6.0s


In [47]:
# Get the best estimator and evaluate it
best_rf = grid_search.best_estimator_
y_test_pred_rf = best_rf.predict(X_test_fs)
accuracy_rf = accuracy_score(y_test, y_test_pred_rf)
print("Best Random Forest Parameters:", grid_search.best_params_)
print("Accuracy: {:.2f}%".format(accuracy_rf * 100))

Best Random Forest Parameters: {'bootstrap': False, 'max_depth': None, 'max_features': 'log2', 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
Accuracy: 96.67%


In [None]:
# import numpy as np
# import matplotlib.pyplot as plt

# # List of model names (e.g., model names or labels for each model)
# model_names = ["Naive Bayes", "SVM", "KNN", "Random Forest", "Gradient Boosting"]

# # List of training accuracy scores corresponding to each model
# train_accuracy_scores = [
#     accuracy_score(y_train, best_nb.predict(X_train_fs)),
#     accuracy_score(y_train, best_svm.predict(X_train_fs)),
#     accuracy_score(y_train, best_knn.predict(X_train_fs)),
#     accuracy_score(y_train, best_rf.predict(X_train_fs)),
#     accuracy_score(y_train, best_gb.predict(X_train_fs)),    
#    ]

# # List of test accuracy scores corresponding to each model
# test_accuracy_scores = [
#     (y_test,y_test_pred_nb),
#     (y_test,y_test_pred_svm),
#     (y_test,y_test_pred_knn),
#     (y_test,y_test_pred_rf),
#     (y_test,y_test_pred_gb),
#   ]

# # Set the width of each bar
# bar_width = 0.35

# # Generate an array of indices for the x-axis positions of bars
# indices = np.arange(len(model_names))

# # Create subplots for training and test accuracy
# plt.figure(figsize=(10, 6))

# # Training accuracy subplot
# plt.bar(indices - bar_width/2, train_accuracy_scores, bar_width, label='Train', color='skyblue')

# # Test accuracy subplot
# plt.bar(indices + bar_width/2, test_accuracy_scores, bar_width, label='Test', color='lightcoral')

# # Set x-axis labels and tick positions
# plt.xticks(indices, model_names, rotation=45, ha='right')

# # Set axis labels and title
# plt.xlabel('Models')
# plt.ylabel('Accuracy')
# plt.title('Training and Test Accuracy for Different Models')

# # Add a legend
# plt.legend(loc='upper right')

# # Show the plot
# plt.tight_layout()
# plt.show()
