In [1]:
import os
import numpy as np
import cv2
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
directory = "TImages/"

In [3]:
image_size = (244, 244)
classes = ["0", "1", "2"]

In [4]:
# Create an ImageDataGenerator instance with data augmentation settings
datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

In [5]:
training_data = []

def create_training_data():
    for category in classes:
        path = os.path.join(directory, category)
        class_num = classes.index(category)
        for img in os.listdir(path):
            try:
                img_array = cv2.imread(os.path.join(path, img))
                new_array = cv2.resize(img_array, image_size)

                # Generate and store augmented images
                augmented_images = []
                augmented_images.append(new_array)  # Original image
                img_array_aug = new_array.reshape((1,) + new_array.shape)
                i = 0
                for batch in datagen.flow(img_array_aug, batch_size=1):
                    augmented_images.append(batch[0])
                    i += 1
                    if i >= 3:  # Generate 3 augmented images per input image
                        break

                for augmented_image in augmented_images:
                    image_hsv = cv2.cvtColor(augmented_image, cv2.COLOR_BGR2HSV)
                    training_data.append([image_hsv, class_num])

            except Exception as e:
                pass

In [6]:
create_training_data()

In [7]:
lenofimage = len(training_data)
print(lenofimage)

3900


In [8]:
# Shuffle the training data to ensure randomness
np.random.shuffle(training_data)

In [9]:
# Separate images (X) and labels (y)
X = []
y = []

for features, label in training_data:
    X.append(features)
    y.append(label)

# Convert lists to numpy arrays
X = np.array(X)
y = np.array(y)

In [10]:
X = X / 255.0

In [11]:
# Print the shapes to verify
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)

Shape of X: (3900, 244, 244, 3)
Shape of y: (3900,)


In [32]:
from sklearn.model_selection import train_test_split

In [33]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [34]:
# X_train_reshaped = X_train.reshape(len(X_train), -1)  # Flatten each image
# X_test_reshaped = X_test.reshape(len(X_test), -1)
# X_test_reshaped.shape

In [35]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model

In [36]:
# Load a pre-trained CNN model (e.g., VGG16) and remove the top classification layer.
base_model = VGG16(weights='imagenet', include_top=False)
model = Model(inputs=base_model.input, outputs=base_model.get_layer('block5_pool').output)

In [37]:
# Use the CNN to extract features from your images.
def extract_features(X):
    X = tf.keras.applications.vgg16.preprocess_input(X)
    features = model.predict(X)
    return features

In [38]:
# Extract features from both training and test sets
X_train_features = extract_features(X_train)
X_test_features = extract_features(X_test)



In [39]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score


In [43]:
X_train_reshaped = X_train_features.reshape(len(X_train_features), -1)  # Flatten each image
X_test_reshaped = X_test_features.reshape(len(X_test_features), -1)
X_train_reshaped.shape

(3120, 25088)

In [44]:
# Feature Scaling
sc = StandardScaler()

X_train_fs = sc.fit_transform(X_train_reshaped)
X_test_fs = sc.transform(X_test_reshaped)

In [45]:
## NAIVE BAYES

In [46]:
from sklearn.naive_bayes import GaussianNB

In [47]:
# Define the parameter grid for hyperparameter tuning
param_grid = {
    'var_smoothing': [1e-9] # You can adjust this range
    
}

In [48]:
nb = GaussianNB()

In [49]:
# Create the GridSearch object without cross-validation
grid_search_nb = GridSearchCV(nb, param_grid, cv=5, verbose=2)  # Set cv=None for no cross-validation

In [50]:
grid_search_nb.class_prior_ = [0.5, 0.5]

In [51]:
# Fit the GridSearch object to your data
grid_search_nb_train = grid_search_nb.fit(X_train_fs, y_train)


Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV] END ................................var_smoothing=1e-09; total time=   0.7s
[CV] END ................................var_smoothing=1e-09; total time=   0.7s
[CV] END ................................var_smoothing=1e-09; total time=   0.8s
[CV] END ................................var_smoothing=1e-09; total time=   0.8s
[CV] END ................................var_smoothing=1e-09; total time=   0.8s


In [52]:
# Get the best estimator and evaluate it
best_nb = grid_search_nb.best_estimator_
y_test_pred_nb = best_nb.predict(X_test_fs)
accuracy_nb = accuracy_score(y_test, y_test_pred_nb)
print("Best Gaussian Naive Bayes Parameters:", grid_search_nb.best_params_)
print("Accuracy: {:.2f}%".format(accuracy_nb * 100))

Best Gaussian Naive Bayes Parameters: {'var_smoothing': 1e-09}
Accuracy: 58.33%


In [53]:
## for SVM

In [54]:
from sklearn.svm import SVC

In [55]:
# Hyperparameter Tuning using GridSearchCV
param_grid_svm = {'C': [5], 'gamma': ['auto'], 'kernel': ['rbf']}

In [56]:
# Train the SVM model
svm = SVC()
grid_search_svm = GridSearchCV(svm, param_grid_svm, cv=5, verbose=2)
grid_search_svm_train = grid_search_svm.fit(X_train_fs, y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV] END ........................C=5, gamma=auto, kernel=rbf; total time=  28.0s
[CV] END ........................C=5, gamma=auto, kernel=rbf; total time=  28.8s
[CV] END ........................C=5, gamma=auto, kernel=rbf; total time=  29.3s
[CV] END ........................C=5, gamma=auto, kernel=rbf; total time=  28.1s
[CV] END ........................C=5, gamma=auto, kernel=rbf; total time=  31.5s


In [57]:
# # Get the best estimator and evaluate it
best_svm = grid_search_svm.best_estimator_
y_test_pred_svm = best_svm.predict(X_test_fs)
accuracy_svm = accuracy_score(y_test, y_test_pred_svm)
print("Best SVM Parameters:", grid_search_svm.best_params_)
print("Accuracy: {:.2f}%".format(accuracy_svm * 100))

Best SVM Parameters: {'C': 5, 'gamma': 'auto', 'kernel': 'rbf'}
Accuracy: 92.44%


In [58]:
## for KNN

In [59]:
from sklearn.neighbors import KNeighborsClassifier

In [60]:
# Define the parameter grid for hyperparameter tuning
param_grid = {
    'n_neighbors': [5]  # You can adjust this range
}

In [61]:
# Create the KNN model
knn = KNeighborsClassifier(n_neighbors = 5, weights='uniform', p = 2, algorithm='auto') # metric = 'minkowski',

In [62]:
# Create the GridSearchCV object
grid_search_knn = GridSearchCV(knn, param_grid, cv=5, verbose=2)

# Fit the GridSearchCV object to your data
grid_search_knn_train = grid_search_knn.fit(X_train_fs, y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV] END ......................................n_neighbors=5; total time=   1.2s
[CV] END ......................................n_neighbors=5; total time=   1.1s
[CV] END ......................................n_neighbors=5; total time=   1.1s
[CV] END ......................................n_neighbors=5; total time=   1.1s
[CV] END ......................................n_neighbors=5; total time=   1.1s


In [63]:
# Get the best estimator and evaluate it
best_knn = grid_search_knn.best_estimator_
y_test_pred_knn = best_knn.predict(X_test_fs)
accuracy_knn = accuracy_score(y_test, y_test_pred_knn)
print("Best KNN Parameters:", grid_search_knn.best_params_)
print("Accuracy: {:.2f}%".format(accuracy_knn * 100))

Best KNN Parameters: {'n_neighbors': 5}
Accuracy: 93.59%


In [64]:
# GRADIENT BOOSTING

In [65]:
from sklearn.ensemble import GradientBoostingClassifier

In [66]:
# Hyperparameter Tuning using GridSearchCV
param_grid = {'n_estimators': [150],
              'learning_rate': [0.1],
              'max_depth': [5],
              'max_features': ['log2'],
              'loss': ['log_loss'],
              'subsample': [1]
            }

In [67]:
gb = GradientBoostingClassifier()

In [68]:
# Create the GridSearch object without cross-validation
grid_search_gb = GridSearchCV(gb, param_grid, cv=5, verbose=2, error_score='raise')  # Set cv=None for no cross-validation

# Fit the GridSearch object to your data
grid_search_gb_train = grid_search_gb.fit(X_train_fs, y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV] END learning_rate=0.1, loss=log_loss, max_depth=5, max_features=log2, n_estimators=150, subsample=1; total time=   6.4s
[CV] END learning_rate=0.1, loss=log_loss, max_depth=5, max_features=log2, n_estimators=150, subsample=1; total time=   6.4s
[CV] END learning_rate=0.1, loss=log_loss, max_depth=5, max_features=log2, n_estimators=150, subsample=1; total time=   6.2s
[CV] END learning_rate=0.1, loss=log_loss, max_depth=5, max_features=log2, n_estimators=150, subsample=1; total time=   6.5s
[CV] END learning_rate=0.1, loss=log_loss, max_depth=5, max_features=log2, n_estimators=150, subsample=1; total time=   6.3s


In [69]:
# Get the best estimator and evaluate it
best_gb = grid_search_gb.best_estimator_
y_test_pred_gb = best_gb.predict(X_test_fs)
accuracy_gb = accuracy_score(y_test, y_test_pred_gb)
print("Best Gradient Boosting Parameters:", grid_search_gb.best_params_)
print("Accuracy: {:.2f}%".format(accuracy_gb * 100))

Best Gradient Boosting Parameters: {'learning_rate': 0.1, 'loss': 'log_loss', 'max_depth': 5, 'max_features': 'log2', 'n_estimators': 150, 'subsample': 1}
Accuracy: 92.82%


In [70]:
## RANDOM FOREST

In [71]:
from sklearn.ensemble import RandomForestClassifier

In [72]:
param_grid = {
    'n_estimators': [100],
    'max_depth': [None],
    'min_samples_split': [2],
    'min_samples_leaf': [1],
    'max_features': ['log2'],
    'bootstrap': [False]
}


In [73]:
# Create the Random Forest model
rf = RandomForestClassifier()

In [74]:
# Create the GridSearchCV object
grid_search_rf = GridSearchCV(rf, param_grid, cv=5, verbose=2, error_score='raise')

In [75]:
# Fit the GridSearchCV object to your data
grid_search_rf_train = grid_search_rf.fit(X_train_fs, y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV] END bootstrap=False, max_depth=None, max_features=log2, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   4.7s
[CV] END bootstrap=False, max_depth=None, max_features=log2, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   4.8s
[CV] END bootstrap=False, max_depth=None, max_features=log2, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   4.1s
[CV] END bootstrap=False, max_depth=None, max_features=log2, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   5.0s
[CV] END bootstrap=False, max_depth=None, max_features=log2, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   4.0s


In [76]:
# Get the best estimator and evaluate it
best_rf = grid_search_rf.best_estimator_
y_test_pred_rf = best_rf.predict(X_test_fs)
accuracy_rf = accuracy_score(y_test, y_test_pred_rf)
print("Best Random Forest Parameters:", grid_search_rf.best_params_)
print("Accuracy: {:.2f}%".format(accuracy_rf * 100))

Best Random Forest Parameters: {'bootstrap': False, 'max_depth': None, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Accuracy: 92.95%


In [81]:
train_list = [grid_search_nb_train.best_score_, 
              grid_search_svm_train.best_score_,
              grid_search_knn_train.best_score_,
              grid_search_gb_train.best_score_,
              grid_search_rf_train.best_score_,            
             ]
test_list = [accuracy_nb, accuracy_svm, accuracy_knn, accuracy_gb, accuracy_rf]
names = ['Naive Bayes', 'SVM', 'KNN', 'Gradient Boosting', 'Random Forest']

train_list = pd.DataFrame(train_list)
test_list = pd.DataFrame(test_list)
names = pd.DataFrame(names)

train_list.columns = ['train']
test_list.columns = ['test']
names.columns = ['names']


over_all_score = pd.concat([names, train_list, test_list], axis=1)
over_all_score

Unnamed: 0,names,train,test
0,Naive Bayes,0.501923,0.583333
1,SVM,0.936218,0.924359
2,KNN,0.936859,0.935897
3,Gradient Boosting,0.939423,0.928205
4,Random Forest,0.936538,0.929487


In [None]:
# from sklearn.metrics import classification_report
# import matplotlib.pyplot as plt
# import numpy as np

# # Assuming you have classification reports for each model
# classification_reports = {
#     "Naive Bayes": classification_report(y_test, y_test_pred_nb, output_dict=True),
#     "SVM": classification_report(y_test, y_test_pred_svm, output_dict=True),
#     "KNN": classification_report(y_test, y_test_pred_knn, output_dict=True),
#     "Gradient Boosting": classification_report(y_test, y_test_pred_gb, output_dict=True),
#     "Random Forest": classification_report(y_test, y_test_pred_rf, output_dict=True)
# }

# # Define the classes (modify this according to your classes)
# classes = ["0", "1", "2"]

# # Define the metrics you want to display (e.g., precision, recall, f1-score)
# metrics = ["precision", "recall", "f1-score"]

# # Create a separate line plot for each class and each metric
# for class_name in classes:
#     plt.figure(figsize=(10, 6))

#     for metric in metrics:
#         scores = [report[class_name][metric] for report in classification_reports.values()]
#         model_names = list(classification_reports.keys())

#         plt.plot(model_names, scores, marker='o', label=metric.capitalize())

#     plt.xlabel('Models')
#     plt.ylabel('Score')
#     plt.title(f'Scores for {class_name}')
#     plt.legend()
#     plt.grid(True)
#     plt.xticks(rotation=45)
    
#     # Save the graph as an image (optional)
#     plt.savefig(f'{model_name}_{metric}_{class_name}.png')

#     # Show the plot
#     plt.tight_layout()
#     plt.show()


In [77]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score
import seaborn as sns

In [None]:
# import numpy as np
# import matplotlib.pyplot as plt
# from sklearn.metrics import confusion_matrix
# import itertools

# # Assuming you have classification reports for each model
# classification_reports = {
#     "Naive Bayes": classification_report(y_test, y_test_pred_nb, output_dict=True),
#     "SVM": classification_report(y_test, y_test_pred_svm, output_dict=True),
#     "KNN": classification_report(y_test, y_test_pred_knn, output_dict=True),
#     "Gradient Boosting": classification_report(y_test, y_test_pred_gb, output_dict=True),
#     "Random Forest": classification_report(y_test, y_test_pred_rf, output_dict=True)
# }

# # Define class labels (modify according to your classes)
# class_names = ["0", "1", "2"]  # Modify this according to your classes

# # Create a function to plot a confusion matrix
# def plot_confusion_matrix(confusion, classes, title='Confusion matrix', cmap=plt.cm.Blues):
#     plt.imshow(confusion, interpolation='nearest', cmap=cmap)
#     plt.title(title)
#     plt.colorbar()
#     tick_marks = np.arange(len(classes))
#     plt.xticks(tick_marks, classes, rotation=45)
#     plt.yticks(tick_marks, classes)
#     fmt = 'd'
#     thresh = confusion.max() / 2.
#     for i, j in itertools.product(range(confusion.shape[0]), range(confusion.shape[1])):
#         plt.text(j, i, format(confusion[i, j], fmt),
#                  horizontalalignment="center",
#                  color="white" if confusion[i, j] > thresh else "black")
#     plt.ylabel('True label')
#     plt.xlabel('Predicted label')
#     plt.tight_layout()

# # Create subplots for each model
# for model_name, report in classification_reports.items():
#     plt.figure(figsize=(15, 5 * len(class_names)))
#     plt.suptitle(f'Confusion Matrix for {model_name}', fontsize=16)

#     for i, class_name in enumerate(class_names):
#         plt.subplot(len(class_names), 1, i + 1)
#         confusion = confusion_matrix(y_test, y_test_pred_nb)  # Replace with the appropriate model's predictions
#         plot_confusion_matrix(confusion, classes=class_names, title=f'Confusion Matrix for {class_name}')

#     plt.tight_layout()

# plt.show()


In [78]:
# Classification report
print("Naive Bayes Classification report")
print(classification_report(y_test, y_test_pred_nb))
print("SVM Classification report")
print(classification_report(y_test, y_test_pred_svm))
print("KNN Classification report")
print(classification_report(y_test, y_test_pred_knn))
print("Gradient Boosting Classification report")
print(classification_report(y_test, y_test_pred_gb))
print("Random Forest Classification report")
print(classification_report(y_test, y_test_pred_rf))

Naive Bayes Classification report
              precision    recall  f1-score   support

           0       0.94      0.61      0.74       723
           1       0.04      0.28      0.08        39
           2       0.03      0.11      0.05        18

    accuracy                           0.58       780
   macro avg       0.34      0.33      0.29       780
weighted avg       0.87      0.58      0.69       780

SVM Classification report
              precision    recall  f1-score   support

           0       0.93      1.00      0.96       723
           1       0.00      0.00      0.00        39
           2       0.00      0.00      0.00        18

    accuracy                           0.92       780
   macro avg       0.31      0.33      0.32       780
weighted avg       0.86      0.92      0.89       780

KNN Classification report
              precision    recall  f1-score   support

           0       0.94      1.00      0.97       723
           1       0.88      0.18      0.30

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# fig = plt.figure(figsize=(15,15))

# ax1 = fig.add_subplot(3, 3, 1) # row, column, position
# ax1.set_title('Random Forest Classification')

# ax2 = fig.add_subplot(3, 3, 2) # row, column, position
# ax2.set_title('KNN Classification')

# ax3 = fig.add_subplot(3, 3, 3)
# ax3.set_title('SVM Classification')

# ax4 = fig.add_subplot(3, 3, 4)
# ax4.set_title('Naive Bayes Classification')

# ax5 = fig.add_subplot(3, 3, 5)
# ax5.set_title('Gradient Boosting Classification')

# sns.heatmap(data=(y_test, y_test_pred_rf), annot=True, linewidth=0.7, linecolor='cyan', fmt='.0f', ax=ax1, cmap='BrBG')
# sns.heatmap(data=(y_test, y_test_pred_knn), annot=True, linewidth=0.7, linecolor='cyan', fmt='.0f', ax=ax2, cmap='BrBG')   
# sns.heatmap(data=(y_test, y_test_pred_svm), annot=True, linewidth=0.7, linecolor='cyan', fmt='.0f', ax=ax3, cmap='BrBG')
# sns.heatmap(data=(y_test, y_test_pred_nb), annot=True, linewidth=0.7, linecolor='cyan', fmt='.0f', ax=ax4, cmap='BrBG')
# sns.heatmap(data=(y_test, y_test_pred_gb), annot=True, linewidth=0.7, linecolor='cyan', fmt='.0f', ax=ax5, cmap='BrBG')
# plt.show()