In [None]:
import os
import cv2

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split,  cross_val_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.naive_bayes import GaussianNB
from skimage.feature import local_binary_pattern
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import RidgeClassifier
from sklearn.decomposition import PCA

import matplotlib.pyplot as plt

import re

import seaborn as sns

from keras.preprocessing import image
from keras.models import Model
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.applications.resnet import ResNet152, preprocess_input
from keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
from keras.applications.nasnet import NASNetLarge, preprocess_input
from keras.applications.efficientnet_v2 import EfficientNetV2L, preprocess_input
from keras.applications.efficientnet import EfficientNetB0, preprocess_input
from keras.applications.efficientnet import EfficientNetB7, preprocess_input
from google.colab import drive

from PIL import Image

from pycaret.classification import *

import random

In [None]:
pip install pycaret

In [None]:
drive.mount("/content/drive", force_remount=True)

In [None]:
base_adr = './drive/MyDrive/'

In [None]:
df = pd.read_csv(base_adr + 'leaves.csv', header=None)

In [None]:
df

In [None]:
# Name the columns of the loaded CSV: the first one is 'target',
# the remaining fifteen are numbered 1 through 15.
column_names = ['target'] + list(range(1, 16))
df.columns = column_names

In [None]:
# Number of distinct values in the 'target' column
df['target'].nunique()

In [None]:
df

In [None]:
def sort_files_numerically(files):
    """Return ``files`` sorted by the integer that follows "EX" in each name.

    Names in which the pattern ``EX<digits>`` does not occur receive an
    infinite sort key, so they are placed after every numbered name.
    """
    ex_number = re.compile(r"EX(\d+)")

    def numeric_key(name):
        found = ex_number.search(name)
        return float('inf') if found is None else int(found.group(1))

    return sorted(files, key=numeric_key)

In [None]:
# Build the feature extractor: EfficientNetB7 with ImageNet weights, cut off at
# its 'avg_pool' layer so that predict() returns that layer's output instead of
# the network's final output.
base_model = EfficientNetB7(weights='imagenet')
feature_layer = base_model.get_layer('avg_pool')
model = Model(inputs=base_model.input, outputs=feature_layer.output)

In [None]:
def extract_features(img_path):
    """Return the output of the global ``model`` for the image at ``img_path``.

    The image is loaded at 600x600, converted to an array, given a leading
    batch axis and run through ``preprocess_input`` before prediction. The
    returned array still has that batch axis (callers take element ``[0]``).
    """
    pixels = image.img_to_array(image.load_img(img_path, target_size=(600, 600)))
    batch = preprocess_input(np.expand_dims(pixels, axis=0))

    # Forward pass through the truncated network
    return model.predict(batch)

In [None]:
# Directory containing your dataset images
dataset_directory = base_adr + 'leaves'

# List all sub-directories and sort them numerically (their names must parse as integers)
directories = [d for d in os.listdir(dataset_directory) if os.path.isdir(os.path.join(dataset_directory, d))]
sorted_directories = sorted(directories, key=lambda x: int(x))

# One entry per processed image, all four lists kept in the same order
lbp_features_list = []
hist_features_list = []
efficient_net_features_list = []
photo_adrs = []

# LBP settings: P sampling points on a circle of radius R
P, R = 8, 1
# Number of histogram bins for the H, S and V channels
bins = (8, 8, 8)

# Iterate through sorted directories
for dir_name in sorted_directories:
    dir_path = os.path.join(dataset_directory, dir_name)

    # List all files in the directory and sort them numerically
    files = [f for f in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, f))]
    sorted_files = sort_files_numerically(files)

    # Iterate through sorted files
    for image_name in sorted_files:
        if not image_name.endswith('.JPG'):
            continue
        image_path = os.path.join(dir_path, image_name)

        #------------------------------------------------------------------------------------------
        # Compute LBP

        image_file = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image_file is None:
            # cv2.imread returns None instead of raising. Fail loudly rather than
            # skipping: the feature rows are later concatenated column-wise with
            # the CSV rows, so a silently skipped image would shift every later row.
            raise IOError(f'Could not read image: {image_path}')
        image_file = cv2.resize(image_file, (256, 256))

        lbp = local_binary_pattern(image_file, P, R, method="uniform")

        # Histogram of the LBP codes over P + 2 unit-width bins
        (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, P + 3), range=(0, P + 2))

        # Normalise to unit sum; the epsilon guards against division by zero
        hist = hist.astype("float")
        hist /= (hist.sum() + 1e-6)
        #------------------------------------------------------------------------------------------
        # Compute Coloring Histogram

        # Read as a 3-channel BGR image. The flag must be an IMREAD_* constant;
        # COLOR_BGR2HSV is a cvtColor conversion code, not a read mode.
        image_file = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image_file is None:
            raise IOError(f'Could not read image: {image_path}')
        hsv_image = cv2.cvtColor(image_file, cv2.COLOR_BGR2HSV)

        # NOTE(review): for 8-bit images OpenCV stores hue in [0, 180), so with a
        # [0, 256] range the upper hue bins stay empty. Range left unchanged so the
        # feature layout stays the same - confirm whether [0, 180] is wanted.
        hist_h = cv2.calcHist([hsv_image], [0], None, [bins[0]], [0, 256])
        hist_s = cv2.calcHist([hsv_image], [1], None, [bins[1]], [0, 256])
        hist_v = cv2.calcHist([hsv_image], [2], None, [bins[2]], [0, 256])

        hist_h = cv2.normalize(hist_h, hist_h).flatten()
        hist_s = cv2.normalize(hist_s, hist_s).flatten()
        hist_v = cv2.normalize(hist_v, hist_v).flatten()

        hist_features = np.concatenate([hist_h, hist_s, hist_v])
        #------------------------------------------------------------------------------------------
        # Compute EfficientNetB7 features ([0] drops the batch axis)
        efficient_net_features = extract_features(image_path)[0]
        #------------------------------------------------------------------------------------------

        lbp_features_list.append(hist)
        hist_features_list.append(hist_features)
        efficient_net_features_list.append(efficient_net_features)
        photo_adrs.append(image_path)

In [None]:
efficient_net_features_df = pd.DataFrame(efficient_net_features_list)
df_with_efficient_net = pd.concat([df, efficient_net_features_df], axis=1)

In [None]:
lbp_features_df = pd.DataFrame(lbp_features_list)
df_with_lbp = pd.concat([df_with_efficient_net, lbp_features_df], axis=1)

In [None]:
hist_features_df = pd.DataFrame(hist_features_list)
df_with_hist = pd.concat([df_with_lbp, hist_features_df], axis=1)

In [None]:
path_df = pd.DataFrame(photo_adrs)
df_train = pd.concat([df_with_hist, path_df], axis=1)

In [None]:
# Function to make column names unique
def make_column_names_unique(df):
    """Rename the columns of ``df`` in place to 'target', 0, 1, 2, ... and return it.

    The first column becomes 'target'; every other column is named by its
    zero-based position among the remaining columns.
    """
    df.columns = ['target', *range(df.shape[1] - 1)]
    return df

df_train = make_column_names_unique(df_train)

In [None]:
df_train

In [None]:
df_train.to_csv('dfWithEfficientNetB7withPath.csv', sep='\t', encoding='utf-8')

In [None]:
df_train = pd.read_csv('./dfWithEfficientNetB7withPath.csv', sep='\t', encoding='utf-8', index_col=False)

In [None]:
df_train

In [None]:
df_train = df_train.drop(columns=['Unnamed: 0', '0'])

In [None]:
df_train

# **Dimensionality Reduction**

In [None]:
y = df_train['target']
X = df_train.drop(columns=['target', '2609'])

In [None]:
X

In [None]:
# Fit PCA to the data
pca = PCA()
pca.fit(X)

# Get the explained variance ratio
explained_variance_ratio = pca.explained_variance_ratio_

In [None]:
# Running total of the explained-variance ratios, one entry per component count
cumulative_explained_variance = np.cumsum(explained_variance_ratio)

# Plot the cumulative explained variance against the number of components
fig, ax = plt.subplots(figsize=(8, 6))
component_counts = range(1, len(cumulative_explained_variance) + 1)
ax.plot(component_counts, cumulative_explained_variance, marker='o', linestyle='--')
ax.set_xlabel('Number of Principal Components')
ax.set_ylabel('Cumulative Explained Variance')
ax.set_title('Explained Variance by Different Principal Components')
ax.grid(True)
plt.show()

In [None]:
# Fit PCA to the data
pca = PCA(n_components=75)
principal_components = pca.fit_transform(X)

In [None]:
principal_components.shape

In [None]:
# Hold out 25% of the samples, splitting the 75-component PCA projection
# (principal_components) rather than the raw feature matrix X, so that the
# classifier trained next actually works on the transformed data.
X_train, X_test, y_train, y_test = train_test_split(principal_components, y, test_size=0.25)

In [None]:
# Train a classifier on the transformed training set
# For simplicity, let's use LDA itself as the classifier
lda_classifier = LinearDiscriminantAnalysis()
lda_classifier.fit(X_train, y_train)

# Predict on the transformed test set
y_pred = lda_classifier.predict(X_test)

In [None]:
accuracy_score(y_pred, y_test)

# **Classification**

In [None]:
def LDA_dimension_reduction(df_final, photo_col='2609', test_size=0.25, random_state=None):
    """Split ``df_final`` into train/test sets and project both with LDA.

    The LDA projection is fitted on the training split only and then applied
    to both splits.

    Parameters
    ----------
    df_final : pandas.DataFrame
        Must contain a 'target' column (labels) and the ``photo_col`` column;
        every other column is used as a feature.
    photo_col : str, default '2609'
        Column that travels with the split but is excluded from the features
        (in this notebook it appears to hold the image path - verify).
    test_size : float, default 0.25
        Fraction of rows placed in the test split.
    random_state : int or None, default None
        Seed forwarded to ``train_test_split``; ``None`` gives a different
        split on every call.

    Returns
    -------
    X_train_lda, X_test_lda : numpy.ndarray
        LDA-transformed feature matrices.
    y_train, y_test : numpy.ndarray
        Labels of the two splits.
    X_train_photos, X_test_photos : pandas.Series
        ``photo_col`` values of the two splits.
    """
    # Separate features and labels
    X = df_final.drop('target', axis=1)
    y = df_final['target'].values

    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Keep the photo column aside, then remove it from the feature matrices
    X_train_photos = X_train[photo_col]
    X_test_photos = X_test[photo_col]

    X_train = X_train.drop(photo_col, axis=1).values
    X_test = X_test.drop(photo_col, axis=1).values

    # n_components=None lets LDA choose the number of components itself
    lda = LinearDiscriminantAnalysis(n_components=None)

    # Fit on the training split only, so the test split does not influence the projection
    lda.fit(X_train, y_train)

    X_train_lda = lda.transform(X_train)
    X_test_lda = lda.transform(X_test)

    return X_train_lda, X_test_lda, y_train, y_test, X_train_photos, X_test_photos

In [None]:
def PCA_dimension_reduction(df_final, n_components=75, photo_col='2609', test_size=0.25, random_state=None):
    """Split ``df_final`` into train/test sets and project both with PCA.

    The data is split first and PCA is fitted on the training split only, so
    the test split has no influence on the projection.

    Parameters
    ----------
    df_final : pandas.DataFrame
        Must contain a 'target' column (labels) and the ``photo_col`` column;
        every other column is used as a feature.
    n_components : int, default 75
        Number of principal components to keep.
    photo_col : str, default '2609'
        Column that travels with the split but is excluded from the features
        (in this notebook it appears to hold the image path - verify).
    test_size : float, default 0.25
        Fraction of rows placed in the test split.
    random_state : int or None, default None
        Seed forwarded to ``train_test_split``; ``None`` gives a different
        split on every call.

    Returns
    -------
    X_train, X_test : numpy.ndarray
        PCA-transformed feature matrices.
    y_train, y_test : numpy.ndarray
        Labels of the two splits.
    X_train_photos, X_test_photos : pandas.Series
        ``photo_col`` values of the two splits.
    """
    # Separate features, labels and the photo column
    X = df_final.drop(columns=['target', photo_col])
    y = df_final['target'].values
    X_photos = df_final[photo_col]

    # Split all three together so rows stay matched whatever index df_final has
    X_train_raw, X_test_raw, y_train, y_test, X_train_photos, X_test_photos = train_test_split(
        X, y, X_photos, test_size=test_size, random_state=random_state
    )

    # Fit PCA on the training rows only, then apply the same projection to the test rows
    pca = PCA(n_components=n_components)
    X_train = pca.fit_transform(X_train_raw)
    X_test = pca.transform(X_test_raw)

    return X_train, X_test, y_train, y_test, X_train_photos, X_test_photos

In [None]:
X_train, X_test, y_train, y_test, X_train_photos, X_test_photos = PCA_dimension_reduction(df_train)

In [None]:
X_train_lda, X_test_lda, y_train, y_test, X_train_photos, X_test_photos = LDA_dimension_reduction(df_train)

In [None]:
# Combine training data
train_data = pd.DataFrame(X_train_lda)
train_data['target'] = y_train

In [None]:
# Combine testing data
test_data = pd.DataFrame(X_test_lda)
test_data['target'] = y_test

In [None]:
# Combine both into a single DataFrame
combined_data = pd.concat([train_data, test_data], axis=0)

In [None]:
combined_data.info()

In [None]:
# Reset the index of the DataFrame
combined_data = combined_data.reset_index(drop=True)

In [None]:
combined_data

In [None]:
# Initialize the setup
clf_setup = setup(data=combined_data, target='target')

In [None]:
# Compare models and select the best one
best_model = compare_models()

In [None]:
def logistic_regresion_model(X_train, X_test, y_train):
    """Fit a default ``LogisticRegression`` on the training data.

    Returns the fitted estimator together with its predictions for ``X_test``.
    """
    classifier = LogisticRegression().fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return classifier, predictions

In [None]:
def LDA_model(X_train, X_test, y_train):
    """Fit a default ``LinearDiscriminantAnalysis`` as a classifier.

    Returns the fitted estimator together with its predictions for ``X_test``.
    """
    classifier = LinearDiscriminantAnalysis().fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return classifier, predictions

In [None]:
def extra_trees_model(X_train, X_test, y_train):
    """Fit an ``ExtraTreesClassifier`` (``random_state=42``) on the training data.

    Returns the fitted estimator together with its predictions for ``X_test``.
    """
    classifier = ExtraTreesClassifier(random_state=42).fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return classifier, predictions

In [None]:
def ridge_model(X_train, X_test, y_train):
    """Fit a default ``RidgeClassifier`` on the training data.

    Returns the fitted estimator together with its predictions for ``X_test``.
    """
    classifier = RidgeClassifier().fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return classifier, predictions

In [None]:
def KNN_model(X_train, X_test, y_train, K):
    """Fit a ``KNeighborsClassifier`` with ``K`` neighbours on the training data.

    Returns the fitted estimator together with its predictions for ``X_test``.
    """
    classifier = KNeighborsClassifier(n_neighbors=K).fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return classifier, predictions

In [None]:
def display_images_from_directory(directory_path, images_per_row=5):
    """Show every image found directly inside ``directory_path`` in a grid.

    Files are selected by extension (png, jpg, jpeg, gif, bmp; matched
    case-insensitively) and shown in sorted path order, ``images_per_row``
    per row. If the directory holds no matching file, a message is printed
    and nothing is plotted.

    Parameters
    ----------
    directory_path : str
        Directory to scan (not recursive).
    images_per_row : int, default 5
        Number of grid columns.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

    # Sorted so the grid order does not depend on the file system's listing order
    image_files = sorted(
        os.path.join(directory_path, f)
        for f in os.listdir(directory_path)
        if f.lower().endswith(image_extensions)
    )

    n_images = len(image_files)
    if n_images == 0:
        # plt.subplots cannot create a grid with zero rows
        print(f'No images found in {directory_path}')
        return

    # Ceiling division: number of rows needed to hold all images
    n_rows = (n_images + images_per_row - 1) // images_per_row

    # squeeze=False always yields a 2-D array of axes, even for a 1x1 grid
    fig, axes = plt.subplots(
        n_rows, images_per_row,
        figsize=(images_per_row * 3, n_rows * 3),
        squeeze=False,
    )

    for i, ax in enumerate(axes.flatten()):
        if i < n_images:
            # Context manager closes the file handle once the image is drawn
            with Image.open(image_files[i]) as img:
                ax.imshow(img)
        # Hide the axes for image cells and for unused trailing cells alike
        ax.axis('off')

    plt.tight_layout()
    plt.show()


In [None]:
def show_result(accuracy_scores):
    """Print summary statistics of ``accuracy_scores`` and plot their density.

    Prints mean, variance, maximum, minimum and standard deviation, then
    draws a filled kernel-density estimate of the scores. With an empty
    input a message is printed and nothing else happens.

    Parameters
    ----------
    accuracy_scores : sequence of float
        One accuracy value per iteration.
    """
    scores = np.asarray(accuracy_scores, dtype=float)
    if scores.size == 0:
        # np.max / np.min raise on empty input
        print('No accuracy scores to summarise.')
        return

    # Calculate statistical measures
    average_accuracy = np.mean(scores)
    variance_accuracy = np.var(scores)
    max_accuracy = np.max(scores)
    min_accuracy = np.min(scores)
    std_dev_accuracy = np.std(scores)

    # Print the statistical measures
    print(f'Average Accuracy: {average_accuracy * 100:.2f}%')
    print(f'Variance: {variance_accuracy:.4f}')
    print(f'Max Accuracy: {max_accuracy * 100:.2f}%')
    print(f'Min Accuracy: {min_accuracy * 100:.2f}%')
    print(f'Standard Deviation: {std_dev_accuracy:.4f}')

    # Plot the continuous distribution of accuracy scores
    plt.figure(figsize=(10, 6))
    # `fill` is the current name of the deprecated `shade` argument
    sns.kdeplot(scores, fill=True)
    # Report the real number of iterations instead of a fixed figure
    plt.title(f'Continuous Distribution of Accuracy Scores over {scores.size} Iterations')
    plt.xlabel('Accuracy')
    plt.ylabel('Density')
    plt.grid(True)
    plt.show()

In [None]:
def mistake_analysis(y_pred, y_test, path):
    """Print the true and predicted labels, show the image at ``path``, then
    show the images of the true-class and predicted-class directories.

    The class directories are ``base_adr + 'leaves/<label>'``; each is
    displayed with seven images per row.
    """
    separator = '------------------------------------------------'

    print(f'True Label = {y_test}')
    print(f'Predict Label = {y_pred}')

    true_dir = base_adr + f'leaves/{y_test}'
    pred_dir = base_adr + f'leaves/{y_pred}'

    print(separator)

    # Open the image with Pillow and draw it without axes
    picture = Image.open(path)
    plt.imshow(picture)
    plt.axis('off')
    plt.show()

    # Same layout for both galleries: separator, heading, image grid
    galleries = (
        ('True Class Images:', true_dir),
        ('Predict Class Images:', pred_dir),
    )
    for heading, class_dir in galleries:
        print(separator)
        print(heading)
        display_images_from_directory(class_dir, 7)

    print('************************************************')

In [None]:
def model_analysis(dimension_reduction, classifier, df_train, iteration, show_mistakes=False):
    """Repeat split -> reduce -> fit -> score and summarise the accuracies.

    Each iteration draws a fresh train/test split through the chosen
    dimension-reduction function, fits the chosen classifier, and records its
    test accuracy. The collected accuracies are passed to ``show_result``.

    Parameters
    ----------
    dimension_reduction : {'LDA', 'PCA'}
        Selects ``LDA_dimension_reduction`` or ``PCA_dimension_reduction``.
    classifier : {'KNN', 'ridge', 'logistic_regresion', 'LDA', 'extra_trees'}
        Selects the model helper. For 'KNN' the neighbour count is drawn at
        random from 3, 5 and 7 in every iteration.
    df_train : pandas.DataFrame
        Data frame handed unchanged to the dimension-reduction function.
    iteration : int
        Number of repetitions.
    show_mistakes : bool, default False
        If true, ``mistake_analysis`` is called for every misclassified test
        sample (this displays many images).

    Raises
    ------
    ValueError
        If ``dimension_reduction`` or ``classifier`` is not a supported name.
    """
    reducers = {
        'LDA': LDA_dimension_reduction,
        'PCA': PCA_dimension_reduction,
    }
    fitters = {
        'ridge': ridge_model,
        'logistic_regresion': logistic_regresion_model,
        'LDA': LDA_model,
        'extra_trees': extra_trees_model,
    }

    # Validate up front so a typo gives a clear error instead of an unbound variable later
    if dimension_reduction not in reducers:
        raise ValueError(
            f"Unknown dimension_reduction {dimension_reduction!r}; expected one of {sorted(reducers)}"
        )
    if classifier != 'KNN' and classifier not in fitters:
        raise ValueError(
            f"Unknown classifier {classifier!r}; expected one of {sorted(['KNN', *fitters])}"
        )

    accuracy_scores = []

    for _ in range(iteration):
        X_train, X_test, y_train, y_test, X_train_photos, X_test_photos = reducers[dimension_reduction](df_train)

        if classifier == 'KNN':
            K = random.choice([3, 5, 7])
            model, y_pred = KNN_model(X_train, X_test, y_train, K)
        else:
            model, y_pred = fitters[classifier](X_train, X_test, y_train)

        # scikit-learn's argument order is (y_true, y_pred)
        accuracy_scores.append(accuracy_score(y_test, y_pred))

        if show_mistakes:
            for i in range(len(y_pred)):
                if y_pred[i] != y_test[i]:
                    mistake_analysis(y_pred[i], y_test[i], X_test_photos.iloc[i])

    show_result(accuracy_scores)

In [None]:
df_train

In [None]:
model_analysis('PCA', 'KNN', df_train, 3)

In [None]:
# model_analysis takes (dimension_reduction, classifier, df_train, iteration);
# PCA is used here like in the neighbouring calls.
model_analysis('PCA', 'ridge', df_train, 3)

In [None]:
# model_analysis takes (dimension_reduction, classifier, df_train, iteration);
# PCA is used here like in the neighbouring calls.
model_analysis('PCA', 'logistic_regresion', df_train, 3)

In [None]:
model_analysis('PCA', 'LDA', df_train, 3)

In [None]:
model_analysis('PCA', 'extra_trees', df_train, 3)