In [None]:
import pandas as pd
import numpy as np
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from skimage.feature import hog
from skimage import exposure

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from skimage import exposure
from skimage import color
from skimage.filters import threshold_otsu
from skimage.morphology import binary_erosion
from skimage.measure import regionprops, find_contours
from tensorflow.keras.preprocessing.image import load_img, img_to_array

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import GridSearchCV

from utils import *
from feature_util import *





In [None]:
# Load the card metadata (image paths and labels) from the dataset CSV
df = pd.read_csv('Dataset/cards.csv')

# For column names that contain a space, replace the space with an underscore
df.columns = [c.replace(' ', '_') for c in df.columns]

# Add suits column: the suit is the last whitespace-separated word of the label
df['suit'] = df['labels'].str.split().str[-1]

# Remove rows with jokers. Take an explicit copy so that the column assignment
# further down writes to an independent frame instead of a slice of the
# unfiltered one (which triggers pandas' SettingWithCopyWarning).
df = df[~df['suit'].str.contains('joker', case=False)].copy()

# Remove unwanted columns in a single pass
df = df.drop(columns=['data_set', 'class_index', 'labels', 'card_type'])

# Add folder name to the filepath
df['filepaths'] = 'Dataset/' + df['filepaths']
df.head()

In [None]:
# Hold out 20% of the data for testing, then 20% of the remainder for
# validation. Both splits are stratified on the 'suit' column and seeded.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['suit'],
    random_state=42,
)
train_df, val_df = train_test_split(
    train_df,
    test_size=0.2,
    stratify=train_df['suit'],
    random_state=42,
)

# Relative frequency of each suit within the train, validation, and test sets
train_class_distribution = train_df['suit'].value_counts(normalize=True)
val_class_distribution = val_df['suit'].value_counts(normalize=True)
test_class_distribution = test_df['suit'].value_counts(normalize=True)

for split_title, split_distribution in (
    ("Train Set - Class Distribution:", train_class_distribution),
    ("Validation Set - Class Distribution:", val_class_distribution),
    ("Test Set - Class Distribution:", test_class_distribution),
):
    print(split_title)
    print(split_distribution)

## Augment Images

In [None]:
# Initialize the data generator classes.
# No rescaling (rescale = 1/255.0) is applied: pixel values must stay in the
# 0 to 255 range for the color histogram. The augmentation options are switched
# off as well (rotation_range=45, zoom_range=0.2, width_shift_range=0.2,
# height_shift_range=0.2, shear_range=0.2, horizontal_flip, vertical_flip).
train_data_generator = ImageDataGenerator()
test_data_generator = ImageDataGenerator()

# Create data generators for train, validation, and test
batch_size = 32

# Settings shared by all three flows. Shuffling is off; later cells pair the
# extracted features with each generator's `.labels` attribute.
flow_options = dict(
    x_col='filepaths',
    y_col='suit',
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False,
)

train_generator = train_data_generator.flow_from_dataframe(dataframe=train_df, **flow_options)

# Validation and test images go through test_data_generator, since they
# should not be augmented
valid_generator = test_data_generator.flow_from_dataframe(dataframe=val_df, **flow_options)
test_generator = test_data_generator.flow_from_dataframe(dataframe=test_df, **flow_options)


### Hue

In [None]:

def hist_hue(image):
    """Compute a min-max normalized hue histogram of an RGB image.

    Parameters
    ----------
    image : np.ndarray
        RGB image in a dtype accepted by ``cv2.cvtColor`` for RGB -> HSV
        (8-bit unsigned or 32-bit float).

    Returns
    -------
    np.ndarray
        ``uint8`` array of shape (255, 1) holding the hue bin counts rescaled
        so that the emptiest bin maps to 0 and the fullest bin to 255. All
        zeros when every bin holds the same count.
    """
    # Convert the image from RGB to HSV
    image_hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)

    # OpenCV stores hue as H/2 (0-180) for 8-bit images but leaves it in
    # degrees (0-360) for float images, so the histogram range has to follow
    # the dtype or part of the hue circle is silently dropped.
    hue_upper = 180 if image_hsv.dtype == np.uint8 else 360

    # Keep the raw counts as floats: casting them to uint8 before normalizing
    # wraps every count of 256 or more and corrupts the histogram.
    hue_counts = cv2.calcHist([image_hsv], [0], None, [255], [0, hue_upper])

    lowest = np.min(hue_counts)
    spread = np.max(hue_counts) - lowest
    if spread == 0:
        # Flat histogram: avoid dividing by zero
        return np.zeros(hue_counts.shape, dtype=np.uint8)

    # Min-max normalize, then scale to values between 0 and 255
    hist_hue_normalized = (hue_counts - lowest) / spread
    hist_hue_scaled = (hist_hue_normalized * 255).astype(np.uint8)

    return hist_hue_scaled

# Function to compute hue-histogram features for every batch of a generator
def extract_hue_features_from_generator(data_generator):
    """Compute hue-histogram features for one full pass over a batch iterator.

    Parameters
    ----------
    data_generator
        Batch iterator (e.g. the object returned by ``flow_from_dataframe``)
        that supports ``len()`` and ``next()`` and yields
        ``(batch_images, batch_labels)`` pairs.

    Returns
    -------
    tuple
        ``(features, labels)`` where ``features`` is an array with one
        flattened hue histogram per image and ``labels`` is a list holding
        each image's label row converted to a list.
    """
    hsv_features = []
    labels = []

    # Rewind first so the pass starts at batch 0 even when the iterator was
    # partially consumed by an earlier cell.
    rewind = getattr(data_generator, 'reset', None)
    if callable(rewind):
        rewind()

    for _ in range(len(data_generator)):
        # Use the built-in next(): the `.next()` method is not available on
        # the iterators of newer Keras releases.
        batch_images, class_labels = next(data_generator)

        # Compute hue features for each image in the batch
        for image, label in zip(batch_images, class_labels):
            hsv_features.append(hist_hue(image).flatten())
            labels.append(list(label))

    return np.array(hsv_features), labels




In [None]:

# Extract hue-histogram features and labels for each split, in
# train / validation / test order
(
    (train_hue_features, train_labels),
    (valid_hue_features, valid_labels),
    (test_hue_features, test_labels),
) = (
    extract_hue_features_from_generator(split_generator)
    for split_generator in (train_generator, valid_generator, test_generator)
)

# Convert the returned label collections to arrays
train_labels, valid_labels, test_labels = (
    np.array(split_labels)
    for split_labels in (train_labels, valid_labels, test_labels)
)

### LDA Classification

In [None]:
# Fit a default LDA classifier on the training hue histograms, using the
# class labels exposed by the training generator
lda_model = LinearDiscriminantAnalysis().fit(train_hue_features, train_generator.labels)

# Project the validation features into the LDA space
valid_features_lda = lda_model.transform(valid_hue_features)

# Classify the validation set and score the predictions against its labels
valid_predictions = lda_model.predict(valid_hue_features)
accuracy = accuracy_score(valid_generator.labels, valid_predictions)
print("Validation Accuracy:", accuracy)

In [None]:
# Get the class names from the generator's name -> index mapping
class_indices = train_generator.class_indices
class_names = list(class_indices.keys())

# Calculate classification report for validation set. Passing the class ids
# together with their names makes the report rows read as suit names rather
# than bare integers, and keeps every class listed even if one is never
# predicted.
valid_report = classification_report(
    valid_generator.labels,
    valid_predictions,
    labels=list(class_indices.values()),
    target_names=class_names,
)
print("Validation Classification Report:\n", valid_report)


In [None]:
# Confusion matrix of the validation predictions, labelled with the class names
confusion = confusion_matrix(
    y_true=valid_generator.labels,
    y_pred=valid_predictions,
)
disp = ConfusionMatrixDisplay(
    confusion_matrix=confusion,
    display_labels=class_names,
)
disp.plot()

### Grid search over LDA hyperparameters

In [None]:
# Shrinkage is only supported by the 'lsqr' and 'eigen' solvers. Pairing it
# with 'svd' makes those candidates fail in every fold, so 'svd' gets its own
# sub-grid without a shrinkage setting.
param_grid = [
    {'solver': ['svd']},
    {
        'solver': ['lsqr', 'eigen'],  # Solvers that accept shrinkage
        'shrinkage': [None, 'auto', 0.1, 0.5],  # None means no shrinkage
    },
]

# Initialize the GridSearchCV with the LDA model and parameter grid
grid_search = GridSearchCV(lda_model, param_grid, cv=5)

# Fit the GridSearchCV on the training data
grid_search.fit(train_hue_features, train_generator.labels)

# Get the best LDA model from the grid search
best_lda_model = grid_search.best_estimator_

# Predict on the validation data using the best model
valid_predictions = best_lda_model.predict(valid_hue_features)

# Calculate the accuracy of the best LDA model
accuracy = accuracy_score(valid_generator.labels, valid_predictions)
print("Validation Accuracy with LDA:", accuracy)

# Print the best hyperparameters found by GridSearchCV
print("Best Hyperparameters:", grid_search.best_params_)

### Combined hue and HOG features

In [None]:


def extract_hog_hue_features_from_generator(data_generator):
    """Compute combined HOG + hue-histogram features for one pass over a batch iterator.

    Parameters
    ----------
    data_generator
        Batch iterator (e.g. the object returned by ``flow_from_dataframe``)
        that supports ``len()`` and ``next()`` and yields
        ``(batch_images, batch_labels)`` pairs of RGB images and label rows.

    Returns
    -------
    tuple
        ``(features, labels)`` where each row of ``features`` is an image's
        HOG descriptor followed by its flattened hue histogram, and ``labels``
        is an array of the label rows.
    """
    hog_features = []
    hsv_features = []
    labels = []

    # Rewind first so the pass starts at batch 0 even when the iterator was
    # partially consumed by an earlier cell.
    rewind = getattr(data_generator, 'reset', None)
    if callable(rewind):
        rewind()

    for _ in range(len(data_generator)):
        # Use the built-in next(): the `.next()` method is not available on
        # the iterators of newer Keras releases.
        batch_images, class_labels = next(data_generator)

        for image, label in zip(batch_images, class_labels):
            # Hue histogram of the color image
            hsv_features.append(hist_hue(image).flatten())
            labels.append(list(label))

            # HOG descriptor, computed on an 8-bit grayscale copy of the image
            gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY).astype(np.uint8)
            hog_feature = hog(
                gray_image,
                orientations=8,
                pixels_per_cell=(16, 16),
                cells_per_block=(4, 4),
            )
            hog_features.append(hog_feature)

    # Place each image's HOG descriptor and hue histogram side by side
    hog_hue_feature = np.hstack((hog_features, hsv_features))

    return hog_hue_feature, np.array(labels)


In [None]:
# Extract the combined HOG + hue features and labels for each split, in
# train / validation / test order
(
    (train_hog_hue_features, train_labels),
    (valid_hog_hue_features, valid_labels),
    (test_hog_hue_features, test_labels),
) = (
    extract_hog_hue_features_from_generator(split_generator)
    for split_generator in (train_generator, valid_generator, test_generator)
)

In [None]:
# Inspect the dimensions of the combined HOG + hue training feature array
train_hog_hue_features.shape

In [None]:
# Linear-kernel SVM trained on the combined HOG + hue features
clf = SVC(C=0.1, kernel='linear')
clf.fit(X=train_hog_hue_features, y=train_generator.labels)

In [None]:
# Classify the validation set with the fitted SVM
valid_predictions = clf.predict(valid_hog_hue_features)

# Score the SVM predictions against the validation labels
accuracy = accuracy_score(
    valid_generator.labels,
    valid_predictions,
)
print("Validation Accuracy:", accuracy)

In [None]:
# Perform PCA on the combined HOG + hue features.
pca = PCA(n_components = 100)
train_hog_hue_features_pca = pca.fit_transform(train_hog_hue_features)

# Project the validation set with the components learned from the training
# data. Refitting on the validation set would place it in a different basis
# than the one the downstream classifier is trained on, and would also
# overwrite the explained-variance statistics plotted below.
valid_hog_hue_features_pca = pca.transform(valid_hog_hue_features)

# Cumulative explained variance (of the training fit).
explained_variance_ratio = pca.explained_variance_ratio_
cumulative_explained_variance = np.cumsum(explained_variance_ratio)

# Plot the curve at every 10th component count, up to the number of
# components actually fitted.
num_components = len(cumulative_explained_variance)
components_range = np.arange(1, num_components + 1, 10)

plt.figure(figsize=(12, 6))
plt.plot(components_range, cumulative_explained_variance[components_range - 1], marker='o')
plt.xlabel('Number of Principal Components')
plt.ylabel('Cumulative Explained Variance')
plt.title('PCA - Cumulative Explained Variance')
plt.xticks(components_range, fontsize=6)
plt.grid(True)
plt.show()

In [None]:
# Fit a default LDA classifier on the PCA-reduced training features
lda_model = LinearDiscriminantAnalysis().fit(
    train_hog_hue_features_pca,
    train_generator.labels,
)

# (Disabled) projection of the validation features into the LDA space:
#valid_features_lda = lda_model.transform(valid_hog_hue_features_pca)

# Classify the PCA-reduced validation features and score the predictions
valid_predictions = lda_model.predict(valid_hog_hue_features_pca)
accuracy = accuracy_score(valid_generator.labels, valid_predictions)
print("Validation Accuracy:", accuracy)