## Imports

In [15]:
!pip install matplotlib scikit-image
!pip install scikeras

import pandas as pd
import numpy as np
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.wrappers.scikit_learn import KerasClassifier

from skimage import io
from skimage import filters
from skimage import exposure
from skimage import color
from skimage.io import imread
from skimage.color import rgb2gray
from skimage.feature import hog
from skimage.filters import gaussian


from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.cluster import KMeans
from sklearn.metrics import confusion_matrix
from sklearn.manifold import TSNE

from scipy.stats import loguniform

from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

[0m

## Data Load

In [2]:
# Load the image paths and labels from the DataFrame
df = pd.read_csv('Dataset/cards.csv')

# For column names that contain space, replace the space with an underscore
df.columns = [c.replace(' ', '_') for c in df.columns]

# Add suits column
df['suit'] = df['labels'].str.split().str[-1]

# Remove rows with jokers
df = df[~df['suit'].str.contains('joker', case=False)]

# Remove unwanted columns
df = df.drop(columns = ['data_set'])
df = df.drop(columns = ['class_index'])
df = df.drop(columns = ['labels'])
df = df.drop(columns = ['card_type'])

# Add folder name to the filepath
df['filepaths'] = df['filepaths'].apply(lambda x: 'Dataset/' + x)
df.head()

Unnamed: 0,filepaths,suit
0,Dataset/train/ace of clubs/001.jpg,clubs
1,Dataset/train/ace of clubs/002.jpg,clubs
2,Dataset/train/ace of clubs/003.jpg,clubs
3,Dataset/train/ace of clubs/004.jpg,clubs
4,Dataset/train/ace of clubs/005.jpg,clubs


## Train Test Split

In [3]:
# Split dataframe into train and test sets, stratified by the 'suit' column
train_df, test_df = train_test_split(df, test_size=0.2, stratify=df['suit'], random_state=42)

# Split train set into train and validation sets, stratified by the 'suit' column
train_df, val_df = train_test_split(train_df, test_size=0.2, stratify=train_df['suit'], random_state=42)

In [4]:
# Check class distribution in the train, validation, and test sets
train_class_distribution = train_df['suit'].value_counts(normalize=True)
val_class_distribution = val_df['suit'].value_counts(normalize=True)
test_class_distribution = test_df['suit'].value_counts(normalize=True)

print("Train Set - Class Distribution:")
print(train_class_distribution)

print("Validation Set - Class Distribution:")
print(val_class_distribution)

print("Test Set - Class Distribution:")
print(test_class_distribution)

Train Set - Class Distribution:
spades      0.269171
hearts      0.246010
diamonds    0.243675
clubs       0.241144
Name: suit, dtype: float64
Validation Set - Class Distribution:
spades      0.269261
hearts      0.245914
diamonds    0.243580
clubs       0.241245
Name: suit, dtype: float64
Test Set - Class Distribution:
spades      0.268991
hearts      0.245953
diamonds    0.244085
clubs       0.240971
Name: suit, dtype: float64


## Extract HOG Features

In [12]:
def preprocess_image(image):
    image_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    return image_gray


def extract_hog_features(image_paths, labels):
    featurelist = []
    hog_example = None
    label_list = []

    for i, image_path in enumerate(image_paths):
        img = load_img(image_path)
        image_array = img_to_array(img)
        gray_image = cv2.cvtColor(image_array, cv2.COLOR_RGB2GRAY)
        fd, hog_image = hog(gray_image, 
                            pixels_per_cell=(8, 8),
                            cells_per_block=(2, 2), 
                            orientations=4, 
                            visualize=True)

        if i == 0:
            hog_example = hog_image

        featurelist.append(fd[np.newaxis, :])
        label_list.append(labels[i])

    features = np.vstack(featurelist)
    labels = np.array(label_list)

    return features, labels

In [16]:
# Select a portion of the train set
train_fraction = 0.5 
train_df_subset, _ = train_test_split(train_df, train_size=train_fraction, stratify=train_df['suit'], random_state=42)
train_features, train_labels = extract_hog_features(train_df_subset['filepaths'].values, train_df_subset['suit'].values)

# Map string labels to integer labels using LabelEncoder
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# One-hot encode the target labels
num_classes = len(label_encoder.classes_)
train_labels_onehot = to_categorical(train_labels_encoded, num_classes=num_classes)


In [18]:
# Select a portion of the validation set
val_fraction = 0.5 
val_df_subset, _ = train_test_split(val_df, test_size=val_fraction, stratify=val_df['suit'], random_state=42)
val_features, val_labels = extract_hog_features(val_df_subset['filepaths'].values, val_df_subset['suit'].values)

# Map string labels to integer labels using LabelEncoder (same label_encoder as for the training set)
val_labels_encoded = label_encoder.transform(val_labels)

# One-hot encode the target labels for the validation set
val_labels_onehot = to_categorical(val_labels_encoded, num_classes=num_classes)

In [20]:
test_features, test_labels = extract_hog_features(test_df['filepaths'].values, test_df['suit'].values)
# test_labels_encoded = label_encoder.transform(test_labels)
# test_labels_onehot = to_categorical(test_labels, num_classes=num_classes)

# test_fraction = 0.5 
# test_df_subset, _ = train_test_split(test_df, test_size=test_fraction, stratify=test_df['suit'], random_state=42)
# test_features, test_labels = extract_hog_features(test_df_subset['filepaths'].values, test_df_subset['suit'].values)

# Map string labels to integer labels using LabelEncoder (same label_encoder as for the training set)
test_labels_encoded = label_encoder.transform(test_labels)

# One-hot encode the target labels for the test set
test_labels_onehot = to_categorical(test_labels_encoded, num_classes=num_classes)

## CNN

In [21]:
num_classes = 4

model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(train_features.shape[1],)))
model.add(Dropout(0.03))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.03))
model.add(Dense(num_classes, activation='softmax'))

model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])


In [22]:
# Train the neural network model
history = model.fit(train_features, train_labels_onehot,
                    batch_size=32,
                    epochs=20,
                    validation_data=(val_features, val_labels_onehot))

Epoch 1/20


2023-07-30 08:52:47.502387: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:655] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-07-30 08:52:47.556758: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x5588b17194e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-07-30 08:52:47.556796: I tensorflow/compiler/xla/service/service.cc:177]   StreamExecutor device (0): NVIDIA GeForce RTX 4090, Compute Capability 8.9
2023-07-30 08:52:47.580321: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-07-30 08:52:47.622378: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8902
2023-07-30 08:52:47.798227: I ./tensorflow/compiler/jit/device_compiler.h:180] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the p

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Make predictions on the validation set
valid_predictions_probs = model.predict(valid_hog_features)
valid_predictions = np.argmax(valid_predictions_probs, axis=1)

# Calculate validation accuracy
valid_accuracy = accuracy_score(valid_generator.labels, valid_predictions)
print("Validation Accuracy:", valid_accuracy)

# Make predictions on the test set
test_predictions_probs = model.predict(test_hog_features)
test_predictions = np.argmax(test_predictions_probs, axis=1)

# Calculate test accuracy
test_accuracy = accuracy_score(test_generator.labels, test_predictions)
print("Test Accuracy:", test_accuracy)

## Confusion Matrix

In [27]:
conf_matrix = confusion_matrix(val_labels_onehot, val_predictions)

# Define the class labels
class_labels = ['spades', 'hearts', 'diamonds', 'clubs']

# Create the seaborn heatmap
sns.set(font_scale=1.2)
sns.heatmap(conf_matrix, 
            annot=True, 
            fmt="d", 
            cmap="Blues", 
            xticklabels=class_labels, 
            yticklabels=class_labels)

# Add labels and title
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.title("SVM - Confusion Matrix")

# Show the plot
plt.show()

NameError: name 'val_predictions' is not defined

## Classification Report

In [None]:
# Get the class names from the generator
class_names = list(['spades', 'hearts', 'diamonds', 'clubs'])
print(class_names)

In [None]:
# Calculate classification report for validation set
valid_report = classification_report(val_labels, valid_predictions, target_names=class_names)
print("Validation Classification Report:\n", valid_report)

In [None]:
# Calculate classification report for test set
test_report = classification_report(test_labels, test_predictions, target_names=class_names)
print("Test Classification Report:\n", test_report)

## PCA Dimensionality Reduction

In [None]:
# Perform PCA on the HOG features.
pca = PCA()
hog_features_pca = pca.fit_transform(train_features)

# Cumulative explained variance.
explained_variance_ratio = pca.explained_variance_ratio_
cumulative_explained_variance = np.cumsum(explained_variance_ratio)

# Plot fifty principal components
num_components = min(2000, len(cumulative_explained_variance))
components_range = np.arange(1, num_components + 1, 100)

plt.figure(figsize=(12, 6))
plt.plot(components_range, cumulative_explained_variance[components_range - 1], marker='o')
plt.xlabel('Number of Principal Components')
plt.ylabel('Cumulative Explained Variance')
plt.title('PCA - Cumulative Explained Variance')
plt.xticks(components_range, fontsize=6)
plt.grid(True)
plt.show()