## Imports

In [1]:
!pip install matplotlib scikit-image
!pip install scikeras

import pandas as pd
import numpy as np
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.wrappers.scikit_learn import KerasClassifier

from skimage import filters
from skimage import exposure
from skimage import color
from skimage.io import imread
from skimage.color import rgb2gray
from skimage.feature import hog
from skimage.filters import gaussian


from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.cluster import KMeans
from sklearn.metrics import confusion_matrix
from sklearn.manifold import TSNE

from scipy.stats import loguniform

[0m

2023-07-29 10:57:41.020138: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-07-29 10:57:41.043357: I tensorflow/core/platform/cpu_feature_guard.cc:183] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Data Load

In [2]:
# Load the image paths and labels from the DataFrame
df = pd.read_csv('Dataset/cards.csv')

# For column names that contain space, replace the space with an underscore
df.columns = [c.replace(' ', '_') for c in df.columns]

# Add suits column
df['suit'] = df['labels'].str.split().str[-1]

# Remove rows with jokers
df = df[~df['suit'].str.contains('joker', case=False)]

# Remove unwanted columns
df = df.drop(columns = ['data_set'])
df = df.drop(columns = ['class_index'])
df = df.drop(columns = ['labels'])
df = df.drop(columns = ['card_type'])

# Add folder name to the filepath
df['filepaths'] = df['filepaths'].apply(lambda x: 'Dataset/' + x)
df.head()

Unnamed: 0,filepaths,suit
0,Dataset/train/ace of clubs/001.jpg,clubs
1,Dataset/train/ace of clubs/002.jpg,clubs
2,Dataset/train/ace of clubs/003.jpg,clubs
3,Dataset/train/ace of clubs/004.jpg,clubs
4,Dataset/train/ace of clubs/005.jpg,clubs


## Train Test Split

In [3]:
# Split dataframe into train and test sets, stratified by the 'suit' column
train_df, test_df = train_test_split(df, test_size=0.2, stratify=df['suit'], random_state=42)

# Split train set into train and validation sets, stratified by the 'suit' column
train_df, val_df = train_test_split(train_df, test_size=0.2, stratify=train_df['suit'], random_state=42)

In [4]:
# Check class distribution in the train, validation, and test sets
train_class_distribution = train_df['suit'].value_counts(normalize=True)
val_class_distribution = val_df['suit'].value_counts(normalize=True)
test_class_distribution = test_df['suit'].value_counts(normalize=True)

print("Train Set - Class Distribution:")
print(train_class_distribution)

print("Validation Set - Class Distribution:")
print(val_class_distribution)

print("Test Set - Class Distribution:")
print(test_class_distribution)

Train Set - Class Distribution:
spades      0.269171
hearts      0.246010
diamonds    0.243675
clubs       0.241144
Name: suit, dtype: float64
Validation Set - Class Distribution:
spades      0.269261
hearts      0.245914
diamonds    0.243580
clubs       0.241245
Name: suit, dtype: float64
Test Set - Class Distribution:
spades      0.268991
hearts      0.245953
diamonds    0.244085
clubs       0.240971
Name: suit, dtype: float64


## Augment Images

In [None]:
# # Initialize data generator class
# train_data_generator = ImageDataGenerator(
#     rescale = 1/255.0,
#     rotation_range= 45,
#     zoom_range= 0.2,
#     width_shift_range = 0.2,
#     height_shift_range = 0.2,
#     shear_range= 0.2,
#     horizontal_flip=True,
#     vertical_flip=True
# )

# test_data_generator = ImageDataGenerator(rescale = 1/255.0)

# # Create data generators for train, validation, and test
# batch_size = 32

# train_generator = train_data_generator.flow_from_dataframe(
#     dataframe = train_df,
#     x_col = 'filepaths',
#     y_col = 'suit',
#     target_size = (224,224),
#     batch_size = batch_size,
#     class_mode = 'categorical'
# )

# valid_generator = train_data_generator.flow_from_dataframe(
#     dataframe = val_df,
#     x_col = 'filepaths',
#     y_col = 'suit',
#     target_size = (224,224),
#     batch_size = batch_size,
#     class_mode = 'categorical'
# )


# test_generator = test_data_generator.flow_from_dataframe(
#     dataframe = test_df,
#     x_col = 'filepaths',
#     y_col = 'suit',
#     target_size = (224,224),
#     batch_size = batch_size,
#     class_mode = 'categorical',
#     shuffle = False,
# )

# del train_df
# del test_df
# del val_df

## Extract HOG Features

In [5]:
def preprocess_image(image):
    image_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    return image_gray


def extract_hog_features(image_paths, labels):
    featurelist = []
    hog_example = None
    label_list = []

    for i, image_path in enumerate(image_paths):
        img = load_img(image_path)
        image_array = img_to_array(img)
        gray_image = cv2.cvtColor(image_array, cv2.COLOR_RGB2GRAY)
        fd, hog_image = hog(gray_image, 
                            pixels_per_cell=(8, 8),
                            cells_per_block=(2, 2), 
                            orientations=4, 
                            visualize=True)

        if i == 0:
            hog_example = hog_image

        featurelist.append(fd[np.newaxis, :])
        label_list.append(labels[i])

    features = np.vstack(featurelist)
    labels = np.array(label_list)

    return features, labels


# def extract_hog_features(images, labels):
#     featurelist = []
#     hog_example = None
#     label_list = []
    
#     for i in range(images.shape[0]):
#         gray_image = rgb2gray(images[i, :, :])
#         fd, hog_image = hog(gray_image, 
#                             pixels_per_cell=(8, 8),
#                             cells_per_block=(2, 2), 
#                             orientations=4, 
#                             visualize=True)
        
#         if i == 0:
#             hog_example = hog_image
        
#         featurelist.append(fd[np.newaxis, :])
#         label_list.append(labels[i])
        
#     features = np.vstack(featurelist)
#     labels = np.array(label_list)
    
#     return features, labels

# def extract_hog_features_batch(images, labels):
#     feature_list = []
#     label_list = []
    
#     for i in range(images.shape[0]):
#         gray_image = rgb2gray(images[i, :, :, :])  # Convert RGB to grayscale
#         fd, _ = hog(gray_image,
#                     pixels_per_cell=(8, 8),
#                     cells_per_block=(2, 2),
#                     orientations=4,
#                     visualize=True)
        
#         feature_list.append(fd)
#         label_list.append(labels[i])
        
#     return np.array(feature_list), np.array(label_list)



In [8]:
# Select a portion of the train set
train_fraction = 0.5  # Change this fraction to the desired portion

train_df_subset, _ = train_test_split(train_df, train_size=train_fraction, stratify=train_df['suit'], random_state=42)
train_features, train_labels = extract_hog_features(train_df_subset['filepaths'].values, train_df_subset['suit'].values)

In [9]:
# Select a portion of the validation set
val_fraction = 0.2  # Change this fraction to the desired portion

val_df_subset, _ = train_test_split(val_df, test_size=val_fraction, stratify=val_df['suit'], random_state=42)
val_features, val_labels = extract_hog_features(val_df_subset['filepaths'].values, val_df_subset['suit'].values)

In [None]:
test_features, test_labels = extract_hog_features(test_df_subset['filepaths'].values, test_df_subset['suit'].values)

In [None]:
# # Extract HOG features for train, validation, and test sets
# train_features, train_labels = extract_hog_features(train_df['filepaths'].values, train_df['suit'].values)
# val_features, val_labels = extract_hog_features(val_df['filepaths'].values, val_df['suit'].values)
# test_features, test_labels = extract_hog_features(test_df['filepaths'].values, test_df['suit'].values)

## Support Vector Machines (SVM)

In [10]:
# Initialize and train the SVM classifier
svm_classifier = SVC(kernel='linear', random_state=42)  # You can experiment with different kernels
svm_classifier.fit(train_features, train_labels)

# Evaluate the SVM on the validation set
validation_accuracy = svm_classifier.score(val_features, val_labels)
print("Validation Accuracy:", validation_accuracy)

# Optionally, evaluate the SVM on the test set
test_accuracy = svm_classifier.score(test_features, test_labels)
print("Test Accuracy:", test_accuracy)

Validation Accuracy: 0.6352140077821011


NameError: name 'test_features' is not defined

In [None]:
# Initialize and train the SVM classifier
svm_classifier = SVC(kernel='linear', random_state=42)
svm_classifier.fit(train_features, np.argmax(train_labels, axis=1))  # use argmax to convert one-hot to categorical labels

In [None]:
# Extract HOG features from the validation set
# valid_features, valid_labels = extract_hog_features(valid_generator[0][0], valid_generator[0][1])

# Make predictions on the validation set
valid_predictions = svm_classifier.predict(valid_features)
valid_labels_categorical = np.argmax(valid_labels, axis=1)


# Evaluate the SVM classifier on the validation set
valid_accuracy = svm_classifier.score(valid_features, np.argmax(valid_labels, axis=1))
print("Valid accuracy:", valid_accuracy)

In [None]:
# Extract HOG features from the test set
# test_features, test_labels = extract_hog_features(test_generator[0][0], test_generator[0][1])

# Make predictions on the test set
test_predictions = svm_classifier.predict(test_features)
test_labels_categorical = np.argmax(test_labels, axis=1)

# Evaluate the SVM classifier on the test set
test_accuracy = svm_classifier.score(test_features, np.argmax(test_labels, axis=1))
print("Test accuracy:", test_accuracy)

## Confusion Matrix

In [None]:
# # Get the true labels from the test generator and flatten to 1D array
# true_labels = np.array(test_generator[0][1].argmax(axis=1)).flatten()

# # Convert the predicted labels to numpy array
# predicted_labels = test_predictions.flatten()

conf_matrix = confusion_matrix(valid_labels_categorical, valid_predictions)

# Define the class labels
class_labels = ['spades', 'hearts', 'diamonds', 'clubs']

# Create the seaborn heatmap
sns.set(font_scale=1.2)
sns.heatmap(conf_matrix, 
            annot=True, 
            fmt="d", 
            cmap="Blues", 
            xticklabels=class_labels, 
            yticklabels=class_labels)

# Add labels and title
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.title("Confusion Matrix")

# Show the plot
plt.show()

## Classification Report

In [None]:
# Get the class names from the generator
class_names = list(train_generator.class_indices.keys())
print(class_names)

In [None]:
# Calculate classification report for validation set
valid_report = classification_report(valid_labels_categorical, valid_predictions, target_names=class_names)
print("Validation Classification Report:\n", valid_report)

In [None]:
# Calculate classification report for test set
test_report = classification_report(test_labels_categorical, test_predictions, target_names=class_names)
print("Test Classification Report:\n", test_report)

## PCA Dimensionality Reduction

In [None]:
# Perform PCA on the HOG features.
pca = PCA()
hog_features_pca = pca.fit_transform(train_features)

# Cumulative explained variance.
explained_variance_ratio = pca.explained_variance_ratio_
cumulative_explained_variance = np.cumsum(explained_variance_ratio)

# Plot fifty principal components
num_components = min(50, len(cumulative_explained_variance))
components_range = np.arange(1, num_components + 1)

plt.figure(figsize=(12, 6))
plt.plot(components_range, cumulative_explained_variance[components_range - 1], marker='o')
plt.xlabel('Number of Principal Components')
plt.ylabel('Cumulative Explained Variance')
plt.title('PCA - Cumulative Explained Variance')
plt.xticks(components_range, fontsize=6)
plt.grid(True)
plt.show()

## tSNE

In [None]:
# Compute tSNE
tsne = TSNE(n_components=2, random_state=42)
tsne_result = tsne.fit_transform(data_features)

# Create a scatter plot
plt.figure(figsize=(10, 8))
colors = ['red', 'blue', 'green', 'purple']  # Colors for the 4 classes
class_labels = ['spades', 'hearts', 'diamonds', 'clubs']

for i in range(4):
    # Get the indices of data points with the current class label
    indices = np.where(data_labels == i)[0]
    
    # Scatter plot the t-SNE points with the corresponding class label
    plt.scatter(tsne_result[indices, 0], tsne_result[indices, 1], c=colors[i], label=class_labels[i])

plt.xlabel("t-SNE Component 1")
plt.ylabel("t-SNE Component 2")
plt.title("t-SNE Visualization for 4 Card Suit Classes")
plt.legend(loc='best')
plt.grid(True)
plt.show()

In [None]:
# dimensionality reduction
X_pixels_pca, X_hog_pca = get_PCA([pixel_features, hog_features], n_components=2)[-1]
X_pixels_tsne, X_hog_tsne = get_tsne([pixel_features, hog_features], n_components=2)