In [1]:
import pandas as pd
import numpy as np
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from skimage.feature import hog
from skimage import exposure
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from skimage.io import imread
from skimage.color import rgb2gray
from skimage import exposure
from skimage import color
from skimage.filters import threshold_otsu
from skimage.filters import threshold_local
from skimage.morphology import binary_erosion
from skimage.measure import regionprops, find_contours
from tensorflow.keras.preprocessing.image import load_img, img_to_array

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn import linear_model

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder

from skimage.transform import resize


2023-07-24 21:45:54.425894: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-07-24 21:45:54.449039: I tensorflow/core/platform/cpu_feature_guard.cc:183] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the image paths and labels from the DataFrame
df = pd.read_csv('Dataset/cards.csv')

# For column names that contain space, replace the space with an underscore
df.columns = [c.replace(' ', '_') for c in df.columns]

# Add suits column
df['suit'] = df['labels'].str.split().str[-1]

# Remove rows with jokers
df = df[~df['suit'].str.contains('joker', case=False)]

# Remove unwanted columns
df = df.drop(columns = ['data_set'])
df = df.drop(columns = ['class_index'])
df = df.drop(columns = ['labels'])
df = df.drop(columns = ['card_type'])

# Add folder name to the filepath
df['filepaths'] = df['filepaths'].apply(lambda x: 'Dataset/' + x)
df.head()

Unnamed: 0,filepaths,suit
0,Dataset/train/ace of clubs/001.jpg,clubs
1,Dataset/train/ace of clubs/002.jpg,clubs
2,Dataset/train/ace of clubs/003.jpg,clubs
3,Dataset/train/ace of clubs/004.jpg,clubs
4,Dataset/train/ace of clubs/005.jpg,clubs


In [3]:
# Split dataframe into train and test sets, stratified by the 'suit' column
train_df, test_df = train_test_split(df, test_size=0.2, stratify=df['suit'], random_state=42)

# Split train set into train and validation sets, stratified by the 'suit' column
train_df, val_df = train_test_split(train_df, test_size=0.2, stratify=train_df['suit'], random_state=42)

# Check class distribution in the train, validation, and test sets
train_class_distribution = train_df['suit'].value_counts(normalize=True)
val_class_distribution = val_df['suit'].value_counts(normalize=True)
test_class_distribution = test_df['suit'].value_counts(normalize=True)

print("Train Set - Class Distribution:")
print(train_class_distribution)

print("Validation Set - Class Distribution:")
print(val_class_distribution)

print("Test Set - Class Distribution:")
print(test_class_distribution)

Train Set - Class Distribution:
spades      0.269171
hearts      0.246010
diamonds    0.243675
clubs       0.241144
Name: suit, dtype: float64
Validation Set - Class Distribution:
spades      0.269261
hearts      0.245914
diamonds    0.243580
clubs       0.241245
Name: suit, dtype: float64
Test Set - Class Distribution:
spades      0.268991
hearts      0.245953
diamonds    0.244085
clubs       0.240971
Name: suit, dtype: float64


In [4]:
# Initialize data generator class

train_data_generator = ImageDataGenerator(
    rescale = 1/255.0,
    rotation_range= 45,
    zoom_range= 0.2,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range= 0.2,
    horizontal_flip=True,
    vertical_flip=True
)

test_data_generator = ImageDataGenerator(rescale = 1/255.0)

# Create data generators for train, validation, and test
batch_size = 32

train_generator = train_data_generator.flow_from_dataframe(
    dataframe = train_df,
    x_col = 'filepaths',
    y_col = 'suit',
    target_size = (224,224),
    batch_size = batch_size,
    class_mode = 'categorical'
)

valid_generator = train_data_generator.flow_from_dataframe(
    dataframe = val_df,
    x_col = 'filepaths',
    y_col = 'suit',
    target_size = (224,224),
    batch_size = batch_size,
    class_mode = 'categorical'
)


test_generator = test_data_generator.flow_from_dataframe(
    dataframe = test_df,
    x_col = 'filepaths',
    y_col = 'suit',
    target_size = (224,224),
    batch_size = batch_size,
    class_mode = 'categorical',
    shuffle = False,
)


Found 5138 validated image filenames belonging to 4 classes.
Found 1285 validated image filenames belonging to 4 classes.
Found 1606 validated image filenames belonging to 4 classes.


In [5]:
# Function to preprocess the image
def preprocess_image(image):
    # Convert the image to grayscale
    grayscale_image = rgb2gray(image)

    # Resize the image to a consistent size (e.g., (224, 224))
    resized_image = resize(grayscale_image, (224, 224))

    return resized_image

# Function to compute HOG features for a single preprocessed image
def compute_hog_features(image):
    # Compute the HOG features for the image
    fd, hog_image = hog(image, orientations=8, pixels_per_cell=(16, 16), cells_per_block=(1, 1), visualize=True)
    return fd

# Function to compute HOG features for a batch of images
def compute_batch_hog_features(image_generator):
    hog_features = []
    for _ in range(len(image_generator)):
        image_batch, _ = next(image_generator)  # Get a batch of images (ignore the labels)
        for image in image_batch:
            preprocessed_image = preprocess_image(image)
            hog_features.append(compute_hog_features(preprocessed_image))
    return np.array(hog_features)

In [6]:
# Compute HOG features for train, validation, and test sets
train_hog_features = compute_batch_hog_features(train_generator)
valid_hog_features = compute_batch_hog_features(valid_generator)
test_hog_features = compute_batch_hog_features(test_generator)

In [7]:
# Initialize the Linear Regression model
model = LinearRegression()

In [8]:
# Fit the model on the train HOG features and corresponding labels
model.fit(train_hog_features, train_generator.labels)

In [9]:
# Predict the labels for the validation set
valid_predictions = model.predict(valid_hog_features)

In [10]:
# Round the predicted labels to get the final class predictions
valid_predictions = np.round(valid_predictions)

In [11]:
# Convert the predictions to integers (required by some evaluation metrics)
valid_predictions = valid_predictions.astype(int)

In [13]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# Convert the test labels to integers (required by some evaluation metrics)
test_labels = test_generator.labels

# Predict the labels for the test set
test_predictions = model.predict(test_hog_features)

# Round the predicted labels to get the final class predictions
test_predictions = np.round(test_predictions)

# Convert the predictions to integers (required by some evaluation metrics)
test_predictions = test_predictions.astype(int)

# Calculate evaluation metrics
accuracy = accuracy_score(test_labels, test_predictions)
precision = precision_score(test_labels, test_predictions, average='weighted')
recall = recall_score(test_labels, test_predictions, average='weighted')
f1 = f1_score(test_labels, test_predictions, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Accuracy: 0.2602739726027397
Precision: 0.25972345473214475
Recall: 0.2602739726027397
F1-score: 0.22604073251263854


  _warn_prf(average, modifier, msg_start, len(result))


# Least Squares Classiciation

#### Extract HOG Features

In [None]:
from skimage.feature import hog
from skimage import exposure

# Function to compute HOG features for a single image
def compute_hog_features(image):
    # Compute the HOG features for the image
    fd, hog_image = hog(image, orientations=8, pixels_per_cell=(16, 16), cells_per_block=(1, 1), visualize=True)

    return fd

# Function to compute HOG features for a batch of images
def compute_batch_hog_features(image_batch):
    hog_features = []
    for image in image_batch:
        hog_features.append(compute_hog_features(image))
    return np.array(hog_features)

# Compute HOG features for train, validation, and test sets
train_hog_features = compute_batch_hog_features(train_generator)
valid_hog_features = compute_batch_hog_features(valid_generator)
test_hog_features = compute_batch_hog_features(test_generator)


#### Perform Least Squares Classification

In [None]:
from sklearn.linear_model import LinearRegression

# Initialize the Linear Regression model
model = LinearRegression()

# Fit the model on the train HOG features and corresponding labels
model.fit(train_hog_features, train_generator.labels)

# Predict the labels for the validation set
valid_predictions = model.predict(valid_hog_features)

# Round the predicted labels to get the final class predictions
valid_predictions = np.round(valid_predictions)

# Convert the predictions to integers (required by some evaluation metrics)
valid_predictions = valid_predictions.astype(int)


#### Evaluate the model

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Convert the test labels to integers (required by some evaluation metrics)
test_labels = test_generator.labels

# Predict the labels for the test set
test_predictions = model.predict(test_hog_features)

# Round the predicted labels to get the final class predictions
test_predictions = np.round(test_predictions)

# Convert the predictions to integers (required by some evaluation metrics)
test_predictions = test_predictions.astype(int)

# Calculate evaluation metrics
accuracy = accuracy_score(test_labels, test_predictions)
precision = precision_score(test_labels, test_predictions, average='weighted')
recall = recall_score(test_labels, test_predictions, average='weighted')
f1 = f1_score(test_labels, test_predictions, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)
