## **Implement Basic ML Algorithm for Classification**
**Dataset from BlackBoard:** Facial Emotion Recognition dataset

**File:** facial-emotion-recognition.zip (432.768 MB)

In [6]:
# Step 1: Define the path to your local dataset
dataset_path = "/Users/thomas/Desktop/IT3212/assignment_4/thomas/facial-emotion-recognition/images"  # Replace this with the actual path to your dataset

In [2]:
import numpy as np
import cv2
from skimage.feature import local_binary_pattern
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
import os
from sklearn.datasets import load_files

# Function to extract LBP features
def extract_lbp_features(image, radius=1, n_points=32):
    """
    Extract LBP features from an image.
    
    :param image: Input image (grayscale)
    :param radius: The radius of the circular neighborhood for LBP
    :param n_points: Number of points to consider for each neighborhood
    :return: Histogram of LBP features
    """
    lbp = local_binary_pattern(image, n_points, radius, method="uniform")
    # Calculate the histogram of LBP values
    n_bins = int(lbp.max() + 1)
    lbp_hist, _ = np.histogram(lbp.ravel(), bins=n_bins, range=(0, n_bins))
    lbp_hist = lbp_hist.astype("float")
    lbp_hist /= (lbp_hist.sum() + 1e-6)  # Normalize the histogram
    return lbp_hist

# Function to load image data from a folder structure
def load_images_from_folder(folder_path):
    """
    Loads images and their labels from a folder.
    
    :param folder_path: Path to the folder containing subfolders of images
    :return: List of images and corresponding labels
    """
    images = []
    labels = []
    for label in os.listdir(folder_path):
        folder = os.path.join(folder_path, label)
        if os.path.isdir(folder):
            for filename in os.listdir(folder):
                img_path = os.path.join(folder, filename)
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is not None:
                    images.append(img)
                    labels.append(label)
    return images, labels

# Load dataset (replace with your dataset path)
images, labels = load_images_from_folder(dataset_path)

# Extract LBP features from all images
lbp_features = []
for img in images:
    lbp_hist = extract_lbp_features(img)
    lbp_features.append(lbp_hist)

lbp_features = np.array(lbp_features)

# Convert labels to numerical form
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(lbp_features, labels, test_size=0.3, random_state=42)

# Normalize the features (important for SVM performance)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train SVM classifier
svm = SVC(kernel='linear')  # You can try other kernels like 'rbf' or 'poly'
svm.fit(X_train, y_train)

# Make predictions on the test set
y_pred = svm.predict(X_test)

# Evaluate the classifier
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


Classification Report:
              precision    recall  f1-score   support

       anger       0.17      0.25      0.20         4
    contempt       0.00      0.00      0.00         9
     disgust       0.08      0.14      0.11         7
        fear       0.00      0.00      0.00         6
       happy       0.00      0.00      0.00         6
     neutral       0.00      0.00      0.00         3
         sad       0.00      0.00      0.00         5
   surprised       0.00      0.00      0.00         6

    accuracy                           0.04        46
   macro avg       0.03      0.05      0.04        46
weighted avg       0.03      0.04      0.03        46



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [7]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from skimage.feature import hog, local_binary_pattern, blob_doh
from skimage import io, color, transform
from skimage.util import img_as_ubyte
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from torchvision import datasets, transforms

# Step 1: Load dataset
#dataset_path = "/path/to/your/local/dataset"  # Replace this with the actual path to your dataset

# Assuming images are organized by class in folders
full_dataset = datasets.ImageFolder(root=dataset_path, transform=transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to a uniform size
    transforms.Grayscale(num_output_channels=1),  # Convert to grayscale for better feature extraction (optional)
    transforms.ToTensor(),  # Convert images to tensor
]))

# Convert images to numpy arrays for processing
def extract_hog_features(image):
    """Extract HOG features from a given image."""
    image = img_as_ubyte(image)  # Convert image to uint8 for HOG
    features, _ = hog(image, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=True)
    return features

def extract_lbp_features(image):
    """Extract LBP features from a given image."""
    radius = 3
    n_points = 24
    lbp = local_binary_pattern(image, n_points, radius, method="uniform")
    return lbp.ravel()  # Flatten to 1D

def extract_blob_features(image):
    """Extract blob features from a given image."""
    blobs = blob_doh(image, max_sigma=30, threshold=0.1)
    return blobs.ravel()  # Flatten to 1D

# Step 2: Extract features from all images
X = []
y = []

for img, label in full_dataset.imgs:
    image = io.imread(img)
    gray_image = color.rgb2gray(image)  # Convert image to grayscale

    # Choose one of the feature extraction methods:
    features = extract_hog_features(gray_image)  # or extract_lbp_features or extract_blob_features
    X.append(features)
    y.append(label)

X = np.array(X)
y = np.array(y)

# Step 3: Split dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Scaling features and training SVM
# Normalize the feature data (important for SVM)
scaler = StandardScaler()

# Create an SVM pipeline with scaling and an RBF kernel
svm_model = make_pipeline(scaler, SVC(kernel='rbf', class_weight='balanced'))

# Step 5: Hyperparameter tuning with GridSearchCV
param_grid = {
    'svc__C': [0.1, 1, 10],  # Regularization parameter
    'svc__gamma': ['scale', 'auto'],  # Kernel coefficient
}

grid_search = GridSearchCV(svm_model, param_grid, cv=3, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Step 6: Evaluate on test set
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

print(f"Best hyperparameters: {grid_search.best_params_}")
print("Classification Report on Test Set:")
print(classification_report(y_test, y_pred))

# Step 7: Plot Accuracy (optional)
# Since SVM doesn't train in epochs, we'll skip plotting per-epoch accuracy but can plot the performance on the test set

# (If you want to visualize the impact of hyperparameter tuning, you can plot grid search results)
results = grid_search.cv_results_
mean_test_scores = results['mean_test_score']

plt.figure(figsize=(10, 6))
plt.plot(mean_test_scores)
plt.title("Hyperparameter Search: Mean Test Scores")
plt.xlabel("Parameter combination index")
plt.ylabel("Mean Accuracy")
plt.show()


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (152,) + inhomogeneous part.