# Cricket Image Classification: Feature Extraction & Model Training

This notebook demonstrates the complete pipeline for classifying cricket image cells (Ball, Bat, Stump, Background).

## Steps:
1.  **Setup**: Install dependencies.
2.  **Data Loading**: Load images and labels.
3.  **Feature Visualization**: Inspect HOG, Color, and Shape features.
4.  **Feature Extraction**: Build the feature matrix.
5.  **Model Training**: Train SVM, Random Forest, and MLP classifiers.
6.  **Evaluation**: Analyze performance metrics.

## 1. Setup & Dependencies

In [None]:
!pip install scikit-image opencv-python pandas matplotlib scikit-learn

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.feature import hog
from skimage import exposure
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sns

# Configuration
# Image dimensions in pixels and the grid that partitions each image.
IMG_WIDTH, IMG_HEIGHT = 800, 600
GRID_ROWS, GRID_COLS = 8, 8

# Pixel size of a single grid cell (integer division drops any remainder).
CELL_W = IMG_WIDTH // GRID_COLS
CELL_H = IMG_HEIGHT // GRID_ROWS

# Paths (Adjust if using Google Drive)
LABELS_FILE = "labels.csv"
PROCESSED_DIR = "processed_images"

## 2. Data Loading
Ensure the `processed_images` folder and the `labels.csv` file are uploaded to the Colab environment.

In [None]:
def load_data(labels_file, image_dir):
    """Read the label table from a CSV file.

    Args:
        labels_file: Path of the CSV file that holds the labels.
        image_dir: Directory containing the images. Currently unused by
            this function.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` (after printing an
        error message) when ``labels_file`` does not exist.
    """
    if os.path.exists(labels_file):
        labels_df = pd.read_csv(labels_file)
        print(f"Loaded labels for {len(labels_df)} images.")
        return labels_df

    # Missing file: report it and let the caller decide what to do with None.
    print(f"Error: {labels_file} not found.")
    return None

df = load_data(LABELS_FILE, PROCESSED_DIR)
# load_data returns None when the labels file is missing; only preview the
# table when it was actually loaded so this cell does not raise AttributeError.
df.head() if df is not None else None

## 3. Feature Visualization
Let's inspect the features for a single cell to understand what the model sees.

In [None]:
def visualize_features(image_name, row_idx, col_idx):
    """Show a 2x3 figure of the features computed for one grid cell.

    The panels are: the raw cell, its HOG rendering, per-channel colour
    histograms, Canny edges, probabilistic Hough line segments and Hough
    circles.

    Args:
        image_name: File name of the image inside ``PROCESSED_DIR``.
        row_idx: Zero-based grid row of the cell.
        col_idx: Zero-based grid column of the cell.

    Returns:
        None. Prints a message and returns early when the image file is
        missing, cannot be decoded, or the requested cell lies outside the
        image.
    """
    img_path = os.path.join(PROCESSED_DIR, image_name)
    if not os.path.exists(img_path):
        print(f"Image {image_name} not found.")
        return

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable files;
        # stop here instead of letting cvtColor fail on it.
        print(f"Image {image_name} could not be read.")
        return
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Extract Cell
    x1 = col_idx * CELL_W
    y1 = row_idx * CELL_H
    x2 = x1 + CELL_W
    y2 = y1 + CELL_H
    cell = img[y1:y2, x1:x2]
    if cell.size == 0:
        # Slicing past the image border yields an empty array, which the
        # feature functions below cannot process.
        print(f"Cell ({row_idx}, {col_idx}) is outside image {image_name}.")
        return

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    # 1. Original
    axes[0, 0].imshow(cell)
    axes[0, 0].set_title("Original Cell")
    axes[0, 0].axis('off')

    # 2. HOG (only the rendered image is needed here, not the descriptor)
    _, hog_image = hog(cell, orientations=9, pixels_per_cell=(8, 8),
                       cells_per_block=(2, 2), visualize=True, channel_axis=-1)
    hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))
    axes[0, 1].imshow(hog_image_rescaled, cmap='gray')
    axes[0, 1].set_title("HOG Features")
    axes[0, 1].axis('off')

    # 3. Color Histogram (the cell is RGB, so channel i matches colors[i])
    axes[0, 2].set_title("Color Histogram")
    colors = ('r', 'g', 'b')
    for i, color in enumerate(colors):
        hist = cv2.calcHist([cell], [i], None, [32], [0, 256])
        axes[0, 2].plot(hist, color=color)

    # 4. Edge Detection
    gray = cv2.cvtColor(cell, cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, 50, 150)
    axes[1, 0].imshow(edges, cmap='gray')
    axes[1, 0].set_title("Canny Edges")
    axes[1, 0].axis('off')

    # 5. Hough Lines (drawn on a copy so the original cell stays untouched)
    lines_img = cell.copy()
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=30, minLineLength=20, maxLineGap=10)
    if lines is not None:
        for line in lines:
            # Plain Python ints keep the cv2 drawing calls independent of
            # how a given OpenCV build parses NumPy scalar types.
            x1_l, y1_l, x2_l, y2_l = (int(v) for v in line[0])
            cv2.line(lines_img, (x1_l, y1_l), (x2_l, y2_l), (0, 255, 0), 2)
    axes[1, 1].imshow(lines_img)
    axes[1, 1].set_title("Hough Lines")
    axes[1, 1].axis('off')

    # 6. Hough Circles
    circles_img = cell.copy()
    circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, dp=1.2, minDist=20,
                               param1=50, param2=30, minRadius=5, maxRadius=50)
    if circles is not None:
        # Round to whole pixels and convert to plain ints for drawing.
        for cx, cy, radius in np.around(circles[0]).astype(int).tolist():
            cv2.circle(circles_img, (cx, cy), radius, (0, 255, 0), 2)  # outline
            cv2.circle(circles_img, (cx, cy), 2, (255, 0, 0), 3)       # centre
    axes[1, 2].imshow(circles_img)
    axes[1, 2].set_title("Hough Circles")
    axes[1, 2].axis('off')

    plt.tight_layout()
    plt.show()

# Visualize a sample cell (Row 4, Col 4 of first image)
# The indices passed below are zero-based, hence 3 and 3.
if not (df is None or df.empty):
    visualize_features(image_name=df['ImageFileName'].iloc[0], row_idx=3, col_idx=3)

## 4. Feature Extraction
We will now iterate through all images and extract features for every grid cell; background cells (label 0) are kept as their own class.

In [None]:
def _cell_feature_vector(cell):
    """Return the 1-D feature vector (HOG + colour histograms + shape counts) of one RGB cell."""
    # --- Feature 1: HOG ---
    fd = hog(cell, orientations=9, pixels_per_cell=(8, 8),
             cells_per_block=(2, 2), visualize=False, channel_axis=-1)

    # --- Feature 2: Color Histogram (32 bins per channel, normalized) ---
    hist_features = []
    for ch in range(3):
        hist = cv2.calcHist([cell], [ch], None, [32], [0, 256])
        hist_features.extend(cv2.normalize(hist, hist).flatten())

    # --- Feature 3: Shape Counts ---
    gray = cv2.cvtColor(cell, cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, 50, 150)

    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=30, minLineLength=20, maxLineGap=10)
    num_lines = len(lines) if lines is not None else 0

    circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, dp=1.2, minDist=20,
                               param1=50, param2=30, minRadius=5, maxRadius=50)
    num_circles = len(circles[0]) if circles is not None else 0

    return np.concatenate([fd, hist_features, [num_lines, num_circles]])


def extract_features(df):
    """Build the feature matrix and label vector for every grid cell.

    Each row of ``df`` describes one image: ``ImageFileName`` is the file
    name inside ``PROCESSED_DIR`` and the columns ``c01``, ``c02``, ... hold
    the label of each grid cell in row-major order. Cells labelled 0 are
    kept as the 'Background' class rather than skipped.

    Args:
        df: Label table as described above.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays with one entry per
        processed cell. Images that are missing or cannot be decoded are
        reported and skipped.
    """
    features_list = []
    labels_list = []
    # Derive the cell count from the grid configuration instead of
    # hard-coding it, so it stays correct if the grid size changes.
    num_cells = GRID_ROWS * GRID_COLS

    print("Starting feature extraction...")

    # Count rows by position so the progress message does not depend on the
    # DataFrame index being the default 0..n-1 integers.
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        img_name = row['ImageFileName']
        img_path = os.path.join(PROCESSED_DIR, img_name)

        if not os.path.exists(img_path):
            print(f"Skipping {img_name}: file not found.")
            continue

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None (it does not raise) for unreadable files.
            print(f"Skipping {img_name}: file could not be read.")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        for i in range(num_cells):
            label = row[f"c{i+1:02d}"]

            # Row-major cell index -> grid coordinates -> pixel bounds.
            r, c = divmod(i, GRID_COLS)
            x1 = c * CELL_W
            y1 = r * CELL_H
            cell = img[y1:y1 + CELL_H, x1:x1 + CELL_W]

            features_list.append(_cell_feature_vector(cell))
            labels_list.append(label)

        if position % 5 == 0:
            print(f"Processed {position} images...")

    return np.array(features_list), np.array(labels_list)

# Build the feature matrix X and label vector y, then report their shapes.
X, y = extract_features(df=df)
print(f"Feature Matrix Shape: {X.shape}", f"Labels Shape: {y.shape}", sep="\n")

## 5. Model Training
We will split the data and train multiple classifiers.

In [None]:
# Split Data
# Stratify on the labels so every class keeps the same proportion in the
# train and test splits (class frequencies are presumably imbalanced, since
# background cells are kept as a class -- verify against the data).
# train_test_split raises ValueError when a class has fewer than 2 samples,
# so fall back to a plain random split in that case.
class_counts = np.unique(y, return_counts=True)[1]
stratify_labels = y if class_counts.size and class_counts.min() >= 2 else None
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=stratify_labels
)

# Scale Features
# Fit the scaler on the training split only, then apply the same transform
# to the test split so no test statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Train set: {X_train.shape}")
print(f"Test set: {X_test.shape}")

In [None]:
# 1. Support Vector Machine
# RBF-kernel SVM fitted on the scaled training features, scored on the test split.
print("Training SVM...")
svm_model = SVC(C=10, kernel='rbf', gamma='scale')
svm_model = svm_model.fit(X_train_scaled, y_train)  # fit() returns the estimator itself
svm_pred = svm_model.predict(X_test_scaled)
print("SVM Accuracy:", accuracy_score(y_true=y_test, y_pred=svm_pred))

In [None]:
# 2. Random Forest
# 100-tree forest with a fixed seed, fitted on the scaled training features.
print("Training Random Forest...")
rf_model = RandomForestClassifier(random_state=42, n_estimators=100)
rf_model = rf_model.fit(X_train_scaled, y_train)  # fit() returns the estimator itself
rf_pred = rf_model.predict(X_test_scaled)
print("Random Forest Accuracy:", accuracy_score(y_true=y_test, y_pred=rf_pred))

In [None]:
# 3. MLP (Neural Network)
# Two hidden layers (100 and 50 units), up to 500 iterations, fixed seed.
print("Training MLP...")
mlp_model = MLPClassifier(random_state=42, max_iter=500, hidden_layer_sizes=(100, 50))
mlp_model = mlp_model.fit(X_train_scaled, y_train)  # fit() returns the estimator itself
mlp_pred = mlp_model.predict(X_test_scaled)
print("MLP Accuracy:", accuracy_score(y_true=y_test, y_pred=mlp_pred))

## 6. Evaluation
Visualizing the performance of the strongest candidates (likely Random Forest and SVM) using classification reports and confusion matrices.

In [None]:
def plot_confusion_matrix(y_true, y_pred, title, class_names=('None', 'Ball', 'Bat', 'Stump')):
    """Draw a labelled confusion-matrix heatmap.

    Args:
        y_true: Ground-truth class ids.
        y_pred: Predicted class ids, same length as ``y_true``.
        title: Title shown above the heatmap.
        class_names: Display name of each class, in class-id order.
            NOTE(review): assumes the class ids are the integers
            ``0 .. len(class_names) - 1`` -- confirm against the label file.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    class_names = list(class_names)
    # Pin the label set so the matrix always has one row/column per class
    # name. Without it, a class absent from both y_true and y_pred shrinks
    # the matrix and the fixed tick labels end up on the wrong rows/columns.
    class_ids = list(range(len(class_names)))
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.title(title)
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.show()

# Display names in class-id order.
# NOTE(review): assumes class ids are the integers 0..3 (0 = background) -- confirm against labels.csv.
CLASS_NAMES = ['None', 'Ball', 'Bat', 'Stump']
CLASS_IDS = list(range(len(CLASS_NAMES)))

# Passing `labels` explicitly keeps classification_report from raising
# ValueError when a class is missing from the test split (the number of
# target_names would otherwise not match the number of classes found).
for model_name, predictions in (("SVM", svm_pred), ("Random Forest", rf_pred)):
    print(f"{model_name} Classification Report:")
    print(classification_report(y_test, predictions, labels=CLASS_IDS, target_names=CLASS_NAMES))
    plot_confusion_matrix(y_test, predictions, f"{model_name} Confusion Matrix")