In [5]:
import pandas as pd
import numpy as np
import os
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from skimage.io import imread
from skimage.transform import resize
from tqdm import tqdm

# Preprocessing and reproducibility settings
IMAGE_SIZE = (64, 64)  # shape every image is resized to before flattening
RANDOM_STATE = 42  # seed shared by the train/validation split and the SVC

# Dataset root, with the CSV tables and the image folders located beneath it
base_path = '/Users/zhao0725/Desktop/COMP3314/Assignment3/image-classification-challenge'
train_csv_path, test_csv_path, train_ims_path, test_ims_path = (
    os.path.join(base_path, entry)
    for entry in ('train.csv', 'test.csv', 'train_ims', 'test_ims')
)

# Read the training table (image file names and their labels)
train_df = pd.read_csv(train_csv_path)

# Function to load and preprocess images
def load_images(image_folder, image_filenames, image_size):
    """Load images from disk as flattened, resized grayscale feature vectors.

    Args:
        image_folder: Directory that contains the image files.
        image_filenames: Iterable of file names relative to ``image_folder``.
        image_size: Target shape passed to ``resize`` for every image.

    Returns:
        A NumPy array with one row per file name, each row being the
        flattened, resized image. When ``image_filenames`` is empty the
        result is an empty 2-D array of shape ``(0, prod(image_size))`` so
        callers that expect a samples-by-features matrix keep working.
    """
    images = [
        resize(
            imread(os.path.join(image_folder, filename), as_gray=True),
            image_size,
        ).ravel()  # 1-D feature vector; np.array() below makes the final copy
        for filename in tqdm(image_filenames, desc="Loading images")
    ]
    if not images:
        # np.array([]) would be 1-D with shape (0,); preserve the 2-D layout.
        return np.empty((0, int(np.prod(image_size))))
    return np.array(images)

# Load every training image named in the 'im_name' column as one flattened
# grayscale feature row, and extract the 'label' column as a plain array.
train_images = load_images(train_ims_path, train_df['im_name'], IMAGE_SIZE)
train_labels = train_df['label'].values

# Hold out 20% of the samples for validation; the fixed random_state makes the
# split reproducible from run to run.
# NOTE(review): the split is not stratified by label — confirm whether class
# proportions need to be preserved in the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    train_images, train_labels, test_size=0.2, random_state=RANDOM_STATE
)

# Chain feature standardization with a linear-kernel SVC. Because both steps
# live in one pipeline, the scaler is fitted only on the data passed to fit()
# and is re-applied automatically whenever predict() is called.
# NOTE(review): scikit-learn documents SVC fit time as scaling at least
# quadratically with the number of samples — confirm this is acceptable for
# the size of the training split, or consider a linear-specific solver.
svm_model = make_pipeline(StandardScaler(), SVC(kernel='linear', random_state=RANDOM_STATE))

# Fit the scaler and the classifier on the training split
print("Training the SVM model...")
svm_model.fit(X_train, y_train)

# Predict the held-out validation split and print per-class metrics
print("Validating the model...")
y_val_pred = svm_model.predict(X_val)
print(classification_report(y_val, y_val_pred))

# Read the test table and load its images with the same IMAGE_SIZE used for
# training, so the feature vectors have the same length the model was fit on.
test_df = pd.read_csv(test_csv_path)
test_images = load_images(test_ims_path, test_df['im_name'], IMAGE_SIZE)

# Run the fitted pipeline (scaling + SVC) on the test feature matrix
print("Making predictions on test images...")
test_predictions = svm_model.predict(test_images)

# Store the predictions in a 'label' column of the test DataFrame
test_df['label'] = test_predictions

# Write the test table, now including 'label', next to the input data;
# index=False keeps the DataFrame's row index out of the CSV.
output_csv_path = os.path.join(base_path, 'test_predictions.csv')
test_df.to_csv(output_csv_path, index=False)
print(f"Predictions saved to {output_csv_path}.")


Loading images: 100%|██████████| 50000/50000 [00:55<00:00, 908.70it/s] 


Training the SVM model...
