<a href="https://colab.research.google.com/github/Bensonsoh1189/IE4483/blob/main/PCA_SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# === STEP 0: UPLOAD & EXTRACT DATASET ZIP ===
from google.colab import files
import zipfile
import os

# Upload ZIP
print("📁 Please upload your dataset ZIP file...")
# files.upload() returns a dict keyed by file name; it is presumably empty
# when the upload dialog is cancelled (verify against the Colab docs).
uploaded = files.upload()

# Fail fast with a readable message instead of an IndexError on an empty upload.
if not uploaded:
    raise RuntimeError("❌ ZIP file not found. Please re-upload.")

# Define the ZIP name and extraction path dynamically.
# Only the first uploaded file is used; any additional uploads are ignored.
ZIP_NAME = next(iter(uploaded))
EXTRACT_PATH = "/content/dataset"

if not zipfile.is_zipfile(ZIP_NAME):
    raise ValueError(f"❌ {ZIP_NAME!r} is not a valid ZIP archive. Please re-upload.")

# Start from an empty directory so files left over from a previous run
# (e.g. an older version of the dataset) cannot leak into this one.
import shutil
shutil.rmtree(EXTRACT_PATH, ignore_errors=True)

with zipfile.ZipFile(ZIP_NAME, 'r') as zip_ref:
    zip_ref.extractall(EXTRACT_PATH)
print(f"✅ Extracted to {EXTRACT_PATH}")

# Confirm structure: the archive is expected to contain a top-level "train" folder.
train_dir = os.path.join(EXTRACT_PATH, "train")
if not os.path.isdir(train_dir):
    raise FileNotFoundError(
        f"❌ Expected a 'train' folder inside {EXTRACT_PATH}, found: {os.listdir(EXTRACT_PATH)}"
    )
print("📂 Class folders in train:", os.listdir(train_dir))

# === STEP 1: LOAD DATASETS ===
import cv2
import numpy as np

def load_dataset(data_path, image_size=(224, 224)):
    """Load a folder-per-class image dataset as flattened grayscale vectors.

    Every visible sub-directory of ``data_path`` is treated as one class.
    Classes are sorted alphabetically and labelled by their position in that
    order. Plain files and hidden directories (names starting with ".", such
    as ``.ipynb_checkpoints``) directly under ``data_path`` are ignored so
    they cannot crash the loader or shift the label indices.

    Args:
        data_path: Directory containing one sub-directory per class.
        image_size: ``(width, height)`` passed to ``cv2.resize``.
            Defaults to ``(224, 224)``.

    Returns:
        A tuple ``(X, y, class_labels)``:
        X -- array with one flattened image per row,
        y -- array of integer label indices aligned with the rows of ``X``,
        class_labels -- sorted list of class folder names; ``y`` indexes into it.
    """
    class_labels = sorted(
        entry
        for entry in os.listdir(data_path)
        if not entry.startswith(".") and os.path.isdir(os.path.join(data_path, entry))
    )

    X = []
    y = []
    for label_index, class_name in enumerate(class_labels):
        class_path = os.path.join(data_path, class_name)
        # Sorted so the sample order does not depend on the filesystem.
        for img_name in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, img_name)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # imread returns None (it does not raise) for files it cannot decode.
                print(f"⚠️ Skipping unreadable image: {img_path}")
                continue
            img = cv2.resize(img, image_size)
            X.append(img.flatten())  # one 1-D feature vector per image
            y.append(label_index)

    return np.array(X), np.array(y), class_labels

# Dynamically load the datasets from the extracted paths
X_train, y_train, class_names = load_dataset(os.path.join(EXTRACT_PATH, "train"))
X_val, y_val, val_class_names = load_dataset(os.path.join(EXTRACT_PATH, "val"))
X_test, y_test, test_class_names = load_dataset(os.path.join(EXTRACT_PATH, "test"))

# Label ids are positions in each split's own sorted folder list, so the
# splits only share a label space if they contain exactly the same class
# folders. Stop here rather than silently evaluating with shifted labels.
for split_name, split_class_names in (("val", val_class_names), ("test", test_class_names)):
    if split_class_names != class_names:
        raise ValueError(
            f"Class folders in '{split_name}' {split_class_names} "
            f"do not match 'train' {class_names}"
        )

print("✅ Data loaded:")
print("Train shape:", X_train.shape)
print("Val shape:", X_val.shape)
print("Test shape:", X_test.shape)

# === STEP 2: STANDARDIZE DATA ===
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# transform to all three splits so val/test statistics never influence it.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

# === STEP 3: APPLY PCA ===
from sklearn.decomposition import PCA

# A fractional n_components asks PCA to retain 95% of the variance;
# the projection is fitted on the training split only.
pca = PCA(svd_solver='full', n_components=0.95)
X_train_pca = pca.fit_transform(X_train_scaled)

# Project the held-out splits with the components learned above.
X_val_pca, X_test_pca = (
    pca.transform(split) for split in (X_val_scaled, X_test_scaled)
)

print("✅ PCA applied. Components used:", pca.n_components_)

# === STEP 4: TRAIN SVM ===
from sklearn.svm import SVC

# RBF-kernel SVM trained on the PCA-reduced training features;
# fit() returns the estimator itself, so construction and training chain.
svm = SVC(gamma='scale', C=1, kernel='rbf').fit(X_train_pca, y_train)
print("✅ SVM model trained.")

# === STEP 5: EVALUATE MODEL ===
from sklearn.metrics import classification_report, accuracy_score

# Pin the label ids that target_names refers to. Without `labels`, the report
# infers the classes from the data, so a class missing from both the truth and
# the predictions of a split would put the names out of step with the rows.
label_ids = list(range(len(class_names)))

# Validation
val_preds = svm.predict(X_val_pca)
print("\n📊 Validation Results:")
print(classification_report(y_val, val_preds, labels=label_ids, target_names=class_names))

# Test
test_preds = svm.predict(X_test_pca)
print("\n📊 Test Results:")
print(classification_report(y_test, test_preds, labels=label_ids, target_names=class_names))
print("✅ Test Accuracy:", accuracy_score(y_test, test_preds))


📁 Please upload your dataset ZIP file...


Saving augmented_dataset.zip to augmented_dataset (3).zip
✅ Extracted to /content/dataset
📂 Class folders in train: ['Sherryn', 'Eris', 'Poon min', 'Benson', 'James', 'Akshith', 'Rishita']
✅ Data loaded:
Train shape: (1260, 50176)
Val shape: (70, 50176)
Test shape: (70, 50176)
✅ PCA applied. Components used: 178
✅ SVM model trained.

📊 Validation Results:
              precision    recall  f1-score   support

     Akshith       1.00      0.70      0.82        10
      Benson       0.83      1.00      0.91        10
        Eris       0.90      0.90      0.90        10
       James       0.82      0.90      0.86        10
    Poon min       1.00      0.80      0.89        10
     Rishita       1.00      1.00      1.00        10
     Sherryn       0.83      1.00      0.91        10

    accuracy                           0.90        70
   macro avg       0.91      0.90      0.90        70
weighted avg       0.91      0.90      0.90        70


📊 Test Results:
              precision    r

In [None]:
import joblib
from google.colab import files

# Save: every fitted object needed to reproduce inference, keyed by file name
artifacts = {
    'scaler.pkl': scaler,
    'pca.pkl': pca,
    'svm_model.pkl': svm,
    'class_names.pkl': class_names,
}
for artifact_file, artifact in artifacts.items():
    joblib.dump(artifact, artifact_file)

# Download to local machine (only after all files have been written)
for artifact_file in artifacts:
    files.download(artifact_file)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import cv2
import numpy as np
import joblib
