In [None]:
# =============================
# 0. Mount Drive & Imports
# =============================
# Mount Google Drive at /content/drive; the data directory used by the rest of
# the notebook is addressed through this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:

import pandas as pd
import numpy as np
import os, cv2, tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Directories: the input CSV, the .npy caches and the saved model all live
# under one Drive folder.
base_dir = "/content/drive/MyDrive/mini_proj_data/"
cache_dir = os.path.join(base_dir, "cache/")
# Create the cache folder up front so the np.save calls further down have a
# destination.
os.makedirs(cache_dir, exist_ok=True)

# Try RAPIDS cuML for GPU PCA/FA
# `use_cuml` becomes True only when all three imports in the try body succeed.
# If any of them raises ImportError, the sklearn estimators are imported instead
# and the flag stays False. The sklearn names (PCA, FactorAnalysis) are bound
# ONLY in the except branch, and the cu* names only in the try branch, so
# downstream code must consult `use_cuml` before touching either set.
# NOTE(review): `cuml.decomposition` may not export FactorAnalysis. If that is
# the case, this block always falls through to sklearn even when cuML is
# installed, and the "cuML not available" message is misleading.
# TODO confirm against the installed cuML version.
use_cuml = False
try:
    from cuml.decomposition import PCA as cuPCA
    from cuml.decomposition import FactorAnalysis as cuFA
    import cupy as cp
    use_cuml = True
    print("✅ cuML detected → GPU accelerated PCA & FactorAnalysis will be used.")
except ImportError:
    from sklearn.decomposition import PCA, FactorAnalysis
    print("⚠️ cuML not available → Falling back to sklearn (CPU).")

In [None]:



# =============================
# 1. Load Data
# =============================
# Read the processed metadata table from the project folder.
csv_path = os.path.join(base_dir, "processed_data.csv")
df = pd.read_csv(csv_path)
print("Data loaded:", df.shape)

# X: the 'clean_path' column (kept as a pandas Series);
# y: the 'label' column as a NumPy array.
X, y = df['clean_path'], df['label'].values

# =============================
# 2. Preprocess Images (with caching)
# =============================
img_cache = os.path.join(cache_dir, "X_images.npy")


def load_and_preprocess_image(path, target_size=(224,224)):
    """Read one image file and return it as a float32 RGB array scaled by 1/255.

    Defined at top level (rather than inside the cache-miss branch) so the
    helper also exists in the kernel when the image array is loaded from cache.

    Args:
        path: Filesystem path handed to ``cv2.imread``.
        target_size: Size tuple handed to ``cv2.resize`` (OpenCV interprets it
            as ``(width, height)``).

    Returns:
        The resized image as a ``float32`` array, converted from BGR to RGB
        and divided by 255.

    Raises:
        FileNotFoundError: if ``cv2.imread`` returns ``None``, which it does
            for missing or unreadable files instead of raising. Without this
            check the failure surfaces inside ``cv2.cvtColor`` with no hint of
            which file was at fault.
    """
    img = cv2.imread(path)
    if img is None:
        raise FileNotFoundError(f"Could not read image (missing or unreadable): {path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, target_size)
    img = img / 255.0
    return img.astype('float32')


if os.path.exists(img_cache):
    print("🔄 Loading preprocessed images from cache...")
    X_images = np.load(img_cache)
else:
    print("⚡ Preprocessing images...")
    X_images = np.array([load_and_preprocess_image(p) for p in X])
    # Standardization: reducing over axes (0, 1, 2) leaves one mean/std per
    # position of the last axis, i.e. statistics are pooled over all images
    # and pixels. The epsilon guards against division by zero.
    mean = np.mean(X_images, axis=(0,1,2), keepdims=True)
    std  = np.std(X_images, axis=(0,1,2), keepdims=True)
    X_images = (X_images - mean) / (std + 1e-7)
    # Persist the standardized array so later runs skip decoding entirely.
    np.save(img_cache, X_images)

print("✅ Images ready:", X_images.shape)

# =============================
# 3. Define CNN Feature Extractors
# =============================
from tensorflow.keras import Model, regularizers
from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Flatten, Dropout, Input

def build_cnn1(input_shape):
    """Build the "CNN1" feature extractor.

    Four stages of a 3x3 same-padded ReLU convolution (32, 64, 128, 256
    filters, each with L2(1e-4) kernel regularization) followed by 2x2 max
    pooling. The result is flattened and passed through Dropout(0.5).

    Args:
        input_shape: Shape tuple forwarded to ``Input(shape=...)``.

    Returns:
        A Keras ``Model`` named "CNN1" mapping the input to the flattened
        feature vector.
    """
    inputs = Input(shape=input_shape)
    features = inputs
    for n_filters in (32, 64, 128, 256):
        features = Conv2D(
            n_filters,
            (3,3),
            activation='relu',
            padding="same",
            kernel_regularizer=regularizers.l2(1e-4),
        )(features)
        features = MaxPooling2D((2,2))(features)
    features = Flatten()(features)
    features = Dropout(0.5)(features)
    return Model(inputs, features, name="CNN1")

def build_cnn2(input_shape):
    """Build the "CNN2" feature extractor.

    Four stages of a same-padded ReLU convolution followed by 2x2 average
    pooling. Filters/kernels per stage are 256/7x7, 128/5x5, 96/3x3 and
    96/3x3, each with L2(1e-4) kernel regularization. The result is flattened
    and passed through Dropout(0.5).

    Args:
        input_shape: Shape tuple forwarded to ``Input(shape=...)``.

    Returns:
        A Keras ``Model`` named "CNN2" mapping the input to the flattened
        feature vector.
    """
    stage_specs = (
        (256, (7,7)),
        (128, (5,5)),
        (96, (3,3)),
        (96, (3,3)),
    )
    inputs = Input(shape=input_shape)
    features = inputs
    for n_filters, kernel in stage_specs:
        features = Conv2D(
            n_filters,
            kernel,
            activation='relu',
            padding="same",
            kernel_regularizer=regularizers.l2(1e-4),
        )(features)
        features = AveragePooling2D((2,2))(features)
    features = Flatten()(features)
    features = Dropout(0.5)(features)
    return Model(inputs, features, name="CNN2")

def build_cnn3(input_shape):
    """Build the "CNN3" feature extractor.

    Four stages of a same-padded ReLU convolution followed by 2x2 max
    pooling. Filters/kernels per stage are 32/3x3, 96/5x5, 128/5x5 and
    256/7x7, each with L2(1e-4) kernel regularization. The result is flattened
    and passed through Dropout(0.5).

    Args:
        input_shape: Shape tuple forwarded to ``Input(shape=...)``.

    Returns:
        A Keras ``Model`` named "CNN3" mapping the input to the flattened
        feature vector.
    """
    stage_specs = (
        (32, (3,3)),
        (96, (5,5)),
        (128, (5,5)),
        (256, (7,7)),
    )
    inputs = Input(shape=input_shape)
    features = inputs
    for n_filters, kernel in stage_specs:
        features = Conv2D(
            n_filters,
            kernel,
            activation='relu',
            padding="same",
            kernel_regularizer=regularizers.l2(1e-4),
        )(features)
        features = MaxPooling2D((2,2))(features)
    features = Flatten()(features)
    features = Dropout(0.5)(features)
    return Model(inputs, features, name="CNN3")

def build_cnn4(input_shape):
    """Build the "CNN4" feature extractor.

    Four stages of a same-padded ReLU convolution followed by average
    pooling. Filters/kernel/pool per stage are 32/3x3/3x3, 32/3x3/5x5,
    64/5x5/3x3 and 128/5x5/3x3, each convolution with L2(1e-4) kernel
    regularization. The result is flattened and passed through Dropout(0.5).

    Args:
        input_shape: Shape tuple forwarded to ``Input(shape=...)``.

    Returns:
        A Keras ``Model`` named "CNN4" mapping the input to the flattened
        feature vector.
    """
    # (filters, conv kernel, pooling window) for each stage, in order.
    stage_specs = (
        (32, (3,3), (3,3)),
        (32, (3,3), (5,5)),
        (64, (5,5), (3,3)),
        (128, (5,5), (3,3)),
    )
    inputs = Input(shape=input_shape)
    features = inputs
    for n_filters, kernel, pool in stage_specs:
        features = Conv2D(
            n_filters,
            kernel,
            activation='relu',
            padding="same",
            kernel_regularizer=regularizers.l2(1e-4),
        )(features)
        features = AveragePooling2D(pool)(features)
    features = Flatten()(features)
    features = Dropout(0.5)(features)
    return Model(inputs, features, name="CNN4")

# All four extractors are built for the same input shape, in order 1 → 4.
input_shape = (224,224,3)
CNN1 = build_cnn1(input_shape)
CNN2 = build_cnn2(input_shape)
CNN3 = build_cnn3(input_shape)
CNN4 = build_cnn4(input_shape)

# =============================
# 4. Feature Extraction (with caching)
# =============================
def get_or_compute_features(model, X_images, name):
    """Return ``model``'s outputs for ``X_images``, cached on disk as ``<name>.npy``.

    If ``<cache_dir>/<name>.npy`` exists and its first dimension matches
    ``len(X_images)`` it is loaded and returned. A cache whose row count
    differs from the current input is treated as stale and recomputed, since
    returning it would silently misalign features with the labels.

    Args:
        model: Object exposing ``predict(X, batch_size=..., verbose=...)``.
        X_images: Input array passed to ``model.predict``.
        name: Cache key; also used in the progress messages.

    Returns:
        The feature array, either loaded from cache or freshly predicted.
    """
    cache_file = os.path.join(cache_dir, f"{name}.npy")
    if os.path.exists(cache_file):
        print(f"🔄 Loading cached {name} features...")
        cached = np.load(cache_file)
        if cached.shape[0] == len(X_images):
            return cached
        # Row count disagrees with the current input: the cache was written
        # for a different dataset, so fall through and overwrite it.
        print(f"⚠️ Cached {name} features have {cached.shape[0]} rows but "
              f"{len(X_images)} inputs were given → recomputing.")
    print(f"⚡ Extracting {name} features...")
    with tf.device('/GPU:0'):
        feats = model.predict(X_images, batch_size=128, verbose=1)
    np.save(cache_file, feats)
    return feats

# Run (or load from cache) each extractor in order; cache keys are
# "features_cnn1" … "features_cnn4".
features_cnn1, features_cnn2, features_cnn3, features_cnn4 = (
    get_or_compute_features(extractor, X_images, f"features_cnn{idx}")
    for idx, extractor in enumerate((CNN1, CNN2, CNN3, CNN4), start=1)
)

# =============================
# 5. Dimensionality Reduction (with caching)
# =============================
merged_cache = os.path.join(cache_dir, "merged_features.npy")

# Load the cached reduction only if it still matches the current feature set.
# A cache with a different number of rows belongs to another dataset version
# and would silently misalign features with labels.
merged_features = None
if os.path.exists(merged_cache):
    print("🔄 Loading merged PCA+FA features from cache...")
    merged_features = np.load(merged_cache)
    if merged_features.shape[0] != features_cnn1.shape[0]:
        print(f"⚠️ Cached merged features have {merged_features.shape[0]} rows but "
              f"{features_cnn1.shape[0]} samples are loaded → recomputing.")
        merged_features = None

if merged_features is None:
    # NOTE(review): PCA and FA are fitted on every sample here, before the
    # train/val/test split below, so the projections see validation/test data.
    # Consider fitting on the training rows only and transforming the rest.
    print("Applying PCA on CNN1+CNN2...")
    all_features_dual1 = np.concatenate([features_cnn1, features_cnn2], axis=1)
    if use_cuml:
        pca = cuPCA(n_components=50)
        features_pca = pca.fit_transform(cp.asarray(all_features_dual1))
        features_pca = cp.asnumpy(features_pca)
    else:
        # Fixed seed, matching the FactorAnalysis call below, so that a
        # randomized SVD solver yields the same projection on every run.
        pca = PCA(n_components=50, random_state=42)
        features_pca = pca.fit_transform(all_features_dual1)

    print("Applying FA on CNN3+CNN4...")
    all_features_dual2 = np.concatenate([features_cnn3, features_cnn4], axis=1)
    if use_cuml:
        fa = cuFA(n_components=50, random_state=42)
        features_fa = fa.fit_transform(cp.asarray(all_features_dual2))
        features_fa = cp.asnumpy(features_fa)
    else:
        fa = FactorAnalysis(n_components=50, random_state=42)
        features_fa = fa.fit_transform(all_features_dual2)

    # Final representation: the 50 PCA components followed by the 50 FA factors.
    merged_features = np.concatenate([features_pca, features_fa], axis=1)
    np.save(merged_cache, merged_features)

print("✅ Features ready:", merged_features.shape)

# =============================
# 6. Train/Val/Test Split
# =============================
# First cut: hold out 30 % of the samples, stratified on the labels.
X_train, X_temp, y_train, y_temp = train_test_split(
    merged_features, y, stratify=y, random_state=42, test_size=0.3
)
# Second cut: one third of the held-out part becomes the test set and the
# rest the validation set, again stratified.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, stratify=y_temp, random_state=42, test_size=1/3
)

# Report feature/label shapes for every split.
for split_tag, split_X, split_y in (
    ("Train:", X_train, y_train),
    ("Val:  ", X_val, y_val),
    ("Test: ", X_test, y_test),
):
    print(split_tag, split_X.shape, split_y.shape)

# =============================
# 7. Classifier Training
# =============================
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

model_path = os.path.join(base_dir, "saved_models/dual_cnn_classifier.h5")
os.makedirs(os.path.dirname(model_path), exist_ok=True)

if not os.path.exists(model_path):
    print("⚡ Training new classifier...")
    # Dense 256 → 128 → 64 with Dropout(0.2) after each, then a 7-way softmax.
    model = Sequential()
    model.add(Dense(256, activation='relu', input_shape=(X_train.shape[1],)))
    model.add(Dropout(0.2))
    for hidden_units in (128, 64):
        model.add(Dense(hidden_units, activation='relu'))
        model.add(Dropout(0.2))
    model.add(Dense(7, activation='softmax'))

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    # Stop once val_loss has not improved for 5 epochs and roll back to the
    # best weights seen.
    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    fit_kwargs = dict(
        validation_data=(X_val, y_val),
        epochs=80,
        batch_size=128,
        callbacks=[early_stop],
        verbose=1,
    )
    model.fit(X_train, y_train, **fit_kwargs)
    model.save(model_path)
    print(f"💾 Model saved at {model_path}")
else:
    # A classifier was saved by an earlier run: reuse it instead of retraining.
    print("🔄 Loading saved classifier model...")
    from tensorflow.keras.models import load_model
    model = load_model(model_path)
