In [None]:
# COVID X-ray Classification Project Workflow (ML + DL)

# 1. Load & Preprocess Data
# ---------------------------------------------------
# - Load X-ray images and labels
# - Resize to fixed shape (e.g., 299x299)
# - Apply CLAHE, subtract mask
# - Normalize pixel values (0-1)
# - Convert to numpy arrays

# 2. Encode Labels
from sklearn.preprocessing import LabelEncoder

# Learn the class vocabulary first, then map every label to its integer code.
# `le` is kept so predictions can be turned back into class names later.
le = LabelEncoder().fit(y_labels)
y_encoded = le.transform(y_labels)

# 3. Feature Extraction for ML
from tensorflow.keras.applications import VGG16

# Headless (no classifier top) ImageNet VGG16 used as a frozen feature
# extractor for 299x299 RGB inputs.
# NOTE(review): step 1 describes scaling pixels to 0-1, whereas Keras documents
# `vgg16.preprocess_input` as the input preparation for these weights --
# confirm that X_images is in the form the pretrained network expects.
model = VGG16(include_top=False, weights='imagenet', input_shape=(299, 299, 3))

# Run every image through the convolutional base, then flatten each image's
# feature maps into one row so classical ML models can consume them.
conv_maps = model.predict(X_images)
features = conv_maps.reshape(len(conv_maps), -1)

# 4. Train-Test Split
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for final evaluation. Stratifying on the encoded
# labels keeps the class proportions the same in both splits, and the fixed
# seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

# 5. Class Imbalance Fix (only on training data)
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline

# One-off balanced copy of the training split, kept for inspection or for
# fitting a single model without cross-validation.
# Do NOT feed it to the cross-validated searches below: SMOTE builds synthetic
# samples from real training points, so oversampling before the folds are cut
# places close relatives of training samples in the validation folds and
# inflates the CV score.
sm = SMOTE(random_state=42)
X_train_bal, y_train_bal = sm.fit_resample(X_train, y_train)

# 6. ML Modeling + Hyperparameter Tuning
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from skopt import BayesSearchCV
from sklearn.svm import SVC

# SMOTE is a pipeline step, so each CV split oversamples only its own training
# part and is scored on untouched validation data. The imblearn pipeline
# applies samplers during fit only, never when predicting.
model = Pipeline(steps=[
    ('smote', SMOTE(random_state=42)),
    ('svc', SVC()),
])
# NOTE(review): validation folds now keep the original class imbalance, so
# plain accuracy may be optimistic for minority classes -- consider whether
# 'balanced_accuracy' or 'f1_macro' is a better selection metric.

# GridSearch: exhaustive over all 6 (C, kernel) combinations.
param_grid = {'svc__C': [0.1, 1, 10], 'svc__kernel': ['linear', 'rbf']}
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')

# RandomSearch: with list-only parameters, candidates are drawn without
# replacement, so n_iter=6 visits the same 6 combinations as the grid.
# Seeded for reproducibility, like the split and SMOTE above.
param_dist = {'svc__C': [0.1, 1, 10], 'svc__kernel': ['linear', 'rbf']}
random_search = RandomizedSearchCV(
    model, param_dist, cv=5, n_iter=6, scoring='accuracy', random_state=42)

# BayesSearch: C is searched over the continuous range [0.1, 50.0].
search_spaces = {'svc__C': (0.1, 50.0), 'svc__kernel': ['linear', 'rbf']}
bayes_search = BayesSearchCV(
    model, search_spaces, n_iter=10, cv=5, scoring='accuracy', random_state=42)

# Fit (pass the ORIGINAL training split; the pipeline resamples inside each fold)
# grid_search.fit(X_train, y_train)
# random_search.fit(X_train, y_train)
# bayes_search.fit(X_train, y_train)

# 7. Evaluate Models
# print(grid_search.best_params_)
# print(random_search.best_params_)
# print(bayes_search.best_params_)
# Predict on the held-out test split (X_test), then report the confusion matrix and accuracy

# 8. Deep Learning Path (Instead of ML)
# - Use data augmentation on minority classes
# - Fine-tune pretrained CNN (EfficientNet, ResNet, etc.)
# - Evaluate on test set
# - Compare results to ML

# 9. Save Best Model
# from joblib import dump
# dump(grid_search.best_estimator_, 'best_model.joblib')

# OR (DL)
# model.save("best_dl_model.h5")
