In [None]:
from functools import partial

import numpy as np
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
# Load scikit-learn's digits dataset as a feature matrix X and label vector y.
digits = load_digits()
X, y = digits.data, digits.target

# For demonstration, artificially create imbalance by keeping only the first
# 10% of the class-0 samples and every sample of the other classes.
# np.where(cond) returns a 1-tuple of index arrays, so take element [0] to get
# the actual indices; len() and slicing on the tuple itself would act on the
# tuple (length 1), not on the samples.
indices_class_0 = np.where(y == 0)[0]
n_keep_class_0 = int(len(indices_class_0) * 0.1)
indices_class_0 = indices_class_0[:n_keep_class_0]
indices_other = np.where(y != 0)[0]

# Kept class-0 rows come first, followed by all remaining rows in original order.
idxs = np.concatenate([indices_class_0, indices_other])
X, y = X[idxs], y[idxs]

In [None]:
# Hold out 30% of the samples for final evaluation. Stratifying on y keeps the
# class proportions the same in both partitions; the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

In [None]:
# Pipeline including SMOTE for balancing and class weights for RandomForest.
# The imblearn Pipeline is used (rather than sklearn's) because it accepts
# sampler steps such as SMOTE.
# Every stochastic step is seeded with 42 so repeated runs give the same result.
pipe = ImbPipeline([
    ('scaler', StandardScaler()),
    ('smote', SMOTE(random_state=42)),                # Handle imbalance
    # random_state only matters when PCA selects a randomized solver; it is
    # harmless otherwise.
    ('pca', PCA(n_components=30, random_state=42)),
    # mutual_info_classif is randomized; SelectKBest only passes (X, y) to its
    # score function, so the seed is bound up front with functools.partial.
    ('selectkbest', SelectKBest(partial(mutual_info_classif, random_state=42), k=15)),
    ('classifier', RandomForestClassifier(class_weight='balanced', random_state=42))
])

In [None]:
# Hyper-parameter grid for the search. Keys use the `<step name>__<parameter>`
# form, so each prefix has to match a step name declared in `pipe`.
# 3 * 3 * 2 * 3 = 54 candidate combinations in total.
param_grid = {
    # PCA: number of principal components to keep.
    'pca__n_components': [20, 30, 40],
    # SelectKBest: number of top-scoring components passed to the classifier
    # (the largest k equals the smallest PCA size, so k never exceeds the input width).
    'selectkbest__k': [10, 15, 20],
    # Random forest: number of trees.
    'classifier__n_estimators': [50, 100],
    # Random forest: maximum tree depth (None leaves depth unrestricted).
    'classifier__max_depth': [10, 20, None],
}

In [None]:
# Exhaustive 3-fold cross-validated search over every combination in param_grid.
# Candidates are ranked by balanced accuracy (mean per-class recall) rather
# than plain accuracy: the data was deliberately made imbalanced, and plain
# accuracy is dominated by the majority classes, so it would barely register
# how well the rare class is recognised.
search = GridSearchCV(pipe, param_grid, cv=3, scoring='balanced_accuracy', verbose=2)
search.fit(X_train, y_train)


In [None]:

# best_estimator_ is the pipeline refit with the best parameter combination
# found by the search (available because GridSearchCV's refit option was left
# at its default).
best_pipe = search.best_estimator_
# Predict labels for the held-out test split, which the search never saw.
pred = best_pipe.predict(X_test)

In [None]:
# Show the winning hyper-parameter combination chosen by the grid search.
best_params = search.best_params_
print("Best Pipeline Parameters:", best_params)

# Per-class precision / recall / F1 on the held-out test split.
report = classification_report(y_test, pred)
print(report)