In [None]:
 
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

# 1. Load dataset
# load_iris is imported here because the import list at the top of this cell
# omits it; without this line the cell raises NameError on a fresh kernel.
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
# Hold out 20% of the samples for the final test score; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Create pipeline
# Step names ('scaler', 'pca', 'classifier') are the prefixes used by the
# '<step>__<parameter>' keys in param_grid below.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('classifier', SVC())
])

# 3. Define parameter grid
# 2 x 3 x 2 = 12 candidate settings, all of which are evaluated.
param_grid = {
    'pca__n_components': [2, 3],
    'classifier__C': [0.1, 1, 10],
    'classifier__kernel': ['linear', 'rbf']
}

# 4. GridSearchCV
# No cv argument is passed, so scikit-learn's default splitting is used.
# The search only ever sees the training portion of the data.
grid = GridSearchCV(pipe, param_grid)
grid.fit(X_train, y_train)

# 5. Results
# grid.score() evaluates the refitted best pipeline on the held-out test set.
print("Best parameters found:", grid.best_params_)
print(f"Best cross-validation score: {grid.best_score_:.2f}")
print(f"Test set score: {grid.score(X_test, y_test):.2f}")


Best parameters found: {'classifier__C': 0.1, 'classifier__kernel': 'linear', 'pca__n_components': 3}
Best cross-validation score: 0.96
Test set score: 1.00


Repeat the search with 3-fold, 5-fold, and 7-fold cross-validation.

Replace the SVC classifier with RandomForestClassifier, LogisticRegression, Perceptron, and KNeighborsClassifier (k-NN).

Update the param_grid accordingly (e.g., for RandomForestClassifier, use n_estimators, max_depth, etc.)

Also replace GridSearchCV with RandomizedSearchCV.

Replace the dataset with your own CSV file using the code below:

In [31]:
import pandas as pd
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.neighbors import KNeighborsClassifier
import warnings

# Silences every warning process-wide for the rest of the session, including
# convergence and search-space warnings raised by scikit-learn.
warnings.filterwarnings("ignore")

#  1. Load the built-in Iris dataset (swap in a CSV load here to use custom data)
# load_iris is imported here because this cell's import list omits it; without
# this line the cell raises NameError on a fresh kernel.
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
# Hold out 20% of the samples for the final test score; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Define classifiers
# Candidate estimators, keyed by the short names that param_distributions uses.
# RandomForestClassifier's random_state defaults to None, so it is pinned here
# to make repeated runs of the search give the same scores.
classifiers = {
    'random_forest': RandomForestClassifier(random_state=42),
    'log_reg': LogisticRegression(),
    'perceptron': Perceptron(),
    'knn': KNeighborsClassifier()
}

#  3. Define parameter distributions
# One search space per classifier, keyed by the same short names as
# `classifiers`. Inner keys follow the Pipeline convention
# '<step>__<parameter>', where the steps are 'pca' and 'classifier'.
param_distributions = dict(
    random_forest=dict(
        pca__n_components=[2, 3],
        classifier__n_estimators=[50, 100, 150],
        classifier__max_depth=[3, 5, None],
    ),
    log_reg=dict(
        pca__n_components=[2, 3],
        classifier__C=[0.01, 0.1, 1, 10],
    ),
    perceptron=dict(
        pca__n_components=[2, 3],
        classifier__penalty=[None, 'l2'],
        classifier__max_iter=[500, 1000, 2000],
        classifier__tol=[1e-3, 1e-4],
    ),
    knn=dict(
        pca__n_components=[2, 3],
        classifier__n_neighbors=[3, 5, 7, 9],
        classifier__weights=['uniform', 'distance'],
    ),
)

#  4. Run randomized search across different cross-validation splits

# The hold-out split depends on neither the fold count nor the classifier, so
# it is computed once here instead of on every loop iteration.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

for cv in [3, 5, 7]:
    print(f"\n Cross-validation: {cv}-fold")
    for name, clf in classifiers.items():
        # Guard against a classifier that has no search space configured.
        if name not in param_distributions:
            print(f"Skipping {name} — no parameter distribution defined.")
            continue

        print(f"\n Randomized search for: {name}")
        # Scale -> reduce dimensionality -> classify; the step names must match
        # the '<step>__<parameter>' prefixes used in param_distributions.
        pipe = Pipeline([
            ('scaler', StandardScaler()),
            ('pca', PCA()),
            ('classifier', clf)
        ])
        # n_iter asks for 10 sampled settings. Some spaces are smaller than
        # that (log_reg has 2 x 4 = 8 combinations); per scikit-learn's
        # sampler behaviour the whole grid is then evaluated instead.
        random_search = RandomizedSearchCV(
            pipe,
            param_distributions[name],
            n_iter=10,
            cv=cv,
            random_state=42
        )
        # The search only ever sees the training portion of the data.
        random_search.fit(X_train, y_train)

        print(f"Best params: {random_search.best_params_}")
        print(f"Best CV score: {random_search.best_score_:.2f}")
        # score() evaluates the refitted best pipeline on the held-out test set.
        print(f"Test score: {random_search.score(X_test, y_test):.2f}")


 Cross-validation: 3-fold

 Randomized search for: random_forest
Best params: {'pca__n_components': 3, 'classifier__n_estimators': 50, 'classifier__max_depth': None}
Best CV score: 0.96
Test score: 1.00

 Randomized search for: log_reg
Best params: {'pca__n_components': 3, 'classifier__C': 1}
Best CV score: 0.96
Test score: 1.00

 Randomized search for: perceptron
Best params: {'pca__n_components': 3, 'classifier__tol': 0.001, 'classifier__penalty': 'l2', 'classifier__max_iter': 1000}
Best CV score: 0.90
Test score: 0.90

 Randomized search for: knn
Best params: {'pca__n_components': 3, 'classifier__weights': 'uniform', 'classifier__n_neighbors': 9}
Best CV score: 0.97
Test score: 1.00

 Cross-validation: 5-fold

 Randomized search for: random_forest
Best params: {'pca__n_components': 3, 'classifier__n_estimators': 100, 'classifier__max_depth': None}
Best CV score: 0.93
Test score: 1.00

 Randomized search for: log_reg
Best params: {'pca__n_components': 3, 'classifier__C': 1}
Best CV 

In [32]:
import pandas as pd


# Read the Iris table; the path is relative to the current working directory.
data = pd.read_csv("Iris.csv")

# The target is the species label. The features are every remaining column
# except `Id` (presumably a row identifier — confirm against the CSV).
y = data["Species"]
X = data.drop(columns=["Id", "Species"])

In [33]:
import pandas as pd
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.neighbors import KNeighborsClassifier
import warnings

# Silences every warning process-wide for the rest of the session.
warnings.filterwarnings("ignore")

#  1. Dataset: nothing is loaded in this cell. X and y are expected to already
#     exist from the earlier cell that reads Iris.csv.


# 2. Define classifiers
# Candidate estimators, keyed by the short names that param_distributions uses.
# RandomForestClassifier's random_state defaults to None, so it is pinned here
# to make repeated runs of the search give the same scores.
classifiers = {
    'random_forest': RandomForestClassifier(random_state=42),
    'log_reg': LogisticRegression(),
    'perceptron': Perceptron(),
    'knn': KNeighborsClassifier()
}

#  3. Define parameter distributions
# One search space per classifier, keyed by the same short names as
# `classifiers`. Inner keys follow the Pipeline convention
# '<step>__<parameter>', where the steps are 'pca' and 'classifier'.
param_distributions = dict(
    random_forest=dict(
        pca__n_components=[2, 3],
        classifier__n_estimators=[50, 100, 150],
        classifier__max_depth=[3, 5, None],
    ),
    log_reg=dict(
        pca__n_components=[2, 3],
        classifier__C=[0.01, 0.1, 1, 10],
    ),
    perceptron=dict(
        pca__n_components=[2, 3],
        classifier__penalty=[None, 'l2'],
        classifier__max_iter=[500, 1000, 2000],
        classifier__tol=[1e-3, 1e-4],
    ),
    knn=dict(
        pca__n_components=[2, 3],
        classifier__n_neighbors=[3, 5, 7, 9],
        classifier__weights=['uniform', 'distance'],
    ),
)

#  4. Run randomized search across different cross-validation splits

# The hold-out split depends on neither the fold count nor the classifier, so
# it is computed once here instead of on every loop iteration.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

for cv in [3, 5, 7]:
    print(f"\n Cross-validation: {cv}-fold")
    for name, clf in classifiers.items():
        # Guard against a classifier that has no search space configured.
        if name not in param_distributions:
            print(f"Skipping {name} — no parameter distribution defined.")
            continue

        print(f"\n Randomized search for: {name}")
        # Scale -> reduce dimensionality -> classify; the step names must match
        # the '<step>__<parameter>' prefixes used in param_distributions.
        pipe = Pipeline([
            ('scaler', StandardScaler()),
            ('pca', PCA()),
            ('classifier', clf)
        ])
        # n_iter asks for 10 sampled settings. Some spaces are smaller than
        # that (log_reg has 2 x 4 = 8 combinations); per scikit-learn's
        # sampler behaviour the whole grid is then evaluated instead.
        random_search = RandomizedSearchCV(
            pipe,
            param_distributions[name],
            n_iter=10,
            cv=cv,
            random_state=42
        )
        # The search only ever sees the training portion of the data.
        random_search.fit(X_train, y_train)

        print(f"Best params: {random_search.best_params_}")
        print(f"Best CV score: {random_search.best_score_:.2f}")
        # score() evaluates the refitted best pipeline on the held-out test set.
        print(f"Test score: {random_search.score(X_test, y_test):.2f}")


 Cross-validation: 3-fold

 Randomized search for: random_forest
Best params: {'pca__n_components': 3, 'classifier__n_estimators': 50, 'classifier__max_depth': None}
Best CV score: 0.92
Test score: 0.97

 Randomized search for: log_reg
Best params: {'pca__n_components': 3, 'classifier__C': 1}
Best CV score: 0.96
Test score: 1.00

 Randomized search for: perceptron
Best params: {'pca__n_components': 3, 'classifier__tol': 0.001, 'classifier__penalty': 'l2', 'classifier__max_iter': 1000}
Best CV score: 0.90
Test score: 1.00

 Randomized search for: knn
Best params: {'pca__n_components': 3, 'classifier__weights': 'uniform', 'classifier__n_neighbors': 9}
Best CV score: 0.97
Test score: 1.00

 Cross-validation: 5-fold

 Randomized search for: random_forest
Best params: {'pca__n_components': 3, 'classifier__n_estimators': 100, 'classifier__max_depth': None}
Best CV score: 0.94
Test score: 1.00

 Randomized search for: log_reg
Best params: {'pca__n_components': 3, 'classifier__C': 1}
Best CV 