<a href="https://colab.research.google.com/github/Raghav1209gb/ICP/blob/main/Sai_Raghavendhar_Chitta_ICP_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

# 1. Load dataset
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Create pipeline
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('classifier', SVC())
])

# 3. Define parameter grid
param_grid = {
    'pca__n_components': [2, 3],
    'classifier__C': [0.1, 1, 10],
    'classifier__kernel': ['linear', 'rbf']
}

# 4. GridSearchCV
grid = GridSearchCV(pipe, param_grid)
grid.fit(X_train, y_train)

# 5. Results
print("Best parameters found:", grid.best_params_)
print("Best cross-validation score: {:.2f}".format(grid.best_score_))
print("Test set score: {:.2f}".format(grid.score(X_test, y_test)))


Best parameters found: {'classifier__C': 0.1, 'classifier__kernel': 'linear', 'pca__n_components': 3}
Best cross-validation score: 0.96
Test set score: 1.00


Check for 3 fold, 5 fold and 7 fold cross validation

Replace classifier, SVC with RandomForestClassifier and LogisticRegression, Perceptron, knn .

Update the param_grid accordingly (e.g., for RandomForestClassifier, use n_estimators, max_depth, etc.)

Also replace Gridsearch with randomnsearch function.

Relplace with with your own csv dataset using code below:

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score


#dataset
data = pd.read_csv("Iris.csv")
X = data.drop("Species", axis=1)
y = data["Species"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Models and their parameter grids
models = {
    "RandomForest": (RandomForestClassifier(), {
        "classifier__n_estimators": [50, 100, 200],
        "classifier__max_depth": [None, 10, 20]
    }),
    "LogisticRegression": (LogisticRegression(max_iter=1000), {
        "classifier__C": [0.1, 1, 10],
        "classifier__solver": ['lbfgs']
    }),
    "Perceptron": (Perceptron(), {
        "classifier__alpha": [0.0001, 0.001],
        "classifier__max_iter": [500, 1000]
    }),
    "KNN": (KNeighborsClassifier(), {
        "classifier__n_neighbors": [3, 5, 7],
        "classifier__weights": ['uniform', 'distance']
    })
}

#CV folds
for model_name, (model, params) in models.items():
    print(f"\n--- {model_name} ---")
    for cv in [3, 5, 7]:
        pipe = Pipeline([
            ('scaler', StandardScaler()),
            ('pca', PCA(n_components=3)),
            ('classifier', model)
        ])
        search = RandomizedSearchCV(pipe, params, n_iter=5, cv=cv, scoring='accuracy', random_state=42, n_jobs=-1)
        search.fit(X_train, y_train)

        print(f"{cv}-Fold CV | Best Score: {search.best_score_:.2f} | Test Accuracy: {search.score(X_test, y_test):.2f}")




--- RandomForest ---
3-Fold CV | Best Score: 0.98 | Test Accuracy: 1.00
5-Fold CV | Best Score: 0.98 | Test Accuracy: 1.00
7-Fold CV | Best Score: 0.99 | Test Accuracy: 1.00

--- LogisticRegression ---
3-Fold CV | Best Score: 1.00 | Test Accuracy: 1.00




5-Fold CV | Best Score: 1.00 | Test Accuracy: 1.00




7-Fold CV | Best Score: 1.00 | Test Accuracy: 1.00

--- Perceptron ---
3-Fold CV | Best Score: 0.88 | Test Accuracy: 1.00




5-Fold CV | Best Score: 0.87 | Test Accuracy: 1.00




7-Fold CV | Best Score: 0.84 | Test Accuracy: 1.00

--- KNN ---
3-Fold CV | Best Score: 0.99 | Test Accuracy: 1.00
5-Fold CV | Best Score: 0.99 | Test Accuracy: 1.00
7-Fold CV | Best Score: 0.99 | Test Accuracy: 1.00
