In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

# --- Data: iris features/labels with a fixed 80/20 hold-out split ---
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Model: standardise -> project with PCA -> classify with an SVM ---
pipeline_steps = [
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('classifier', SVC()),
]
pipe = Pipeline(pipeline_steps)

# --- Search space: "<step name>__<parameter>" routes each value list to a pipeline step ---
param_grid = dict(
    pca__n_components=[2, 3],
    classifier__C=[0.1, 1, 10],
    classifier__kernel=['linear', 'rbf'],
)

# --- Exhaustive search over all 2 * 3 * 2 = 12 combinations (default CV setting) ---
grid = GridSearchCV(pipe, param_grid).fit(X_train, y_train)

# --- Report: winning combination, its cross-validation score, and the hold-out score ---
best_cv_score = grid.best_score_
test_score = grid.score(X_test, y_test)
print("Best parameters found:", grid.best_params_)
print("Best cross-validation score: {:.2f}".format(best_cv_score))
print("Test set score: {:.2f}".format(test_score))

Best parameters found: {'classifier__C': 0.1, 'classifier__kernel': 'linear', 'pca__n_components': 3}
Best cross-validation score: 0.96
Test set score: 1.00


In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.neighbors import KNeighborsClassifier
from scipy.stats import randint, uniform
import numpy as np

# Load dataset and hold out 20% of the samples for the final test score
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Candidate classifiers, each paired with the distributions RandomizedSearchCV samples from.
# Keys use the "classifier__<param>" form so values reach the 'classifier' pipeline step.
# scipy conventions: randint(low, high) draws integers in [low, high);
# uniform(loc, scale) draws floats in [loc, loc + scale].
models_params = {
    "RandomForest": (
        RandomForestClassifier(random_state=42),
        {
            'classifier__n_estimators': randint(50, 200),
            'classifier__max_depth': randint(3, 20)
        }
    ),
    "LogisticRegression": (
        LogisticRegression(max_iter=1000),
        {
            'classifier__C': uniform(0.01, 10),  # i.e. C in [0.01, 10.01]
            # NOTE(review): 'liblinear' fits one-vs-rest models; recent scikit-learn
            # releases warn about using it on multiclass targets such as iris --
            # confirm against the installed version.
            'classifier__solver': ['liblinear', 'lbfgs']
        }
    ),
    "KNN": (
        KNeighborsClassifier(),
        {
            'classifier__n_neighbors': randint(3, 15)
        }
    ),
    "Perceptron": (
        Perceptron(max_iter=1000, random_state=42),
        {
            'classifier__penalty': [None, 'l1', 'l2'],
            'classifier__alpha': uniform(0.0001, 0.1)  # i.e. alpha in [0.0001, 0.1001]
        }
    )
}

# Cross-validation folds
cv_folds = [3, 5, 7]

# Loop through models and cross-validations: one randomized search per (model, fold count)
for model_name, (clf, param_dist) in models_params.items():
    for cv in cv_folds:
        # Same preprocessing for every model: standardise, then keep 2 principal components
        pipe = Pipeline([
            ('scaler', StandardScaler()),
            ('pca', PCA(n_components=2)),
            ('classifier', clf)
        ])

        random_search = RandomizedSearchCV(
            pipe,
            param_distributions=param_dist,
            n_iter=5,  # you can increase if param space is large
            cv=cv,
            random_state=42,
            n_jobs=-1
        )

        random_search.fit(X_train, y_train)

        print(f"\n🔍 Model: {model_name}, CV: {cv}-fold")
        print("Best Parameters:", random_search.best_params_)
        # best_score_ is the mean cross-validated score of the best candidate,
        # not a score on the training set, so label it accordingly.
        print(f"Best CV Score: {random_search.best_score_:.2f}")
        print(f"Test Score: {random_search.score(X_test, y_test):.2f}")



🔍 Model: RandomForest, CV: 3-fold
Best Parameters: {'classifier__max_depth': 9, 'classifier__n_estimators': 142}
Training Score: 0.88
Test Score: 0.90

🔍 Model: RandomForest, CV: 5-fold
Best Parameters: {'classifier__max_depth': 10, 'classifier__n_estimators': 70}
Training Score: 0.91
Test Score: 0.90

🔍 Model: RandomForest, CV: 7-fold
Best Parameters: {'classifier__max_depth': 9, 'classifier__n_estimators': 142}
Training Score: 0.89
Test Score: 0.90

🔍 Model: LogisticRegression, CV: 3-fold
Best Parameters: {'classifier__C': np.float64(1.844347898661638), 'classifier__solver': 'lbfgs'}
Training Score: 0.91
Test Score: 0.90

🔍 Model: LogisticRegression, CV: 5-fold
Best Parameters: {'classifier__C': np.float64(1.844347898661638), 'classifier__solver': 'lbfgs'}
Training Score: 0.92
Test Score: 0.90

🔍 Model: LogisticRegression, CV: 7-fold
Best Parameters: {'classifier__C': np.float64(1.844347898661638), 'classifier__solver': 'lbfgs'}
Training Score: 0.92
Test Score: 0.90

🔍 Model: KNN, C

Check 3-fold, 5-fold, and 7-fold cross-validation.

Replace the SVC classifier with RandomForestClassifier, LogisticRegression, Perceptron, and KNN (KNeighborsClassifier).

Update the param_grid accordingly (e.g., for RandomForestClassifier, use n_estimators, max_depth, etc.)

Also replace GridSearchCV with RandomizedSearchCV.

Replace the dataset with your own CSV dataset using the code below:

In [7]:
# Google Colab only: ask the user to upload a file into the notebook session.
# The recorded output shows hypertension_dataset.csv being saved under that same name.
from google.colab import files
uploaded = files.upload()

Saving hypertension_dataset.csv to hypertension_dataset.csv


In [8]:
import pandas as pd
# Read the uploaded CSV (path is relative to the working directory) into a DataFrame.
data = pd.read_csv("hypertension_dataset.csv")

In [9]:
# Preview the first rows to check the column names and how values are encoded.
data.head()

Unnamed: 0,Age,Salt_Intake,Stress_Score,BP_History,Sleep_Duration,BMI,Medication,Family_History,Exercise_Level,Smoking_Status,Has_Hypertension
0,69,8.0,9,Normal,6.4,25.8,,Yes,Low,Non-Smoker,Yes
1,32,11.7,10,Normal,5.4,23.4,,No,Low,Non-Smoker,No
2,78,9.5,3,Normal,7.1,18.7,,No,Moderate,Non-Smoker,No
3,38,10.0,10,Hypertension,4.2,22.1,ACE Inhibitor,No,Low,Non-Smoker,Yes
4,41,9.8,1,Prehypertension,5.8,16.2,Other,No,Moderate,Non-Smoker,No


In [10]:
# Class balance check: count each distinct value in the last column of the frame.
last_column = data.iloc[:, -1]
print(last_column.value_counts())


Has_Hypertension
Yes    1032
No      953
Name: count, dtype: int64


In [11]:
import pandas as pd

# Re-read the CSV so `data` holds the file contents as stored on disk.
data = pd.read_csv("hypertension_dataset.csv")
# Provisional feature/target split: X is every column except the label.
# Columns such as BP_History and Medication appear as text in the data.head() output,
# so X is not purely numeric at this point; X and y are reassigned in In [13]
# after the target is mapped to 1/0 and the features are one-hot encoded.
X = data.drop("Has_Hypertension", axis=1)
y = data["Has_Hypertension"]

In [12]:
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import pandas as pd


In [13]:
# Step 1: Convert 'Yes'/'No' to 1/0 for the target.
# Series.map turns every value that is missing from the mapping into NaN, so mapping a
# column that already holds 1/0 (i.e. running this cell twice) would wipe the target.
# Only map while the column is still non-numeric so the cell is safe to re-run.
if not pd.api.types.is_numeric_dtype(data['Has_Hypertension']):
    target_encoded = data['Has_Hypertension'].map({'Yes': 1, 'No': 0})
    # Fail here with a clear message instead of passing NaN labels on to the classifiers.
    if target_encoded.isna().any():
        raise ValueError("Has_Hypertension contains values other than 'Yes'/'No'")
    data['Has_Hypertension'] = target_encoded

# Step 2: One-hot encode categorical features (the target column is excluded first)
data_encoded = pd.get_dummies(data.drop('Has_Hypertension', axis=1))

# Step 3: Final feature and target split
X = data_encoded
y = data['Has_Hypertension']

In [14]:
# Split the data: 20% is held out and only used for the final "Test Score"
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define classifiers and their param grids.
# Keys use the "classifier__<param>" form so values reach the 'classifier' pipeline step.
models = {
    "RandomForest": {
        # Seeded so repeated runs build the same forest and report the same scores
        "classifier": RandomForestClassifier(random_state=42),
        "param_grid": {
            "classifier__n_estimators": [50, 100, 150],
            "classifier__max_depth": [3, 5, 10, None]
        }
    },
    "LogisticRegression": {
        "classifier": LogisticRegression(max_iter=1000),
        "param_grid": {
            "classifier__C": [0.1, 1, 10],
            "classifier__solver": ['lbfgs', 'liblinear']
        }
    },
    "KNN": {
        "classifier": KNeighborsClassifier(),
        "param_grid": {
            "classifier__n_neighbors": [3, 5, 7, 9]
        }
    },
    "Perceptron": {
        "classifier": Perceptron(),
        "param_grid": {
            "classifier__penalty": [None, 'l2', 'l1', 'elasticnet'],
            "classifier__alpha": [0.0001, 0.001, 0.01]
        }
    }
}

# Try 3, 5, and 7-fold cross-validation
cv_values = [3, 5, 7]

# Upper bound on sampled candidates per search
max_candidates = 5

# Loop through models and CV folds
for model_name, model_info in models.items():
    # Every grid here is made of plain lists, so its size is the product of the list
    # lengths. Asking RandomizedSearchCV for more iterations than the grid holds
    # (KNN has only 4 candidates) makes it emit a warning on every search, so cap n_iter.
    grid_size = 1
    for candidate_values in model_info['param_grid'].values():
        grid_size *= len(candidate_values)
    n_iter = min(max_candidates, grid_size)

    for cv in cv_values:
        print(f"\n🔍 Model: {model_name}, CV: {cv}-fold")

        # Same preprocessing for every model: standardise, then keep 3 principal components
        pipe = Pipeline([
            ('scaler', StandardScaler()),
            ('pca', PCA(n_components=3)),  # optional
            ('classifier', model_info['classifier'])
        ])

        search = RandomizedSearchCV(
            pipe,
            model_info['param_grid'],
            cv=cv,
            n_iter=n_iter,
            random_state=42,
            verbose=0
        )

        search.fit(X_train, y_train)
        print("Best Parameters:", search.best_params_)
        # best_score_ is the mean cross-validated score of the best candidate,
        # not a score on the training set, so label it accordingly.
        print(f"Best CV Score: {search.best_score_:.2f}")
        print(f"Test Score: {search.score(X_test, y_test):.2f}")


🔍 Model: RandomForest, CV: 3-fold
Best Parameters: {'classifier__n_estimators': 150, 'classifier__max_depth': 10}
Training Score: 0.73
Test Score: 0.77

🔍 Model: RandomForest, CV: 5-fold
Best Parameters: {'classifier__n_estimators': 150, 'classifier__max_depth': 10}
Training Score: 0.74
Test Score: 0.78

🔍 Model: RandomForest, CV: 7-fold
Best Parameters: {'classifier__n_estimators': 150, 'classifier__max_depth': 10}
Training Score: 0.73
Test Score: 0.76

🔍 Model: LogisticRegression, CV: 3-fold
Best Parameters: {'classifier__solver': 'liblinear', 'classifier__C': 0.1}
Training Score: 0.64
Test Score: 0.68

🔍 Model: LogisticRegression, CV: 5-fold
Best Parameters: {'classifier__solver': 'liblinear', 'classifier__C': 10}
Training Score: 0.64
Test Score: 0.68

🔍 Model: LogisticRegression, CV: 7-fold
Best Parameters: {'classifier__solver': 'lbfgs', 'classifier__C': 0.1}
Training Score: 0.65
Test Score: 0.68

🔍 Model: KNN, CV: 3-fold




Best Parameters: {'classifier__n_neighbors': 9}
Training Score: 0.74
Test Score: 0.77

🔍 Model: KNN, CV: 5-fold




Best Parameters: {'classifier__n_neighbors': 5}
Training Score: 0.74
Test Score: 0.76

🔍 Model: KNN, CV: 7-fold




Best Parameters: {'classifier__n_neighbors': 7}
Training Score: 0.74
Test Score: 0.77

🔍 Model: Perceptron, CV: 3-fold
Best Parameters: {'classifier__penalty': 'l2', 'classifier__alpha': 0.001}
Training Score: 0.58
Test Score: 0.58

🔍 Model: Perceptron, CV: 5-fold
Best Parameters: {'classifier__penalty': None, 'classifier__alpha': 0.0001}
Training Score: 0.57
Test Score: 0.57

🔍 Model: Perceptron, CV: 7-fold
Best Parameters: {'classifier__penalty': 'l1', 'classifier__alpha': 0.01}
Training Score: 0.55
Test Score: 0.60
