In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split , GridSearchCV
from sklearn.metrics import accuracy_score, mean_squared_error , classification_report

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeClassifier , DecisionTreeRegressor
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

In [3]:
from sklearn.datasets import load_iris, make_regression

In [12]:
# Fetch the Iris dataset. The whole `data` object is kept (rather than only
# the feature/label arrays) because its `target_names` are used further down
# to label the rows of the classification report.
data = load_iris()
X_classification, y_classification = data.data, data.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# identical from one run to the next.
X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(
    X_classification,
    y_classification,
    test_size=0.3,
    random_state=42,
)


In [10]:
# REGRESSION DATA
# Synthetic regression problem: 150 samples, 2 features, a small noise term.
# Seeding the generator makes the dataset itself reproducible.
X_regression, y_regression = make_regression(
    n_samples=150,
    n_features=2,
    noise=0.1,
    random_state=42,
)

# Same 70/30 train/test protocol (and seed) as the classification data.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_regression,
    y_regression,
    test_size=0.3,
    random_state=42,
)


In [13]:
# CLASSIFICATION MODELS
# Hyper-parameter search space for each classifier, keyed by the same display
# name used in `classification_models`. Every parameter carries the `model__`
# prefix because the estimator is the step named 'model' in the Pipeline
# assembled inside the loop below.
classification_param_grid = {
    'Logistic Regression': {
        'model__C': [0.1, 1, 10],
        'model__max_iter': [100, 200]
    },
    'Decision Tree': {
        'model__max_depth': [None, 10, 20, 30],
        'model__min_samples_split': [2, 5, 10]
    },
    'Random Forest': {
        'model__n_estimators': [50, 100, 200],
        'model__max_depth': [None, 10, 20]
    },
    'SVM': {
        'model__C': [0.1, 1, 10],
        'model__kernel': ['linear', 'rbf']
    },
    'K-Nearest Neighbors': {
        'model__n_neighbors': [3, 5, 7],
        'model__weights': ['uniform', 'distance']
    },
    'Gradient Boosting': {
        'model__n_estimators': [50, 100, 200],
        'model__learning_rate': [0.01, 0.1, 0.2]
    }
}

# Base estimators to tune. The tree-based models draw random numbers while
# fitting, so they are seeded with the same value (42) used for the data
# split; without this the selected parameters and reported scores can change
# on every run of the notebook.
classification_models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42)
}

# The two dicts are joined by name below; fail early with a clear message if
# they ever drift apart instead of hitting a KeyError mid-loop.
assert set(classification_models) == set(classification_param_grid), \
    "classification_models and classification_param_grid must have the same keys"

# Tune each classifier with 5-fold cross-validated grid search on the training
# split, then report the refit best model's performance on the held-out test
# split.
for name, model in classification_models.items():
    # Scaling lives inside the pipeline so that it is re-fit on the training
    # portion of every CV fold rather than on the full training set.
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('model', model)
    ])

    grid = GridSearchCV(pipeline, classification_param_grid[name], cv=5, scoring='accuracy')
    grid.fit(X_train_clf, y_train_clf)

    print(f"{name} Best Parameters: {grid.best_params_}")
    # `grid.predict` delegates to the best pipeline refit on all training data.
    predictions = grid.predict(X_test_clf)
    accuracy = accuracy_score(y_test_clf, predictions)
    print(f"{name} Accuracy: {accuracy:.2f}")
    print(classification_report(y_test_clf, predictions, target_names=data.target_names))




Logistic Regression Best Parameters: {'model__C': 1, 'model__max_iter': 100}
Logistic Regression Accuracy: 1.00
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       1.00      1.00      1.00        13
   virginica       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

Decision Tree Best Parameters: {'model__max_depth': None, 'model__min_samples_split': 10}
Decision Tree Accuracy: 1.00
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       1.00      1.00      1.00        13
   virginica       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

Random 

In [8]:
# REGRESSION MODELS
# Hyper-parameter search space for each regressor, keyed by the same display
# name used in `regression_models`. Parameters carry the `model__` prefix
# because the estimator is the step named 'model' in the Pipeline assembled
# inside the loop below. Linear Regression has nothing to tune, so its grid is
# empty and the search evaluates only the default configuration.
regression_param_grid = {
    'Linear Regression': {},
    'Decision Tree Regressor': {
        'model__max_depth': [None, 10, 20, 30],
        'model__min_samples_split': [2, 5, 10]
    },
    'Random Forest Regressor': {
        'model__n_estimators': [50, 100, 200],
        'model__max_depth': [None, 10, 20]
    },
    'Gradient Boosting Regressor': {
        'model__n_estimators': [50, 100, 200],
        'model__learning_rate': [0.01, 0.1, 0.2]
    }
}

# Base estimators to tune. The tree-based models draw random numbers while
# fitting, so they are seeded with the same value (42) used for the data
# generation and split; without this the selected parameters and reported MSE
# can change on every run of the notebook.
regression_models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree Regressor": DecisionTreeRegressor(random_state=42),
    "Random Forest Regressor": RandomForestRegressor(random_state=42),
    "Gradient Boosting Regressor": GradientBoostingRegressor(random_state=42)
}

# The two dicts are joined by name below; fail early with a clear message if
# they ever drift apart instead of hitting a KeyError mid-loop.
assert set(regression_models) == set(regression_param_grid), \
    "regression_models and regression_param_grid must have the same keys"

# Tune each regressor with 5-fold cross-validated grid search on the training
# split, then report the refit best model's MSE on the held-out test split.
for name, model in regression_models.items():
    # Scaling lives inside the pipeline so that it is re-fit on the training
    # portion of every CV fold rather than on the full training set.
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('model', model)
    ])

    # scikit-learn scorers are "higher is better", hence the negated MSE here;
    # the test-set figure printed below is the ordinary (positive) MSE.
    grid = GridSearchCV(pipeline, regression_param_grid[name], cv=5, scoring='neg_mean_squared_error')
    grid.fit(X_train_reg, y_train_reg)

    print(f"{name} Best Parameters: {grid.best_params_}")
    # `grid.predict` delegates to the best pipeline refit on all training data.
    predictions = grid.predict(X_test_reg)
    mse = mean_squared_error(y_test_reg, predictions)
    print(f"{name} MSE: {mse:.2f}")


Linear Regression Best Parameters: {}
Linear Regression MSE: 0.01
Decision Tree Regressor Best Parameters: {'model__max_depth': None, 'model__min_samples_split': 5}
Decision Tree Regressor MSE: 184.01
Random Forest Regressor Best Parameters: {'model__max_depth': None, 'model__n_estimators': 200}
Random Forest Regressor MSE: 123.45
Gradient Boosting Regressor Best Parameters: {'model__learning_rate': 0.1, 'model__n_estimators': 200}
Gradient Boosting Regressor MSE: 110.96
