In [15]:
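# NOTE: Disabled (commented-out) baseline: a fixed StandardScaler + RandomForest pipeline with no model search.
# from sklearn.datasets import load_iris
# from sklearn.pipeline import Pipeline
# from sklearn.preprocessing import StandardScaler
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.model_selection import train_test_split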

# # Sample Data
# X, y = load_iris(return_X_y=True)
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # Create Pipeline
# pipeline = Pipeline([
#     ('scaler', StandardScaler()),         # Step 1: Standardize Features
#     ('classifier', RandomForestClassifier(n_estimators=100))  # Step 2: Train Model
# ])

# # Train and Evaluate
# pipeline.fit(X_train, y_train)
# accuracy = pipeline.score(X_test, y_test)

# print(f"Model Accuracy: {accuracy:.2f}")


In [2]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris

# Iris features/labels; 20% of the rows are held out (fixed seed) for the final test check.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Two-step pipeline: standardize the features, then classify.
# The 'classifier' step is only a placeholder; the grid search swaps in each candidate model.
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('classifier', RandomForestClassifier()),
    ]
)

# Candidate models and their hyperparameter grids.
# Each dict is one sub-grid: the 'classifier' entry replaces the pipeline's
# placeholder step, and the 'classifier__<param>' entries are tuned on it.
param_grid = [
    {
        'classifier': [LogisticRegression()],  # Model 1
        'classifier__C': [0.1, 1, 10],  # Hyperparameters
    },
    {
        'classifier': [SVC()],  # Model 2
        'classifier__C': [0.1, 1, 10],
        'classifier__kernel': ['linear', 'rbf'],
    },
    {
        # Model 3. Seeded so the forest's internal randomness -- and therefore its
        # CV scores and the model that gets selected -- is identical on every run.
        'classifier': [RandomForestClassifier(random_state=42)],
        'classifier__n_estimators': [50, 100, 200],
        'classifier__max_depth': [None, 5, 10],
    },
]

# 5-fold cross-validated search over every model/hyperparameter combination in
# param_grid, ranked by accuracy, with the work parallelized via n_jobs=-1.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Pipeline built from the winning parameter combination.
best_model = grid_search.best_estimator_

# Winning combination and its mean cross-validated accuracy.
print(f"Best Model: {grid_search.best_params_}")
print(f"Best Score: {grid_search.best_score_:.2f}")

# Accuracy of the winning pipeline on the held-out test split.
test_accuracy = best_model.score(X_test, y_test)
print(f"Test Accuracy: {test_accuracy:.2f}")


Best Model: {'classifier': LogisticRegression(), 'classifier__C': 1}
Best Score: 0.96
Test Accuracy: 1.00


In [24]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, 
    make_scorer, classification_report
)
from sklearn.datasets import load_iris

# Iris features/labels; 20% of the rows are held out (fixed seed) as the test split.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize, then classify. The 'classifier' step is a placeholder that the
# grid search replaces with each candidate model.
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('classifier', RandomForestClassifier()),
    ]
)

# Candidate models and their hyperparameter grids.
# Each dict is one sub-grid: the 'classifier' entry replaces the pipeline's
# placeholder step, and the 'classifier__<param>' entries are tuned on it.
param_grid = [
    {
        'classifier': [LogisticRegression()],
        'classifier__C': [0.1, 1, 10],
    },
    {
        'classifier': [SVC()],
        'classifier__C': [0.1, 1, 10],
        'classifier__kernel': ['linear', 'rbf'],
    },
    {
        # Seeded so the forest's internal randomness -- and therefore its CV scores
        # and the model that gets selected -- is identical on every run.
        'classifier': [RandomForestClassifier(random_state=42)],
        'classifier__n_estimators': [50, 100, 200],
        'classifier__max_depth': [None, 5, 10],
    },
]

# Scorers evaluated for every grid candidate ('roc_auc' was removed).
# Accuracy takes no averaging argument; precision, recall and F1 are macro-averaged.
scoring = {'accuracy': make_scorer(accuracy_score)}
scoring.update(
    (metric_name, make_scorer(metric_fn, average='macro'))
    for metric_name, metric_fn in (
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1', f1_score),
    )
)

# 5-fold cross-validated search that records every scorer in `scoring` for each
# candidate; refit='accuracy' makes accuracy the metric that picks the winner.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring=scoring,
    refit='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Keep handles on the search results used by the reporting steps that follow.
cv_results = grid_search.cv_results_
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Report the winning configuration and its mean cross-validated accuracy.
print("\n🔹 Best Model Hyperparameters:", best_params)
print(f"🔹 Best Accuracy: {grid_search.best_score_:.4f}")

# Read every metric at the winning candidate's row of cv_results_.
# Taking an independent max over each metric column would mix scores from
# different candidates, so the printed precision/recall/F1 would not
# necessarily belong to the model reported above.
metrics = ['accuracy', 'precision', 'recall', 'f1']
best_index = grid_search.best_index_
best_scores = {metric: cv_results[f'mean_test_{metric}'][best_index] for metric in metrics}

# Display the selected model's mean CV score for each metric.
print("\n🔹 Best Model CV Scores Across Metrics:")
for metric, score in best_scores.items():
    print(f"{metric.capitalize()}: {score:.4f}")

# Predict the held-out test split with the selected pipeline.
y_pred = best_model.predict(X_test)

# Test-set metrics ('roc_auc' was removed); precision, recall and F1 are macro-averaged.
test_accuracy = accuracy_score(y_test, y_pred)
test_precision, test_recall, test_f1 = (
    metric_fn(y_test, y_pred, average='macro')
    for metric_fn in (precision_score, recall_score, f1_score)
)

# Summary of the four headline metrics.
print("\n🔹 Test Set Performance:")
print(f"Accuracy: {test_accuracy:.4f}")
print(f"Precision: {test_precision:.4f}")
print(f"Recall: {test_recall:.4f}")
print(f"F1 Score: {test_f1:.4f}")

# Per-class breakdown.
print("\n🔹 Classification Report on Test Set:\n")
print(classification_report(y_test, y_pred))



🔹 Best Model Hyperparameters: {'classifier': LogisticRegression(), 'classifier__C': 1}
🔹 Best Accuracy: 0.9583

🔹 Best Scores Across Metrics:
Accuracy: 0.9583
Precision: 0.9711
Recall: 0.9571
F1: 0.9579

🔹 Test Set Performance:
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000

🔹 Classification Report on Test Set:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

