<a href="https://colab.research.google.com/github/Aditya1344/ML_Lab/blob/main/Lab8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Load dataset
def load_data(file_path, sheet_name=0):
    """Load the dataset from a CSV file or an Excel workbook.

    The file is read with ``pd.read_excel`` only when *both* of these hold:
    the path ends in an Excel extension (``.xlsx``, ``.xlsm``, ``.xls``) and
    the first bytes of the file carry an Excel container signature. Every
    other input (CSV files, CSV data saved under a misleading ``.xlsx``
    name, URLs, file-like objects) is passed to ``pd.read_csv`` unchanged.

    Args:
        file_path: Path, URL or buffer accepted by ``pd.read_csv``.
        sheet_name: Sheet selector forwarded to ``pd.read_excel``; ignored
            for CSV input. With the default ``0`` the first sheet is read.

    Returns:
        Whatever the selected pandas reader returns (a ``DataFrame`` for the
        default arguments).

    Raises:
        FileNotFoundError: Propagated from pandas when the path is missing.
        ImportError: Propagated from ``pd.read_excel`` if no Excel engine is
            installed.
    """
    import os

    excel_suffixes = (".xlsx", ".xlsm", ".xls")
    # ZIP container (xlsx/xlsm) and OLE2 compound document (legacy xls).
    excel_signatures = (b"PK\x03\x04", b"\xd0\xcf\x11\xe0")

    if isinstance(file_path, (str, os.PathLike)):
        path_text = os.fspath(file_path)
        if isinstance(path_text, str) and path_text.lower().endswith(excel_suffixes):
            try:
                with open(path_text, "rb") as handle:
                    signature = handle.read(4)
            except (OSError, ValueError):
                # Unreadable here (missing file, URL, ...): let read_csv
                # raise or handle it exactly as it did before.
                signature = b""
            if signature in excel_signatures:
                return pd.read_excel(file_path, sheet_name=sheet_name)

    return pd.read_csv(file_path)

# Hyperparameter tuning for Perceptron
def tune_perceptron(X_train, y_train):
    """Run a randomized hyperparameter search for a Perceptron.

    Ten candidate settings are sampled from the search space below and each
    is scored by accuracy with 5-fold cross-validation.

    Args:
        X_train: Training features.
        y_train: Training targets.

    Returns:
        The fitted ``RandomizedSearchCV`` object.
    """
    search_space = dict(
        penalty=['l2', 'l1', 'elasticnet'],
        alpha=np.logspace(-4, 1, 10),
        max_iter=[1000, 2000, 3000],
    )
    searcher = RandomizedSearchCV(
        Perceptron(),
        param_distributions=search_space,
        n_iter=10,
        cv=5,
        scoring='accuracy',
    )
    searcher.fit(X_train, y_train)
    return searcher

# Hyperparameter tuning for MLP
def tune_mlp(X_train, y_train):
    """Run a randomized hyperparameter search for an MLPClassifier.

    Ten candidate settings are sampled from the search space below and each
    is scored by accuracy with 5-fold cross-validation.

    Args:
        X_train: Training features.
        y_train: Training targets.

    Returns:
        The fitted ``RandomizedSearchCV`` object.
    """
    search_space = dict(
        hidden_layer_sizes=[(50,), (100,), (50, 50)],
        activation=['tanh', 'relu'],
        solver=['adam', 'sgd'],
        alpha=np.logspace(-4, 1, 10),
        learning_rate=['constant', 'adaptive'],
    )
    searcher = RandomizedSearchCV(
        MLPClassifier(),
        param_distributions=search_space,
        n_iter=10,
        cv=5,
        scoring='accuracy',
    )
    searcher.fit(X_train, y_train)
    return searcher

# Main function for A2
def run_a2_hyperparameter_tuning(file_path):
    """Tune a Perceptron and an MLP on the dataset and print the results.

    The last column of the loaded table is used as the target and all
    preceding columns as features. 20% of the rows are held out for testing
    (fixed ``random_state=42``). Best parameters, test scores and
    classification reports are printed; nothing is returned.

    Args:
        file_path: Location of the dataset, forwarded to ``load_data``.
    """
    frame = load_data(file_path)
    features = frame.iloc[:, :-1].values
    target = frame.iloc[:, -1].values

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # Perceptron: search, then report on the held-out split.
    perceptron_search = tune_perceptron(X_train, y_train)
    print(f"Best Perceptron Params: {perceptron_search.best_params_}")
    print(f"Perceptron Test Score: {perceptron_search.score(X_test, y_test)}")

    # MLP: same procedure.
    mlp_search = tune_mlp(X_train, y_train)
    print(f"Best MLP Params: {mlp_search.best_params_}")
    print(f"MLP Test Score: {mlp_search.score(X_test, y_test)}")

    # Both prediction vectors are computed before any report is printed.
    reports = [
        ("Perceptron Classification Report:",
         perceptron_search.best_estimator_.predict(X_test)),
        ("MLP Classification Report:",
         mlp_search.best_estimator_.predict(X_test)),
    ]
    for heading, y_pred in reports:
        print(heading)
        print(classification_report(y_test, y_pred))

# File path for the dataset (under /content, presumably the Colab working directory).
# NOTE(review): the name ends in .xlsx -- confirm the file's real format is one
# that load_data can read.
file_path = '/content/Clustering.xlsx'
# Runs the whole A2 tuning pipeline as soon as this cell is executed.
run_a2_hyperparameter_tuning(file_path)


In [None]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report, roc_auc_score

# Load dataset
def load_data(file_path, sheet_name=0):
    """Load the dataset from a CSV file or an Excel workbook.

    The file is read with ``pd.read_excel`` only when *both* of these hold:
    the path ends in an Excel extension (``.xlsx``, ``.xlsm``, ``.xls``) and
    the first bytes of the file carry an Excel container signature. Every
    other input (CSV files, CSV data saved under a misleading ``.xlsx``
    name, URLs, file-like objects) is passed to ``pd.read_csv`` unchanged.

    Args:
        file_path: Path, URL or buffer accepted by ``pd.read_csv``.
        sheet_name: Sheet selector forwarded to ``pd.read_excel``; ignored
            for CSV input. With the default ``0`` the first sheet is read.

    Returns:
        Whatever the selected pandas reader returns (a ``DataFrame`` for the
        default arguments).

    Raises:
        FileNotFoundError: Propagated from pandas when the path is missing.
        ImportError: Propagated from ``pd.read_excel`` if no Excel engine is
            installed.
    """
    import os

    excel_suffixes = (".xlsx", ".xlsm", ".xls")
    # ZIP container (xlsx/xlsm) and OLE2 compound document (legacy xls).
    excel_signatures = (b"PK\x03\x04", b"\xd0\xcf\x11\xe0")

    if isinstance(file_path, (str, os.PathLike)):
        path_text = os.fspath(file_path)
        if isinstance(path_text, str) and path_text.lower().endswith(excel_suffixes):
            try:
                with open(path_text, "rb") as handle:
                    signature = handle.read(4)
            except (OSError, ValueError):
                # Unreadable here (missing file, URL, ...): let read_csv
                # raise or handle it exactly as it did before.
                signature = b""
            if signature in excel_signatures:
                return pd.read_excel(file_path, sheet_name=sheet_name)

    return pd.read_csv(file_path)

# Fit and evaluate a classifier
def evaluate_classifier(clf, X_train, y_train, X_test, y_test):
    """Fit the classifier and evaluate its performance on the test split.

    Args:
        clf: Estimator exposing ``fit`` and ``predict`` (and optionally
            ``predict_proba``).
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.

    Returns:
        dict with the keys ``'Accuracy'``, ``'F1 Score'`` (weighted average)
        and ``'ROC AUC'``. ``'ROC AUC'`` holds the score when it can be
        computed, otherwise an explanatory ``"N/A - ..."`` string.
    """
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Calculate performance metrics
    metrics = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'F1 Score': f1_score(y_test, y_pred, average='weighted'),
    }

    # ROC AUC needs class probabilities, so it is only attempted for
    # classifiers that expose predict_proba.
    if not hasattr(clf, "predict_proba"):
        metrics['ROC AUC'] = "N/A - no predict_proba"
        return metrics

    try:
        y_prob = clf.predict_proba(X_test)
        # For a two-class problem roc_auc_score expects a 1-D score vector
        # (probability of the class with the greater label); handing it the
        # full (n_samples, 2) matrix raises ValueError, which previously
        # turned every binary result into "N/A".
        if y_prob.ndim == 2 and y_prob.shape[1] == 2:
            y_prob = y_prob[:, 1]
        metrics['ROC AUC'] = roc_auc_score(y_test, y_prob, multi_class='ovr')
    except ValueError:
        metrics['ROC AUC'] = "N/A - issue with probabilities"

    return metrics

# Compare different classifiers
def compare_classifiers(X_train, y_train, X_test, y_test):
    """Evaluate a fixed set of classifiers and print each one's metrics.

    All models are fitted and scored first (via ``evaluate_classifier``);
    the per-model metric listing is printed only afterwards. Nothing is
    returned.

    Args:
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.
    """
    candidates = [
        ('SVM', SVC(probability=True)),  # probability=True enables predict_proba
        ('Decision Tree', DecisionTreeClassifier()),
        ('Random Forest', RandomForestClassifier()),
        ('AdaBoost', AdaBoostClassifier()),
        ('Naive Bayes', GaussianNB()),
        ('GradientBoosting', GradientBoostingClassifier()),
    ]

    # Phase 1: fit and score every model, keeping the declaration order.
    scoreboard = {
        label: evaluate_classifier(model, X_train, y_train, X_test, y_test)
        for label, model in candidates
    }

    # Phase 2: dump the collected metrics.
    for label in scoreboard:
        print(f"Classifier: {label}")
        for metric_name in scoreboard[label]:
            print(f"{metric_name}: {scoreboard[label][metric_name]}")
        print("\n")

# Main function for A3 (Classifier Comparison)
def run_a3_classifier_comparison(file_path):
    """Load the dataset, split it, and compare the classifiers on it.

    The last column of the loaded table is used as the target and all
    preceding columns as features. 20% of the rows are held out for testing
    (fixed ``random_state=42``). Results are printed by
    ``compare_classifiers``; nothing is returned.

    Args:
        file_path: Location of the dataset, forwarded to ``load_data``.
    """
    frame = load_data(file_path)
    features = frame.iloc[:, :-1].values
    target = frame.iloc[:, -1].values

    split = train_test_split(features, target, test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = split

    compare_classifiers(X_train, y_train, X_test, y_test)

# File path for the dataset (under /content, presumably the Colab working directory).
# NOTE(review): the name ends in .xlsx -- confirm the file's real format is one
# that load_data can read.
file_path = '/content/Clustering.xlsx'

# Run A3 - Classifier Comparison (executes immediately when this cell runs)
print("=== A3: Classifier Comparison ===")
run_a3_classifier_comparison(file_path)

