<a href="https://colab.research.google.com/github/Dhayanadh054/ADML-PROJECT/blob/main/Untitled14.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# =========================================================
# Model Evaluation with Cross-Validation and Random Forest
# Google Colab Compatible – Single Script
# =========================================================

import numpy as np
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import (
    KFold,
    StratifiedKFold,
    cross_val_score,
    train_test_split,
    GridSearchCV
)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# ---------------------------------------------------------
# Load Dataset
# ---------------------------------------------------------
# Load the breast-cancer dataset object once, then pull the feature
# matrix (X) and the label vector (y) out of it for the sections below.
data = load_breast_cancer()
X, y = data.data, data.target

# Quick sanity output: confirm the load and show X's dimensions.
print("Dataset Loaded Successfully")
print("Dataset Shape:", X.shape)
print("-" * 50)

# ---------------------------------------------------------
# K-Fold Cross-Validation
# ---------------------------------------------------------
# Plain 5-way splitter; shuffling is seeded so the folds are repeatable.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Score a seeded decision tree on every fold of the splitter above.
dt_model = DecisionTreeClassifier(random_state=42)
dt_scores = cross_val_score(
    estimator=dt_model,
    X=X,
    y=y,
    scoring='accuracy',
    cv=kfold,
)

# Report the average of the per-fold accuracies.
print("Decision Tree - K-Fold Cross-Validation Accuracy:")
print("Mean Accuracy:", dt_scores.mean())
print("-" * 50)

# ---------------------------------------------------------
# Stratified K-Fold Cross-Validation
# ---------------------------------------------------------
# Seeded, shuffled 5-way stratified splitter. It is reused later for the
# model comparison, so it is kept as a module-level name.
skfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Baseline random forest (default hyperparameters, fixed seed) scored on
# each stratified fold.
rf_model = RandomForestClassifier(random_state=42)
rf_scores = cross_val_score(rf_model, X, y, cv=skfold, scoring='accuracy')

# The heading and the mean are two separate statements: having them fused
# on a single line is a SyntaxError that stops the whole cell from running.
print("Random Forest - Stratified K-Fold Cross-Validation Accuracy:")
print("Mean Accuracy:", rf_scores.mean())
print("-" * 50)

# ---------------------------------------------------------
# Random Forest Hyperparameter Tuning (GridSearchCV)
# ---------------------------------------------------------
# Candidate values per hyperparameter: 2 x 3 x 2 = 12 combinations.
param_grid = dict(
    n_estimators=[100, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5],
)

# Seeded base estimator that the search evaluates for each combination.
rf = RandomForestClassifier(random_state=42)

# Exhaustive search scored by accuracy with cv=5; n_jobs=-1 is passed so
# the fits can run in parallel.
grid_search = GridSearchCV(rf, param_grid, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X, y)

# Each heading is followed by its value on the next line.
print("Best Random Forest Parameters:", grid_search.best_params_, sep="\n")
print("Best Cross-Validation Accuracy:", grid_search.best_score_, sep="\n")
print("-" * 50)

# ---------------------------------------------------------
# Model Comparison using Stratified K-Fold
# ---------------------------------------------------------
# Candidate classifiers, keyed by the label used in the printed results.
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    # SVC is not scale-invariant, so comparing it on raw features against
    # tree-based models is unfair. Standardizing inside a pipeline means the
    # scaler is fitted on each training fold only (no leakage into the
    # validation fold).
    "SVM": make_pipeline(StandardScaler(), SVC()),
    # Random forest rebuilt from the hyperparameters chosen by the grid search.
    "Random Forest": RandomForestClassifier(
        **grid_search.best_params_,
        random_state=42
    )
}

print("Model Comparison Results (Stratified K-Fold):")

# Every model is scored on the same stratified folds so the mean accuracies
# are directly comparable.
for name, model in models.items():
    scores = cross_val_score(model, X, y, cv=skfold, scoring='accuracy')
    print(f"{name} Accuracy: {scores.mean():.4f}")

print("-" * 50)

# ---------------------------------------------------------
# Final Evaluation Metrics
# ---------------------------------------------------------
# Hold out 20% of the rows, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42
)

# Build a fresh forest from the tuned hyperparameters rather than refitting
# grid_search.best_estimator_ in place: refitting that object on X_train
# would silently change what grid_search.best_estimator_ / grid_search.predict
# refer to for any later code.
best_rf = RandomForestClassifier(
    **grid_search.best_params_,
    random_state=42
)
best_rf.fit(X_train, y_train)

# NOTE(review): grid_search was fitted on the full X, y, so the chosen
# hyperparameters were selected with the test rows visible. The report below
# is therefore somewhat optimistic; splitting before tuning would remove this.
y_pred = best_rf.predict(X_test)

print("Classification Report for Optimized Random Forest:")
print(classification_report(y_test, y_pred))

print("Project Execution Completed Successfully")


Dataset Loaded Successfully
Dataset Shape: (569, 30)
--------------------------------------------------
Decision Tree - K-Fold Cross-Validation Accuracy:
Mean Accuracy: 0.9332246545567457
--------------------------------------------------
Random Forest - Stratified K-Fold Cross-Validation Accuracy:
Mean Accuracy: 0.9560937742586555
--------------------------------------------------
Best Random Forest Parameters:
{'max_depth': None, 'min_samples_split': 5, 'n_estimators': 200}
Best Cross-Validation Accuracy:
0.9613569321533924
--------------------------------------------------
Model Comparison Results (Stratified K-Fold):
Decision Tree Accuracy: 0.9104
SVM Accuracy: 0.9139
Random Forest Accuracy: 0.9526
--------------------------------------------------
Classification Report for Optimized Random Forest:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93        42
           1       0.96      0.96      0.96        72

    accuracy          