<a href="https://colab.research.google.com/github/ManishInde/sy/blob/main/BMLexp10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import fetch_openml

# Download version 1 of the "diabetes" dataset from OpenML.
diabetes_data = fetch_openml(name="diabetes", version=1)

# Work with the bundled DataFrame, which holds the features and the 'class' column together.
df = diabetes_data.frame

# Preview the first rows as a quick sanity check.
print(df.head())

# The 'class' column is the label; every other column is a feature.
y = df['class']
X = df.drop(columns=['class'])

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Bag 100 decision trees; both the base tree and the ensemble are seeded.
# (`estimator` is the keyword used by newer scikit-learn; it replaces `base_estimator`.)
dtree = DecisionTreeClassifier(random_state=42)
bagging = BaggingClassifier(estimator=dtree, n_estimators=100, random_state=42)

# Fit on the training split, then predict labels for the held-out rows.
y_pred = bagging.fit(X_train, y_train).predict(X_test)

# Report overall accuracy followed by the per-class metrics table.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

   preg  plas  pres  skin  insu  mass   pedi  age            class
0     6   148    72    35     0  33.6  0.627   50  tested_positive
1     1    85    66    29     0  26.6  0.351   31  tested_negative
2     8   183    64     0     0  23.3  0.672   32  tested_positive
3     1    89    66    23    94  28.1  0.167   21  tested_negative
4     0   137    40    35   168  43.1  2.288   33  tested_positive
Accuracy: 0.7316
Classification Report:
                 precision    recall  f1-score   support

tested_negative       0.81      0.77      0.79       151
tested_positive       0.60      0.66      0.63        80

       accuracy                           0.73       231
      macro avg       0.71      0.72      0.71       231
   weighted avg       0.74      0.73      0.73       231



In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import StandardScaler  # Import StandardScaler

# Download version 1 of the "diabetes" dataset from OpenML and take its DataFrame.
diabetes_data = fetch_openml(name="diabetes", version=1)
df = diabetes_data.frame

# The 'class' column is the label; every other column is a feature.
y = df['class']
X = df.drop(columns=['class'])

# 70/30 train/test split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Standardise the features. The scaler is fitted on the training split only and
# then applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Base estimators to wrap in a bagging ensemble, keyed by display name.
classifiers = {
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Support Vector Machine": SVC(random_state=42),
    # max_iter is raised to 500 to give the solver more iterations to converge.
    "Logistic Regression": LogisticRegression(max_iter=500, random_state=42),
    "Naive Bayes": GaussianNB(),
}

# Entries of the report dict that are aggregates rather than individual classes.
aggregate_keys = ['accuracy', 'macro avg', 'weighted avg']

# Bag each base estimator in turn and print its test-set metrics.
for model_name, model in classifiers.items():
    print(f"\nEvaluating model: {model_name}")

    # 100 bagged copies of the current base estimator, trained on the scaled data.
    bagging = BaggingClassifier(estimator=model, n_estimators=100, random_state=42)
    bagging.fit(X_train_scaled, y_train)

    # Predict on the scaled test split and score against the true labels.
    y_pred = bagging.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.4f}")

    # Request the report as a dict so individual metrics can be picked out.
    report = classification_report(y_test, y_pred, output_dict=True)

    # Print precision / recall / F1 for each class, skipping the aggregate entries.
    for class_label in report.keys():
        if class_label in aggregate_keys:
            continue
        class_metrics = report[class_label]
        precision = class_metrics['precision']
        recall = class_metrics['recall']
        f1 = class_metrics['f1-score']
        print(f"Class {class_label} - Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}")



Evaluating model: Decision Tree
Accuracy: 0.7359
Class tested_negative - Precision: 0.8125, Recall: 0.7748, F1-Score: 0.7932
Class tested_positive - Precision: 0.6092, Recall: 0.6625, F1-Score: 0.6347

Evaluating model: K-Nearest Neighbors
Accuracy: 0.6840
Class tested_negative - Precision: 0.7468, Recall: 0.7815, F1-Score: 0.7638
Class tested_positive - Precision: 0.5479, Recall: 0.5000, F1-Score: 0.5229

Evaluating model: Support Vector Machine
Accuracy: 0.7446
Class tested_negative - Precision: 0.7911, Recall: 0.8278, F1-Score: 0.8091
Class tested_positive - Precision: 0.6438, Recall: 0.5875, F1-Score: 0.6144

Evaluating model: Logistic Regression
Accuracy: 0.7229
Class tested_negative - Precision: 0.7881, Recall: 0.7881, F1-Score: 0.7881
Class tested_positive - Precision: 0.6000, Recall: 0.6000, F1-Score: 0.6000

Evaluating model: Naive Bayes
Accuracy: 0.7446
Class tested_negative - Precision: 0.8151, Recall: 0.7881, F1-Score: 0.8013
Class tested_positive - Precision: 0.6235, Reca