# DECISION TREE CLASSIFIER


 **Build a Decision Tree Classifier and compare its performance with ensemble
          techniques such as Random Forest, Bagging, Boosting, and Stacking. Demonstrate
          this with different decision trees.**

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neighbors import KNeighborsClassifier

# Read the diabetes dataset from its CSV file, then preview the first rows
DATA_PATH = '/content/diabetes_dataset.csv'
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [None]:
# Separate the feature matrix X from the target vector y (the 'Outcome' label).
# 'Unnamed: 0' appears to be the row index that was written into the CSV
# rather than a real measurement, so it is removed from the features as well;
# errors='ignore' keeps this cell working if the CSV has no such column.
X = df.drop(columns=['Outcome']).drop(columns=['Unnamed: 0'], errors='ignore')
y = df.Outcome

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

In [None]:
def evaluate_model(model):
    """Fit ``model`` on the training split and score it on the test split.

    The data comes from the notebook-level ``X_train``/``y_train`` (fitting)
    and ``X_test``/``y_test`` (scoring) variables.

    Returns:
        A tuple ``(accuracy, precision, recall)``; precision and recall are
        computed with label ``1`` as the positive class.
    """
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    return (
        accuracy_score(y_test, predicted),
        precision_score(y_test, predicted, pos_label=1),
        recall_score(y_test, predicted, pos_label=1),
    )

In [None]:
# 1. Decision Tree Classifier -- single seeded tree used as the baseline
dt = DecisionTreeClassifier(
    random_state=42,
)
dt_results = evaluate_model(model=dt)

In [None]:
# 2. Random Forest Classifier -- default forest settings with a fixed seed
rf = RandomForestClassifier(
    random_state=42,
)
rf_results = evaluate_model(model=rf)

In [None]:
# 3. Bagging Classifier -- 50 decision trees combined by a seeded bagging ensemble
bagging = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=50,
    random_state=42,
)
bagging_results = evaluate_model(model=bagging)

In [None]:
# 4. Boosting (AdaBoost)
# AdaBoost needs *weak* base learners. An unrestricted tree typically fits the
# weighted training set perfectly, at which point scikit-learn stops boosting
# after the first round and the "ensemble" is effectively a single tree.
# Depth-1 stumps (scikit-learn's own default base learner) let all the
# boosting rounds contribute.
boosting = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),
    n_estimators=50,
    random_state=42,
)
boosting_results = evaluate_model(boosting)




In [None]:
# 5. Gradient Boosting -- 50 boosting stages with a fixed seed
gboost = GradientBoostingClassifier(
    n_estimators=50,
    random_state=42,
)
gboost_results = evaluate_model(model=gboost)

In [None]:
# 6. Stacking Classifier with base models and Logistic Regression as meta-model
# StackingClassifier takes no random_state of its own, so the randomised base
# learners (tree and forest) are seeded individually; otherwise the stacking
# scores change from run to run while every other model here is seeded with 42.
stacking = StackingClassifier(
    estimators=[
        ('dt', DecisionTreeClassifier(random_state=42)),
        ('rf', RandomForestClassifier(random_state=42)),
        ('knn', KNeighborsClassifier())
    ],
    final_estimator=LogisticRegression(),
    cv=5
)
stacking_results = evaluate_model(stacking)

In [None]:
# Gather every model's (accuracy, precision, recall) tuple into one comparison table
model_scores = [
    ('Decision Tree', dt_results),
    ('Random Forest', rf_results),
    ('Bagging', bagging_results),
    ('Boosting (AdaBoost)', boosting_results),
    ('Gradient Boosting', gboost_results),
    ('Stacking', stacking_results),
]
results = pd.DataFrame({
    'Model': [label for label, _ in model_scores],
    'Accuracy': [scores[0] for _, scores in model_scores],
    'Precision': [scores[1] for _, scores in model_scores],
    'Recall': [scores[2] for _, scores in model_scores],
})

print(results)

                 Model  Accuracy  Precision    Recall
0        Decision Tree  0.746753   0.625000  0.727273
1        Random Forest  0.720779   0.607143  0.618182
2              Bagging  0.746753   0.633333  0.690909
3  Boosting (AdaBoost)  0.746753   0.621212  0.745455
4    Gradient Boosting  0.766234   0.661017  0.709091
5             Stacking  0.746753   0.648148  0.636364
