<a href="https://colab.research.google.com/github/JatinSharma222/AI-experiments-Univaersity/blob/main/Experiment9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Experiment 9
Construct a decision tree and ensemble methods (Random Forest and Gradient Boosting), then compare their performance on a breast cancer diagnosis dataset.

In [1]:
# Import necessary libraries
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pandas as pd


In [2]:
# Location of the dataset. The '/content' prefix matches the Colab runtime this
# notebook links to; adjust DATA_PATH when running elsewhere.
DATA_PATH = '/content/breast-cancer.csv'

# Raw table exactly as read from disk. Kept unmodified so later cells can be
# re-run against the original values.
data = pd.read_csv(DATA_PATH)

# Independent working copy for preprocessing. `pd.read_csv` already returns a
# DataFrame, so re-wrapping it with `pd.DataFrame(data)` adds nothing and makes
# no copy; on some pandas versions the two frames then share storage, letting
# in-place edits to `df` leak into `data`.
df = data.copy()

In [3]:
# Preprocessing steps
# Build the modelling frame from the raw table (`data`) instead of mutating
# `df` in place, so this cell produces the same result every time it is run.
# (Re-mapping labels that are already 0/1 would turn them into NaN, and
# dropping 'id' a second time raises KeyError.)
df = (
    data
    # Drop 'id' column as it's not relevant for modeling
    .drop(columns=['id'])
    # Map diagnosis to binary labels (M: 1, B: 0)
    .assign(diagnosis=lambda frame: frame['diagnosis'].map({'M': 1, 'B': 0}))
)

# Series.map leaves NaN for any value that is not a key of the mapping; stop
# here with a clear message rather than passing NaN targets on to the models.
if df['diagnosis'].isna().any():
    raise ValueError(
        "Column 'diagnosis' contains values other than 'M' and 'B'"
    )

# Features and target variable
X = df.drop(columns=['diagnosis'])
y = df['diagnosis']


In [4]:
# Train-test split
# 30% of the rows are held out for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [5]:
# Decision Tree Classifier
# fit() returns the fitted estimator, so construction and training are chained.
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predicted labels for the held-out test rows
y_pred_dt = dt_model.predict(X_test)


In [6]:
# Random Forest Classifier
# An ensemble of 100 trees with a fixed seed; fit() returns the fitted
# estimator, so construction and training are chained.
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predicted labels for the held-out test rows
y_pred_rf = rf_model.predict(X_test)


In [7]:
# Gradient Boosting Classifier
# 100 boosting stages with a fixed seed; fit() returns the fitted estimator,
# so construction and training are chained.
gb_model = GradientBoostingClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predicted labels for the held-out test rows
y_pred_gb = gb_model.predict(X_test)


In [8]:
# Function to evaluate models
def evaluate_model(y_true, y_pred, model_name):
    """Print a short evaluation summary for one set of predictions.

    Writes a heading naming the model, then the accuracy, the confusion
    matrix and the classification report, followed by blank lines.

    Parameters
    ----------
    y_true
        Ground-truth labels.
    y_pred
        Labels predicted by the model, compared against ``y_true``.
    model_name
        Text interpolated into the heading line.

    Returns
    -------
    None
        Everything is reported via ``print``.
    """
    print(f"Evaluation Metrics for {model_name}")

    # (label, metric function) pairs, reported in this order. The label and
    # the value are passed to print() as separate arguments, so print inserts
    # a single space between them (also after the trailing newline).
    sections = (
        ("Accuracy:", accuracy_score),
        ("Confusion Matrix:\n", confusion_matrix),
        ("Classification Report:\n", classification_report),
    )
    for label, metric in sections:
        print(label, metric(y_true, y_pred))

    print("\n")


In [9]:
# Evaluate Decision Tree, Random Forest and Gradient Boosting, in that order,
# against the same held-out labels.
for model_name, predictions in (
    ("Decision Tree", y_pred_dt),
    ("Random Forest", y_pred_rf),
    ("Gradient Boosting", y_pred_gb),
):
    evaluate_model(y_test, predictions, model_name)


Evaluation Metrics for Decision Tree
Accuracy: 0.9415204678362573
Confusion Matrix:
 [[101   7]
 [  3  60]]
Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.94      0.95       108
           1       0.90      0.95      0.92        63

    accuracy                           0.94       171
   macro avg       0.93      0.94      0.94       171
weighted avg       0.94      0.94      0.94       171



Evaluation Metrics for Random Forest
Accuracy: 0.9707602339181286
Confusion Matrix:
 [[107   1]
 [  4  59]]
Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.99      0.98       108
           1       0.98      0.94      0.96        63

    accuracy                           0.97       171
   macro avg       0.97      0.96      0.97       171
weighted avg       0.97      0.97      0.97       171



Evaluation Metrics for Gradient Boosting
Accuracy: 0.9590643274853801
Confusio