In [None]:
# 1.	Use any classification dataset.
# 2.	Implement a Bagging classifier with Decision Trees.
# 3.	Compare its performance with Random Forest and AdaBoost.
# 4.	Report accuracy, precision, and recall for each method, and briefly explain which ensemble worked best and why.


In [None]:
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load the dataset from a path relative to the notebook's working directory.
# The previous absolute path pointed into one user's Downloads folder, so it
# only resolved on that machine and exposed a local username. Edit DATA_PATH
# here if the CSV lives somewhere else.
DATA_PATH = Path("test.csv")

data = pd.read_csv(DATA_PATH)
print("Dataset loaded successfully!")
print("Shape:", data.shape)
print("Columns:", list(data.columns))


Dataset loaded successfully!
Shape: (1000, 21)
Columns: ['id', 'battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g', 'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height', 'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g', 'touch_screen', 'wifi']


In [None]:

# Choose the label column to predict.
# NOTE(review): "n_cores" is one of the hardware feature columns, and the scores
# recorded further down for it (accuracy 0.12-0.15) look close to chance level
# for a multi-class target. The column list printed at load time resembles the
# unlabeled test split of the Kaggle "Mobile Price Classification" data, whose
# `price_range` label is presumably only in the training CSV -- confirm.
# If the loaded frame has a `price_range` column it is used as the label;
# otherwise this falls back to the original "n_cores" so existing runs behave
# exactly as before.
target = next(
    (col for col in ("price_range", "n_cores") if col in data.columns),
    "n_cores",
)

In [None]:
# Feature matrix: every column except the row identifier and the label.
# errors="ignore" skips whichever of the two names is absent from the frame.
X = data.drop(columns=["id", target], errors="ignore")

# Label vector the classifiers are trained to predict.
y = data[target]


In [None]:

# Hold out 30% of the rows for evaluation. The labels are passed as `stratify`
# so the split is stratified on y, and the fixed seed makes it repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

In [None]:
# The three ensembles under comparison share the same estimator budget and seed.
ensemble_settings = {"n_estimators": 50, "random_state": 42}

# Keyed by display name; insertion order is the order in which the models are
# trained and reported.
models = {
    # Bagging over default decision trees.
    "Bagging": BaggingClassifier(DecisionTreeClassifier(), **ensemble_settings),
    "Random Forest": RandomForestClassifier(**ensemble_settings),
    # Boosting over depth-1 trees.
    "AdaBoost": AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=1), **ensemble_settings
    ),
}

In [None]:
# Fit every ensemble on the training split, score it on the held-out split,
# print a short report and keep the numbers for the comparison table.
results = []

# Precision and recall are macro-averaged over the classes; zero_division=0
# is passed so an undefined per-class score is reported as 0.
macro_kwargs = {"average": "macro", "zero_division": 0}

for name, model in models.items():
    y_pred = model.fit(X_train, y_train).predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, **macro_kwargs)
    rec = recall_score(y_test, y_pred, **macro_kwargs)

    # One print call, one line per field: same text as six separate prints.
    print(
        "\n---------------------------",
        f"Model: {name}",
        f"Accuracy : {acc:.4f}",
        f"Precision: {prec:.4f}",
        f"Recall   : {rec:.4f}",
        "---------------------------",
        sep="\n",
    )

    results.append([name, acc, prec, rec])


---------------------------
Model: Bagging
Accuracy : 0.1500
Precision: 0.1497
Recall   : 0.1471
---------------------------

---------------------------
Model: Random Forest
Accuracy : 0.1167
Precision: 0.1220
Recall   : 0.1168
---------------------------

---------------------------
Model: AdaBoost
Accuracy : 0.1500
Precision: 0.2336
Recall   : 0.1386
---------------------------


In [None]:
# Gather the per-model scores into a single table, one row per ensemble.
summary_columns = ["Model", "Accuracy", "Precision", "Recall"]
results_df = pd.DataFrame(data=results, columns=summary_columns)

print("\nFinal Comparison:\n")
print(results_df)


Final Comparison:

           Model  Accuracy  Precision    Recall
0        Bagging  0.150000   0.149730  0.147133
1  Random Forest  0.116667   0.121988  0.116835
2       AdaBoost  0.150000   0.233638  0.138574


In [None]:
# Second rendering of the score table under a different heading.
# NOTE(review): this builds the same table as `results_df` from the same
# `results` list, and nothing visible in this notebook reads `result_df`
# afterwards -- consider dropping this cell once that is confirmed.
comparison_columns = ["Model", "Accuracy", "Precision", "Recall"]
result_df = pd.DataFrame(data=results, columns=comparison_columns)

print("\n=== Model Comparison ===")
print(result_df)


=== Model Comparison ===
           Model  Accuracy  Precision    Recall
0        Bagging  0.150000   0.149730  0.147133
1  Random Forest  0.116667   0.121988  0.116835
2       AdaBoost  0.150000   0.233638  0.138574


In [None]:
# Pick the best ensemble: highest accuracy, with precision and then recall as
# tie-breakers.
# Taking idxmax() of accuracy alone returns the first of any tied rows, so with
# Bagging and AdaBoost both at 0.15 accuracy the reported "winner" was decided
# by row order rather than by any metric.
ranked_models = results_df.sort_values(
    by=["Accuracy", "Precision", "Recall"],
    ascending=False,
)
best_model = ranked_models.iloc[0]  # still a single row (Series), as before

print("\nBest Ensemble Method:")
print(best_model)


Best Ensemble Method:
Model         Bagging
Accuracy         0.15
Precision     0.14973
Recall       0.147133
Name: 0, dtype: object
