In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Load dataset
df = pd.read_csv('/content/income.csv')

# Features and target: every column except 'income_level' is used as a feature.
X = df.drop('income_level', axis=1)
y = df['income_level']

# Train-test split (70% train / 30% test, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 1: AdaBoost with 10 estimators
clf_10 = AdaBoostClassifier(n_estimators=10, random_state=42)
clf_10.fit(X_train, y_train)
y_pred_10 = clf_10.predict(X_test)
acc_10 = accuracy_score(y_test, y_pred_10)

print("Accuracy with 10 trees:", acc_10)
print("Confusion Matrix with 10 trees:\n", confusion_matrix(y_test, y_pred_10))

# Step 2: Fine-tune number of estimators
#
# Boosting is sequential and the seed is fixed, so the first n weak learners
# of a 100-estimator ensemble are the same learners a standalone n-estimator
# model would train. Fitting once and walking `staged_predict` therefore
# evaluates every n in 1..100 without refitting 100 separate models.
#
# NOTE(review): best_n is chosen by accuracy on the *test* set, so the
# "Best Accuracy" printed below is an optimistic estimate. For an unbiased
# figure, select n on a validation split or with cross-validation and score
# the test set only once.
best_score = 0
best_n = 0
best_cm = None

clf = AdaBoostClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Stage n yields the test-set predictions of the ensemble truncated to its
# first n estimators. If boosting stops early, fewer stages are produced.
for n, y_pred in enumerate(clf.staged_predict(X_test), start=1):
    acc = accuracy_score(y_test, y_pred)
    # Strict '>' keeps the smallest n among equally accurate ensembles.
    if acc > best_score:
        best_score = acc
        best_n = n
        best_cm = confusion_matrix(y_test, y_pred)

print("\nBest Accuracy:", best_score)
print("Best number of trees (n_estimators):", best_n)
print("Best Confusion Matrix:\n", best_cm)


Accuracy with 10 trees: 0.8276803384972361
Confusion Matrix with 10 trees:
 [[10722   387]
 [ 2138  1406]]

Best Accuracy: 0.830956118201051
Best number of trees (n_estimators): 42
Best Confusion Matrix:
 [[10496   613]
 [ 1864  1680]]
