In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# --- Configuration ---------------------------------------------------------
# Everything a reader might want to tune lives here instead of being repeated
# inline further down.
DATA_PATH = "/content/income.csv"  # Colab-style absolute path; adjust when running elsewhere
TARGET_COL = "income_level"
SEED = 42
TEST_SIZE = 0.2
BASELINE_N = 10                    # n_estimators used for the first, stand-alone run
n_values = range(10, 101, 10)      # n_estimators values explored in the sweep


def fit_and_score(n_estimators, X_fit, X_eval, y_fit, y_eval, seed=SEED):
    """Fit an AdaBoostClassifier and measure its accuracy on an evaluation split.

    Parameters
    ----------
    n_estimators : int
        Value forwarded to ``AdaBoostClassifier(n_estimators=...)``.
    X_fit, y_fit
        Features and labels the classifier is fitted on.
    X_eval, y_eval
        Features and labels the fitted classifier is scored on.
    seed : int
        Value forwarded to ``AdaBoostClassifier(random_state=...)``.

    Returns
    -------
    tuple
        ``(fitted_model, predictions_for_X_eval, accuracy)``.
    """
    clf = AdaBoostClassifier(n_estimators=n_estimators, random_state=seed)
    clf.fit(X_fit, y_fit)
    preds = clf.predict(X_eval)
    return clf, preds, accuracy_score(y_eval, preds)


# --- Load the dataset and encode the target --------------------------------
df = pd.read_csv(DATA_PATH)

# LabelEncoder replaces the target labels with integer codes
# (e.g., <=50K → 0, >50K → 1).
le = LabelEncoder()
df[TARGET_COL] = le.fit_transform(df[TARGET_COL])

# --- Split features / target and train / test ------------------------------
X = df.drop(columns=TARGET_COL)
y = df[TARGET_COL]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# --- 1️⃣ Baseline: AdaBoostClassifier with n_estimators = BASELINE_N --------
baseline = fit_and_score(BASELINE_N, X_train, X_test, y_train, y_test)
model, y_pred, accuracy = baseline
print(f"Accuracy with {BASELINE_N} estimators: {accuracy:.4f}")

# --- 2️⃣ Sweep n_estimators --------------------------------------------------
# NOTE(review): n_estimators is chosen using the same test split whose accuracy
# is then reported as the "best" score, so best_score is an optimistic estimate.
# Consider selecting on a separate validation split or with cross-validation.
scores = []
for n in n_values:
    if n == BASELINE_N:
        # Same data, same seed, same n_estimators as the baseline run above:
        # reuse that result instead of fitting an identical model a second time.
        model, y_pred, acc = baseline
    else:
        model, y_pred, acc = fit_and_score(n, X_train, X_test, y_train, y_test)
    scores.append(acc)
    print(f"n_estimators={n} => Accuracy: {acc:.4f}")

# --- 3️⃣ Find the best result ------------------------------------------------
# max() returns the first maximal pair, so ties resolve to the smallest
# n_estimators, exactly like scores.index(max(scores)) would.
best_n, best_score = max(zip(n_values, scores), key=lambda pair: pair[1])
print(f"\n✅ Best accuracy: {best_score:.4f} with {best_n} estimators")

# --- Optional: plot accuracy against n_estimators ---------------------------
fig, ax = plt.subplots()
ax.plot(n_values, scores, marker='o')
ax.set_title("Accuracy vs Number of Estimators")
ax.set_xlabel("n_estimators (number of trees)")
ax.set_ylabel("Accuracy")
ax.grid(True)
plt.show()


Accuracy with 10 estimators: 0.8182
n_estimators=10 => Accuracy: 0.8182
n_estimators=20 => Accuracy: 0.8244
n_estimators=30 => Accuracy: 0.8310
n_estimators=40 => Accuracy: 0.8314
n_estimators=50 => Accuracy: 0.8327
n_estimators=60 => Accuracy: 0.8328
n_estimators=70 => Accuracy: 0.8334
n_estimators=80 => Accuracy: 0.8335
