In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, confusion_matrix


# Train and evaluate an AdaBoost classifier on income.csv.
# The last column is treated as the target; every other column is a feature.
df = pd.read_csv('income.csv')

X_raw = df.iloc[:, :-1]
y_raw = df.iloc[:, -1]

# One-hot encode the non-numeric feature columns. dtype=float keeps the whole
# frame numeric, so the imputer below works on a float array instead of an
# object array mixing bools and numbers.
X_encoded = pd.get_dummies(X_raw, dtype=float)

# Label-encode the target unless it is already numeric. Checking for
# "not numeric" (rather than dtype == 'object') also covers pandas string and
# categorical dtypes.
if pd.api.types.is_numeric_dtype(y_raw):
    y_encoded = y_raw
else:
    y_encoded = LabelEncoder().fit_transform(y_raw)

# Split BEFORE fitting any preprocessing statistics: fitting the imputer on
# the full dataset would leak information from the test rows into training.
# Same test_size / random_state as before, so the row assignment is unchanged.
X_train_enc, X_test_enc, y_train, y_test = train_test_split(
    X_encoded, y_encoded, test_size=0.2, random_state=42
)

# Learn the most frequent value per column from the training rows only, then
# apply those same values to the test rows.
imputer = SimpleImputer(strategy='most_frequent')
X_train = pd.DataFrame(
    imputer.fit_transform(X_train_enc),
    columns=X_train_enc.columns,
    index=X_train_enc.index,
)
X_test = pd.DataFrame(
    imputer.transform(X_test_enc),
    columns=X_test_enc.columns,
    index=X_test_enc.index,
)

# Full imputed feature matrix in the original row order, kept under its
# original name in case other cells refer to it.
X_imputed = pd.concat([X_train, X_test]).reindex(X_encoded.index)

model = AdaBoostClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Accuracy Score:", accuracy)
print("Confusion Matrix:\n", conf_matrix)


Accuracy Score: 0.8327362063670796
Confusion Matrix:
 [[7003  411]
 [1223 1132]]


In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


# Compare AdaBoost on the iris dataset for two kinds of base estimator over a
# small grid of ensemble sizes and learning rates.
iris = load_iris()
X = iris.data
y = iris.target

# One hold-out split shared by every configuration so the scores are comparable.
seed = 42
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

# Hyperparameter grid, defined once instead of being repeated per estimator.
n_estimators_grid = [10, 50, 100]
learning_rate_grid = [0.1, 0.5, 1.0]

# (section header, factory) pairs. A factory is used so that every
# AdaBoostClassifier receives a freshly constructed base estimator.
base_estimator_factories = [
    ("----- AdaBoost with Decision Tree -----",
     lambda: DecisionTreeClassifier(max_depth=1)),
    ("\n----- AdaBoost with Logistic Regression -----",
     lambda: LogisticRegression(max_iter=1000)),
]

for header, make_estimator in base_estimator_factories:
    print(header)
    for n in n_estimators_grid:
        for lr in learning_rate_grid:
            model = AdaBoostClassifier(
                estimator=make_estimator(),
                n_estimators=n,
                learning_rate=lr,
                random_state=seed
            )
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            score = accuracy_score(y_test, y_pred)
            print(f"n_estimators={n}, learning_rate={lr} => Accuracy: {score:.4f}")


----- AdaBoost with Decision Tree -----
n_estimators=10, learning_rate=0.1 => Accuracy: 0.9667
n_estimators=10, learning_rate=0.5 => Accuracy: 1.0000
n_estimators=10, learning_rate=1.0 => Accuracy: 1.0000
n_estimators=50, learning_rate=0.1 => Accuracy: 1.0000
n_estimators=50, learning_rate=0.5 => Accuracy: 0.9667
n_estimators=50, learning_rate=1.0 => Accuracy: 0.9333
n_estimators=100, learning_rate=0.1 => Accuracy: 1.0000
n_estimators=100, learning_rate=0.5 => Accuracy: 1.0000
n_estimators=100, learning_rate=1.0 => Accuracy: 0.9333

----- AdaBoost with Logistic Regression -----
n_estimators=10, learning_rate=0.1 => Accuracy: 1.0000
n_estimators=10, learning_rate=0.5 => Accuracy: 0.9667
n_estimators=10, learning_rate=1.0 => Accuracy: 0.9333
n_estimators=50, learning_rate=0.1 => Accuracy: 1.0000
n_estimators=50, learning_rate=0.5 => Accuracy: 1.0000
n_estimators=50, learning_rate=1.0 => Accuracy: 0.9333
n_estimators=100, learning_rate=0.1 => Accuracy: 1.0000
n_estimators=100, learning_ra