In [1]:
import pandas as pd  
from sklearn.model_selection import train_test_split  
from sklearn.ensemble import AdaBoostClassifier  
from sklearn.tree import DecisionTreeClassifier  
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score  
from sklearn.impute import SimpleImputer  
from sklearn.preprocessing import OneHotEncoder  
from sklearn.compose import ColumnTransformer  
from sklearn.pipeline import Pipeline  

# Load dataset  
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'  
column_names = ['age', 'workclass', 'fnlwgt', 'education', 'education_num', 'marital_status',  
               'occupation', 'relationship', 'race', 'sex', 'capital_gain', 'capital_loss',  
               'hours_per_week', 'native_country', 'income']  
data = pd.read_csv(url, header=None, names=column_names, na_values='?')  

# Preprocess data  
# Drop NA values for simplicity (can impute in real cases)  
data = data.dropna()  

# Split features and target  
X = data.drop('income', axis=1)  
y = (data['income'] == ' >50K').astype(int)  # Binary classification  

# Define categorical and numerical columns  
cat_cols = ['workclass', 'education', 'marital_status', 'occupation',  
            'relationship', 'race', 'sex', 'native_country']  
num_cols = ['age', 'fnlwgt', 'education_num', 'capital_gain', 'capital_loss', 'hours_per_week']  

# Create preprocessing pipeline  
preprocessor = ColumnTransformer(  
    transformers=[  
        ('num', 'passthrough', num_cols),  
        ('cat', OneHotEncoder(), cat_cols)  
    ])  

# Create full pipeline  
pipeline = Pipeline(  
    steps=[  
        ('preprocessor', preprocessor),  
        ('classifier', AdaBoostClassifier(  
            base_estimator=DecisionTreeClassifier(max_depth=1),  
            n_estimators=50,  
            learning_rate=0.1  
        ))  
    ])  

# Split data  
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  

# Train model  
pipeline.fit(X_train, y_train)  

# Evaluate  
y_pred = pipeline.predict(X_test)  
y_proba = pipeline.predict_proba(X_test)[:, 1]  

print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")  
print(f"AUC-ROC: {roc_auc_score(y_test, y_proba):.4f}")  
print(f"F1-Score: {f1_score(y_test, y_pred):.4f}")

TypeError: AdaBoostClassifier.__init__() got an unexpected keyword argument 'base_estimator'