In [4]:
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the income dataset (path is relative to the notebook's working directory)
df = pd.read_csv("income.csv")

# Features are every column except 'income_level'; 'income_level' is the label
X = df.drop('income_level', axis=1)
y = df['income_level']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Weak learner for boosting: a depth-1 decision tree (a "stump")
base_estimator = DecisionTreeClassifier(max_depth=1)

# Pass the stump explicitly. Previously `base_estimator` was created but never
# handed to AdaBoost, so the model silently relied on the library default.
# NOTE(review): per the scikit-learn docs the default base learner is also a
# depth-1 tree, so the recorded metrics should be unchanged — confirm on re-run.
model = AdaBoostClassifier(estimator=base_estimator, n_estimators=50, random_state=42)

# Train the boosted ensemble on the training split
model.fit(X_train, y_train)

# Predict labels for the held-out rows
y_pred = model.predict(X_test)

# Score the predictions: overall accuracy plus the confusion matrix
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Display results
print(f"Accuracy: {accuracy:.3f}")
print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 0.833
Confusion Matrix:
[[7003  411]
 [1223 1132]]


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Read the iris measurements from disk
iris_df = pd.read_csv('iris.csv')

# Replace the species labels with integer codes; `le` is kept so the codes
# can be mapped back to the original labels later
le = LabelEncoder()
iris_df = iris_df.assign(species=le.fit_transform(iris_df['species']))

# The four measurement columns are the features; the encoded species is the target
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
X = iris_df[feature_columns]
y = iris_df['species']

# 80/20 train/test split with a fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Function to train and evaluate AdaBoost
def evaluate_adaboost(estimator, n_estimators, learning_rate, estimator_name):
    """Fit an AdaBoost ensemble and report its accuracy on the held-out split.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    Args:
        estimator: Base learner handed to ``AdaBoostClassifier``.
        n_estimators: Forwarded as the ensemble's ``n_estimators``.
        learning_rate: Forwarded as the ensemble's ``learning_rate``.
        estimator_name: Label used only in the printed summary line.

    Returns:
        The ``accuracy_score`` of the predictions for ``X_test`` against ``y_test``.
    """
    # The seed is fixed so repeated calls with the same arguments are comparable
    booster = AdaBoostClassifier(
        estimator=estimator,
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        random_state=42,
    ).fit(X_train, y_train)

    accuracy = accuracy_score(y_test, booster.predict(X_test))
    print(f"{estimator_name} with n_estimators={n_estimators}, learning_rate={learning_rate}: Accuracy = {accuracy:.3f}")
    return accuracy

# Hyperparameter grid shared by both experiments
n_estimators_list = [10, 50, 100]
learning_rates = [0.1, 0.5, 1.0]

# Base learners under comparison
dt_base = DecisionTreeClassifier(max_depth=1)
logreg_base = LogisticRegression(max_iter=1000)

# Each entry: (header printed before the runs, base learner, label for result lines)
experiments = [
    ("AdaBoost with Decision Tree:", dt_base, "Decision Tree"),
    ("\nAdaBoost with Logistic Regression:", logreg_base, "Logistic Regression"),
]

# Experiment 1 sweeps the grid with the decision stump, experiment 2 with
# logistic regression; n_estimators varies in the outer loop, learning_rate in the inner
for header, base, label in experiments:
    print(header)
    for n in n_estimators_list:
        for lr in learning_rates:
            evaluate_adaboost(base, n, lr, label)

AdaBoost with Decision Tree:
Decision Tree with n_estimators=10, learning_rate=0.1: Accuracy = 0.967
Decision Tree with n_estimators=10, learning_rate=0.5: Accuracy = 1.000
Decision Tree with n_estimators=10, learning_rate=1.0: Accuracy = 1.000
Decision Tree with n_estimators=50, learning_rate=0.1: Accuracy = 1.000
Decision Tree with n_estimators=50, learning_rate=0.5: Accuracy = 0.967
Decision Tree with n_estimators=50, learning_rate=1.0: Accuracy = 0.933
Decision Tree with n_estimators=100, learning_rate=0.1: Accuracy = 1.000
Decision Tree with n_estimators=100, learning_rate=0.5: Accuracy = 1.000
Decision Tree with n_estimators=100, learning_rate=1.0: Accuracy = 0.933

AdaBoost with Logistic Regression:
Logistic Regression with n_estimators=10, learning_rate=0.1: Accuracy = 1.000
Logistic Regression with n_estimators=10, learning_rate=0.5: Accuracy = 0.967
Logistic Regression with n_estimators=10, learning_rate=1.0: Accuracy = 0.933
Logistic Regression with n_estimators=50, learning