In [1]:
import pandas as pd
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:

# Download the Adult (census income) dataset from the UCI repository.
# header=None: the file has no header row, so pandas assigns integer column
# labels until real names are set.
# skipinitialspace=True: fields in the raw file are separated by ", "
# (comma + space); without this flag every string value would keep a leading
# blank (e.g. " <=50K", " ?"), which silently breaks equality checks and
# filters on the categorical columns.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
data = pd.read_csv(url, header=None, skipinitialspace=True)

In [3]:
# Give the 15 positional columns readable names.
# The order must match the field order of the loaded file; the last entry,
# "income", is the label the rest of the notebook predicts.
data.columns = [
    "age",
    "workclass",
    "fnlwgt",
    "education",
    "education-num",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
    "native-country",
    "income",
]

In [4]:
# Integer-encode the whole table: DataFrame.apply hands each column in turn to
# the encoder, which replaces every value with the index of that value among
# the column's sorted unique values.
# NOTE(review): this covers the numeric columns as well as the text ones, and
# the single shared encoder is refit on every column, so afterwards it only
# remembers the classes of the last column it processed.
le = LabelEncoder()
data_encoded = data.apply(lambda column: le.fit_transform(column))

In [5]:

# Separate the label from the predictors: y is the encoded "income" column,
# X is every other encoded column (drop returns a new frame, so data_encoded
# itself is left intact).
y = data_encoded["income"]
X = data_encoded.drop(columns=["income"])

In [6]:

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the reported metrics) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# --- Base learners ---------------------------------------------------------
# Three different model families, each seeded so the run is repeatable.
logreg_model = LogisticRegression(solver='lbfgs', max_iter=1000, random_state=42)
forest_model = RandomForestClassifier(random_state=42)
tree_model = DecisionTreeClassifier(random_state=42)

# --- Ensemble --------------------------------------------------------------
# Hard voting: each base learner casts one vote per sample and the majority
# class label wins.
base_estimators = [
    ("tree", tree_model),
    ("forest", forest_model),
    ("logreg", logreg_model),
]
ensemble_model = VotingClassifier(estimators=base_estimators, voting="hard")

# --- Fit on the training split, predict the held-out split -----------------
ensemble_model.fit(X_train, y_train)
y_pred = ensemble_model.predict(X_test)

# --- Evaluation on the held-out split --------------------------------------
# Overall accuracy, the confusion matrix, and the per-class
# precision / recall / F1 report.
accuracy = accuracy_score(y_test, y_pred)
confusion_mat = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", confusion_mat)
print("Classification Report:\n", classification_rep)

Accuracy: 0.8499923230462153
Confusion Matrix:
 [[4627  315]
 [ 662  909]]
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.94      0.90      4942
           1       0.74      0.58      0.65      1571

    accuracy                           0.85      6513
   macro avg       0.81      0.76      0.78      6513
weighted avg       0.84      0.85      0.84      6513

