In [None]:
# import necessary packages
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load adult.csv (resolved relative to the notebook's working directory).
data = pd.read_csv('adult.csv')
# Show the first rows followed by describe()'s summary statistics, one after the other.
print(data.head(), data.describe(), sep='\n')

In [None]:
# Report the number of null (NaN/None) cells in every column.
# NOTE(review): only true nulls are counted here; placeholder strings such as '?'
# would not show up — confirm how adult.csv marks missing entries.
print(data.isna().sum())

In [None]:
# Data visualization: bar chart with the number of rows per value of `income`.
fig, ax = plt.subplots()
sns.countplot(data=data, x='income', ax=ax)
plt.show()

In [None]:
# Encode categorical params/features so the models receive purely numeric input.
# Text columns are detected with the pandas dtype helpers rather than
# `dtype == 'object'`, so they are still found when pandas stores strings with a
# dedicated string dtype instead of the generic object dtype.
categorical_features = [
    col for col in data.columns
    if pd.api.types.is_object_dtype(data[col]) or pd.api.types.is_string_dtype(data[col])
]

# Only the target goes through LabelEncoder: it maps the income classes to integer
# labels, and because `le` stays fitted on that single column,
# le.inverse_transform() can recover the original class names later.
le = LabelEncoder()
if 'income' in categorical_features:
    data['income'] = le.fit_transform(data['income'])

# Input features are one-hot encoded instead of label-encoded. Integer codes would
# give the categories an arbitrary numeric order that the scaled logistic regression
# and the SVM trained below would interpret as a real magnitude.
# dtype=int keeps every resulting column numeric (0/1) for the scaler.
nominal_features = [col for col in categorical_features if col != 'income']
if nominal_features:
    data = pd.get_dummies(data, columns=nominal_features, dtype=int)

In [None]:
# Separate the target column from the predictor columns.
y = data['income']
X = data.drop(columns='income')
# Hold out 20% of the rows for testing, leaving 80% for learning.
# random_state=42 seeds the shuffle so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Scale features/params. The scaler is fitted on the training split only and the
# same fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Train the logistic regression model (default settings) on the scaled training split.
lr_model = LogisticRegression().fit(X_train_scaled, y_train)
lr_model  # last expression, so the cell still displays the fitted estimator

In [None]:
# Train the SVM (support vector machine) model: an SVC (support vector classifier)
# with default settings, fitted on the scaled training split.
svm_model = SVC().fit(X_train_scaled, y_train)
svm_model  # last expression, so the cell still displays the fitted estimator

In [ ]:
# Evaluate models: predict on the scaled test split with each fitted model.
lr_predictions = lr_model.predict(X_test_scaled)
svm_predictions = svm_model.predict(X_test_scaled)

# Print each model's accuracy against the held-out labels, logistic regression first.
for label, predictions in (
    ("Logistic Regression Accuracy:", lr_predictions),
    ("SVM Accuracy:", svm_predictions),
):
    print(label, accuracy_score(y_test, predictions))