All the imports and libraries used below

In [12]:
# UCI ML repo imports
from ucimlrepo import fetch_ucirepo

# sk-learn imports
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.metrics import accuracy_score

# Pandas and Numpy
import pandas as pd
import numpy as np

Preprocessing Data Section

In [None]:
# Preprocessing Data
#
# Loads the UCI Adult data set, turns every feature into a number, rescales all
# features to [0, 1], and slices out three 3-feature groups for the models below.


def _encode_non_numeric(frame):
    """Return a copy of ``frame`` with non-numeric columns replaced by integer codes.

    MinMaxScaler only accepts numeric input, so string columns are mapped to
    their pandas category codes (categories are sorted, so the mapping is
    deterministic). Missing values receive the code -1, i.e. they are treated
    as their own category. One input column stays one output column, which
    keeps positional column selection valid after scaling.
    """
    encoded = frame.copy()
    for name in encoded.columns:
        if not pd.api.types.is_numeric_dtype(encoded[name]):
            encoded[name] = encoded[name].astype('category').cat.codes
    return encoded


def _strip_label_suffix(column):
    """Remove surrounding whitespace and a trailing '.' from string labels."""
    if pd.api.types.is_numeric_dtype(column):
        return column
    return column.str.strip().str.rstrip('.')


adult = fetch_ucirepo(id=2)
scale = MinMaxScaler()

X = adult.data['features']  # features as a pandas DataFrame

# Targets as a pandas DataFrame.
# NOTE(review): the UCI Adult targets are known to mix '>50K' with '>50K.'
# (same for '<=50K'); left as-is that makes the problem look 4-class. The
# strip is a no-op if the suffix is absent -- confirm with y.value_counts().
y = adult.data['targets'].apply(_strip_label_suffix)

print(X.dtypes)  # Assess the data types

X_encoded = _encode_non_numeric(X)          # strings -> integer codes
X_scaled = scale.fit_transform(X_encoded)   # every column rescaled to [0, 1]
assert not np.isnan(X_scaled).any(), "numeric features still contain missing values"


def _column_positions(*names):
    """Translate feature names into positions in ``X_scaled``; raises KeyError on a typo."""
    return [X.columns.get_loc(name) for name in names]


# age, workclass and education
age_class_education = X_scaled[:, _column_positions('age', 'workclass', 'education')]

# relationship, race and sex
# NOTE(review): this group is named and reported as "relationship", so it
# selects the relationship column (position 7) rather than marital-status
# (position 5) -- confirm which feature was intended.
relationship_race_sex = X_scaled[:, _column_positions('relationship', 'race', 'sex')]

# occupation, hours per week and native country
occupation_hours_country = X_scaled[:, _column_positions('occupation', 'hours-per-week', 'native-country')]

Using Logistic Regression to Train the Initial Models

In [None]:
# Initial Model Training
#
# Fits one baseline logistic-regression classifier per feature group and
# reports its accuracy on a held-out 20% test split.
#
# Feature column order in X: age, workclass, fnlwgt, education, education-num,
# marital-status, occupation, relationship, race, sex, capital-gain,
# capital-loss, hours-per-week, native-country.
# Target y: does the person make more than $50k per year.


def fit_logistic_baseline(features, target, test_size=0.2, random_state=42):
    """Split the data, fit a LogisticRegression, and score it on the test split.

    Parameters
    ----------
    features : array-like of shape (n_samples, n_features)
    target : array-like with one label per sample. A single-column DataFrame
        is accepted; it is flattened before fitting so scikit-learn does not
        emit a DataConversionWarning about a column-vector y.
    test_size, random_state : forwarded to ``train_test_split``. Using the same
        ``random_state`` for every feature group gives every model the same
        train/test rows, so the accuracies are directly comparable.

    Returns
    -------
    (X_train, X_test, y_train, y_test, model, y_pred, accuracy)
    """
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )
    model = LogisticRegression().fit(X_train, np.ravel(y_train))
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(np.ravel(y_test), y_pred)
    return X_train, X_test, y_train, y_test, model, y_pred, accuracy


# age, class, education
(X_train_ace, X_test_ace, y_train_ace, y_test_ace,
 logistic_model_ace, y_pred_ace, acc_ace) = fit_logistic_baseline(age_class_education, y)

# relationship, race, sex
(X_train_rrs, X_test_rrs, y_train_rrs, y_test_rrs,
 logistic_model_rrs, y_pred_rrs, acc_rrs) = fit_logistic_baseline(relationship_race_sex, y)

# occupation, hours per week, native country
(X_train_ohc, X_test_ohc, y_train_ohc, y_test_ohc,
 logistic_model_ohc, y_pred_ohc, acc_ohc) = fit_logistic_baseline(occupation_hours_country, y)

print("Accuracy for age, class, and education:", acc_ace)
print("Accuracy for relationship, race, and sex:", acc_rrs)
print("Accuracy for occupation, hours per week, and native country:", acc_ohc)

Using Gradient Descent to Minimize the Error of Our Models

In [None]:
# Training Using GD



Model Testing

In [None]:
# Results and Analysis

