# Jupyter notebook to demonstrate how the machine learning model will operate

Step 1: Import the libraries needed to run the multi-layer perceptron model.

In [1]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

Step 2: Read data from the CSV file. For this execution, we used the Adult census dataset from the UCI Machine Learning Repository.
        Found here: https://archive.ics.uci.edu/ml/datasets/Adult

In [2]:
# Load the census CSV. Forward slashes are used so the relative path resolves
# on Windows, macOS and Linux alike (a backslash is not a separator on POSIX).
census_data = pd.read_csv("../Data/adult.csv")
# read_csv already returns a DataFrame, so `df` is just a second name for the
# same object. Any column re-assignment made through `census_data` in later
# cells is therefore visible through `df` as well.
df = census_data

Step 3: Read the settings from the file 'data_settings.txt'. The file contains the categories we modify as well as a list of all the categories used in the model.

In [3]:
col_labels = []
model_parameters = []  # not populated by this cell
# Forward slashes keep the relative path portable across operating systems.
# The with-block closes the file on exit, so no explicit close() is needed.
with open('../data_settings.txt') as f:
    lines = f.readlines()
    # The first line is read as "<key>:<comma-separated column names>".
    # Strip each name so "a, b" yields "b" rather than " b", which would not
    # match a DataFrame column when used as a label.
    categories = [name.strip() for name in lines[0].split(':')[1].split(',')]
    # Skip empty entries (e.g. from a trailing comma or an empty value).
    col_labels.extend(name for name in categories if name)

Step 4: Use LabelEncoder to convert the listed categorical columns into integer codes. The machine learning model requires numeric input, so this step lets it read the data.

In [4]:
# Hard-coded alternative to the settings file, kept for reference:
#col_labels = ['workclass','education','marital-status','occupation','relationship','race','sex','native-country','earnings']
enc = LabelEncoder()
for col in col_labels:
    # Cast to str first so every value in the column has a uniform type,
    # then re-fit the shared encoder on that column and store its integer codes.
    census_data[col] = enc.fit_transform(census_data[col].astype('str'))

Step 5: Split the data into four sets: features and labels for training, and features and labels for testing.

In [5]:
# Slice `census_data`, the frame whose columns were label-encoded in the
# previous step. `df` was created before the encoding as a separate DataFrame
# object, so it is not guaranteed to reflect the encoded columns
# (NOTE(review): this appears to depend on the pandas version - confirm).
features = census_data.iloc[:, :-1]        # every column except the last
classification = census_data.iloc[:, -1]   # the last column is the target
# A fixed random_state makes the train/test split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    features, classification, random_state=42
)

Step 6: Create and execute the machine learning model. Our project will attempt to provide a method of adjusting machine learning parameters.

In [6]:
# Parameters of the machine learning model go within MLPClassifier().
# Please see https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html for details on the multi-layer perceptron model.
mlp_clf = MLPClassifier()
mlp_clf.fit(X_train, y_train)
# score() reports mean accuracy on the test split as a fraction; scale it to a percentage.
mlp_score = mlp_clf.score(X_test, y_test) * 100
print(mlp_score)
# The settings file handle `f` was already closed by its with-block, so this
# cell does not call f.close().

73.22196290382017
