In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Load the dataset from a CSV file located relative to the notebook's working directory.
DATASET_PATH = "census_income_dataset_preprocessed.csv"
data = pd.read_csv(DATASET_PATH)

In [3]:
# Separate the "target" column (labels) from every other column (features).
Y = data["target"]
X = data.drop(columns="target")

In [4]:
# Step 1: hold out 10% of the rows as the test split.
X_new, X_test, Y_new, Y_test = train_test_split(
    X, Y, test_size=0.1, random_state=101
)

# Step 2: carve a validation (dev) split out of the remainder. The fraction is
# chosen so that the dev split ends up with the same number of rows as the
# test split.
test_size = len(X_test) / len(X_new)
X_train, X_dev, Y_train, Y_dev = train_test_split(
    X_new, Y_new, test_size=test_size, random_state=101
)

# Show the shape of every split as a quick sanity check.
print(*(part.shape for part in (X_train, X_dev, X_test, Y_train, Y_dev, Y_test)))

(26047, 9) (3257, 9) (3257, 9) (26047,) (3257,) (3257,)


# First round of experiments: number of hidden layers

## Experiment 1: one hidden layer of 100 units (library default)

In [5]:
# Baseline network: hidden_layer_sizes is left at the library default, the seed
# is fixed for repeatability, and training is capped at 500 iterations.
model = MLPClassifier(
    max_iter=500,
    random_state=101,
)
# Kept as the last expression so the notebook displays the fitted estimator.
model.fit(X_train, Y_train)



MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=500,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=101, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [6]:
# Pair each split's display name with its features and labels.
evaluation_splits = [
    ("Training", X_train, Y_train),
    ("Validation", X_dev, Y_dev),
    ("Testing", X_test, Y_test),
]

# Accuracy of the fitted model on every split, keyed by split name.
# NOTE(review): the test split is scored alongside the validation split in each
# experiment; confirm that architecture choices are driven by the validation
# score only, otherwise the test accuracy is no longer an unbiased estimate.
accuracy = {
    split_name: accuracy_score(targets, model.predict(features))
    for split_name, features, targets in evaluation_splits
}

print(accuracy)

{'Training': 0.8243943640342458, 'Validation': 0.803193122505373, 'Testing': 0.8222290451335585}


## Experiment 2: two hidden layers of 100 units each

In [7]:
# Two hidden layers of 100 units each; seed and iteration cap as in the other
# experiments.
model = MLPClassifier(
    hidden_layer_sizes=(100, 100),
    max_iter=500,
    random_state=101,
)
# Kept as the last expression so the notebook displays the fitted estimator.
model.fit(X_train, Y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100, 100), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=500,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=101, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [8]:
# Pair each split's display name with its features and labels.
evaluation_splits = [
    ("Training", X_train, Y_train),
    ("Validation", X_dev, Y_dev),
    ("Testing", X_test, Y_test),
]

# Accuracy of the fitted model on every split, keyed by split name.
accuracy = {
    split_name: accuracy_score(targets, model.predict(features))
    for split_name, features, targets in evaluation_splits
}

print(accuracy)

{'Training': 0.8673935577993627, 'Validation': 0.8311329444273872, 'Testing': 0.8520110531163647}


## Experiment 3: three hidden layers of 100 units each

In [9]:
# Three hidden layers of 100 units each; seed and iteration cap as in the other
# experiments.
model = MLPClassifier(
    hidden_layer_sizes=(100, 100, 100),
    max_iter=500,
    random_state=101,
)
# Kept as the last expression so the notebook displays the fitted estimator.
model.fit(X_train, Y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100, 100, 100), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=500,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=101, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [10]:
# Pair each split's display name with its features and labels.
evaluation_splits = [
    ("Training", X_train, Y_train),
    ("Validation", X_dev, Y_dev),
    ("Testing", X_test, Y_test),
]

# Accuracy of the fitted model on every split, keyed by split name.
accuracy = {
    split_name: accuracy_score(targets, model.predict(features))
    for split_name, features, targets in evaluation_splits
}

print(accuracy)

{'Training': 0.8494644296848005, 'Validation': 0.8299048203868591, 'Testing': 0.842800122812404}


# Second round of experiments: units per hidden layer

## Experiment 1: two hidden layers of 50 units each

In [11]:
# Two hidden layers of 50 units each; seed and iteration cap as in the other
# experiments.
model = MLPClassifier(
    hidden_layer_sizes=(50, 50),
    max_iter=500,
    random_state=101,
)
# Kept as the last expression so the notebook displays the fitted estimator.
model.fit(X_train, Y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(50, 50), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=500,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=101, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [12]:
# Pair each split's display name with its features and labels.
evaluation_splits = [
    ("Training", X_train, Y_train),
    ("Validation", X_dev, Y_dev),
    ("Testing", X_test, Y_test),
]

# Accuracy of the fitted model on every split, keyed by split name.
accuracy = {
    split_name: accuracy_score(targets, model.predict(features))
    for split_name, features, targets in evaluation_splits
}

print(accuracy)

{'Training': 0.8523822321188621, 'Validation': 0.828983727356463, 'Testing': 0.8443352778630642}


## Experiment 2: two hidden layers of 150 units each

In [13]:
# Two hidden layers of 150 units each; seed and iteration cap as in the other
# experiments.
model = MLPClassifier(
    hidden_layer_sizes=(150, 150),
    max_iter=500,
    random_state=101,
)
# Kept as the last expression so the notebook displays the fitted estimator.
model.fit(X_train, Y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(150, 150), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=500,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=101, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [14]:
# Pair each split's display name with its features and labels.
evaluation_splits = [
    ("Training", X_train, Y_train),
    ("Validation", X_dev, Y_dev),
    ("Testing", X_test, Y_test),
]

# Accuracy of the fitted model on every split, keyed by split name.
accuracy = {
    split_name: accuracy_score(targets, model.predict(features))
    for split_name, features, targets in evaluation_splits
}

print(accuracy)

{'Training': 0.859024071870081, 'Validation': 0.8219220141234265, 'Testing': 0.8455634019035922}
