In [128]:
import numpy as np
import pandas as pd
import sklearn
import keras
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from keras.initializers import RandomNormal
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
warnings.filterwarnings('ignore', category=DeprecationWarning)

In [129]:
def feature_engr(train_set, test_set):
    '''
    Taking the raw training and test sets, and cleaning them
    up so that they can be used in training a model
    
    Parameters
    ----------
    train_set: pandas dataframe
        training set read in from pandas
    test_set: pandas dataframe
        test set read in from pandas
    
    Returns
    -------
    train_scaled, test_scaled, y_train, y_test
        feature engineered and cleaned up sets   
    '''
    train_set =train_set.dropna()
    test_set = test_set.dropna()
    
    combined = pd.concat([train_set, test_set], axis=0).reset_index(drop=True)
    
    combined.loc[combined.target == ' >50K', 'target']=1
    combined.loc[combined.target==' <=50K', 'target']=0
    combined.loc[combined.target==' <=50K.', 'target']=0
    combined.loc[combined.target == ' >50K.', 'target']=1
    
       
    dummies = pd.get_dummies(combined)
    
    ts = len(train_set)
    ts2 = len(test_set)
    
    train_set = dummies[0:ts]
    test_set = dummies[ts:]
    
    y_train = train_set['target']
    y_test = test_set['target']
    
    train_set = train_set.drop(["target"], axis=1)
    test_set = test_set.drop(["target"], axis=1)
    
    std_scaler = StandardScaler()
    std_scaler.fit(train_set)
    train_scaled = std_scaler.transform(train_set)
    test_scaled = std_scaler.transform(test_set)
    

    return train_scaled, test_scaled, y_train, y_test

In [130]:
columns=['age', 'workclass','fnlwgt','education','education-num','marital-status','occupation',
         'relationship','race','sex','capital-gain','capital-loss','hours-per-week','native-country', 'target']
test_set = pd.read_csv("adult.test.txt", index_col=False, names=columns, na_values=[' ?'])

In [131]:
columns=['age', 'workclass','fnlwgt','education','education-num','marital-status','occupation',
         'relationship','race','sex','capital-gain','capital-loss','hours-per-week','native-country', 'target']
train_set = pd.read_csv("adult.data.txt", index_col=False, names=columns,na_values=[' ?'])


In [132]:
train_scaled, test_scaled, y_train, y_test = feature_engr(train_set, test_set)



  return self.partial_fit(X, y)


In [133]:
dim = train_scaled.shape
#num_classes = 2
batch_size = 128
epochs = 20
#y_train = keras.utils.to_categorical(y_train, num_classes)
#y_test = keras.utils.to_categorical(y_test, num_classes)


In [134]:
def build_model():
    '''
    Building a basic sequential model, with me just guessing basically
    
    Parameters
    ----------
    
    
    Returns
    -------
    compiled neural net
    '''
    
    model = Sequential()
    model.add(Dense(512, activation='elu', input_shape=(dim[1],)))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Dense(256, activation='elu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Dense(256, activation='elu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Dense(256, activation='elu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Dense(128, activation='elu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Dense(64, activation='elu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='sigmoid'))

    model.summary()

    model.compile(loss='binary_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'])
    
    return model

In [135]:
mod = build_model()
history = mod.fit(train_scaled, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_split=.15)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_73 (Dense)             (None, 512)               127488    
_________________________________________________________________
batch_normalization_31 (Batc (None, 512)               2048      
_________________________________________________________________
dropout_59 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_74 (Dense)             (None, 256)               131328    
_________________________________________________________________
batch_normalization_32 (Batc (None, 256)               1024      
_________________________________________________________________
dropout_60 (Dropout)         (None, 256)               0         
_________________________________________________________________
dense_75 (Dense)             (None, 256)               65792     
__________

In [136]:
score = model.evaluate(test_scaled, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.36551714549818204
Test accuracy: 0.837715803437024
