In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.externals import joblib
from sklearn import metrics




In [2]:
# Read the cleaned census CSV and drop its 'Unnamed: 0' column in one chained
# expression, then preview the first five rows.
df = pd.read_csv('cleaned_census.csv').drop(columns=['Unnamed: 0'])
df.head(5)

Unnamed: 0,nchild,nchild_under_5,nsibs,eldch,sex,age,occ1950,ed_group_h
0,0,0,0,0,Male,43,Operative and kindred workers (nec),1
1,0,0,0,0,Male,28,"Managers, officials, and proprietors (nec)",2
2,3,0,0,15,Male,36,Foremen (nec),1
3,0,0,0,0,Male,48,"Managers, officials, and proprietors (nec)",1
4,2,2,0,2,Male,27,Salesmen and sales clerks (nec),1


In [3]:
# Placeholder occupation labels that carry no usable information. They are
# blanked to NaN so the dropna() at the end of this cell removes those rows.
missing_occupations = ['N/A (blank)', 'Occupation missing/unknown']
df['occ1950'] = df['occ1950'].mask(df['occ1950'].isin(missing_occupations), np.nan)

# Binary-encode sex: 'Male' -> 1, 'Female' -> 0. The two codes must differ,
# otherwise the column is constant and useless as a feature. Any value other
# than these two labels is left unchanged.
df['sex'] = np.where(df['sex'] == 'Male', 1, df['sex'])
df['sex'] = np.where(df['sex'] == 'Female', 0, df['sex'])

# Discard every row that still has a missing value in any column.
df = df.dropna()

In [4]:
# One indicator column per distinct occupation label.
dummies = pd.get_dummies(df['occ1950'])

In [5]:
# Append the occupation indicator columns to the cleaned frame and remove the
# original text column they were derived from.
census = pd.concat([df, dummies], axis=1).drop(columns=['occ1950'])
census

Unnamed: 0,nchild,nchild_under_5,nsibs,eldch,sex,age,ed_group_h,Accountants and auditors,Actors and actresses,Advertising agents and salesmen,...,"Tinsmiths, coppersmiths, and sheet metal workers","Tool makers, and die makers and setters",Truck and tractor drivers,Upholsterers,"Ushers, recreation and amusement",Veterinarians,Waiters and waitresses,Watchmen (crossing) and bridge tenders,"Weavers, textile",Welders and flame cutters
0,0,0,0,0,1,43,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,1,28,2,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,0,0,15,1,36,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,1,48,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2,2,0,2,1,27,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188046,0,0,0,0,1,53,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
188047,0,0,0,0,1,67,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
188048,2,0,0,7,1,37,2,0,0,0,...,0,0,0,0,0,0,0,0,0,0
188049,6,3,0,11,1,29,2,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [33]:
# Show the dtype of each column in the combined frame.
census.dtypes

nchild                                     int64
nchild_under_5                             int64
nsibs                                      int64
eldch                                      int64
sex                                       object
                                           ...  
Veterinarians                              uint8
Waiters and waitresses                     uint8
Watchmen (crossing) and bridge tenders     uint8
Weavers, textile                           uint8
Welders and flame cutters                  uint8
Length: 275, dtype: object

In [34]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import utils

In [35]:
# Targets: one-hot encoding of the education group.
# NOTE(review): to_categorical allocates columns 0..max(label); if the labels
# start at 1, column 0 is never set — confirm this is intended.
Y = np.asarray(utils.to_categorical(census['ed_group_h'].to_numpy()))
# Features: every remaining column, as a plain NumPy array.
X = census.drop(columns=['ed_group_h']).to_numpy()


In [36]:


# Cast the feature matrix to 32-bit floats.
X = X.astype('float32')

In [89]:
# Hold out 9% of the rows for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.09,
    random_state=0,
)

In [90]:
# Show the (rows, columns) shape of the training feature matrix.
X_train.shape

(171126, 274)

In [91]:
# Feed-forward classifier: two 64-unit ReLU hidden layers, each followed by
# 50% dropout, then a softmax layer with one unit per column of Y.
model = tf.keras.Sequential([
    layers.Dense(64, activation='relu', input_dim=X.shape[1]),
    layers.Dropout(0.5),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(Y.shape[1], activation='softmax'),
])

# SGD with Nesterov momentum. `learning_rate` replaces the deprecated `lr`
# alias, and the legacy `decay` keyword (lr / (1 + decay * iterations)) is
# expressed as an InverseTimeDecay schedule with decay_steps=1, which yields
# the same per-step learning rate and is accepted by current Keras optimizers.
sgd_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.01, decay_steps=1, decay_rate=1e-6)
sgd = tf.keras.optimizers.SGD(
    learning_rate=sgd_schedule, momentum=0.9, nesterov=True)

# Adamax optimizer with explicit hyper-parameters.
ada = tf.keras.optimizers.Adamax(
    learning_rate=0.005, beta_1=0.9, beta_2=0.999, epsilon=1e-07, name="Adamax")

In [92]:
# Configure training: Adamax optimizer, categorical cross-entropy loss,
# accuracy reported as the metric.
model.compile(
    optimizer=ada,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [93]:
# Train for 30 passes over the training split.
model.fit(
    x=X_train,
    y=y_train,
    epochs=30,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x24a071b1c88>

In [94]:
# Score the fitted model on both splits. evaluate() yields the loss followed
# by the accuracy metric; only the accuracy is kept.
_, train_acc = model.evaluate(x=X_train, y=y_train, verbose=0)
_, test_acc = model.evaluate(x=X_test, y=y_test, verbose=0)
print(
    'Train: %.3f, Test: %.3f' % (train_acc, test_acc)
)

Train: 0.636, Test: 0.632
