In [1]:
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense

# Intro to Neural Networks

## Demos

In [3]:
# Show which compute devices TensorFlow can see in this environment.
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

In [4]:
# Two random matrices whose inner dimensions match (200x500 and 500x200),
# drawn from U[0, 1); A is sampled first, then B.
A, B = (np.random.uniform(size = dims) for dims in ((200, 500), (500, 200)))

In [5]:
# Matrix product of the two NumPy arrays; TensorFlow accepts them directly
# and returns a tensor (200x500 times 500x200 gives 200x200).
tf.matmul(A, B)

<tf.Tensor: shape=(200, 200), dtype=float64, numpy=
array([[127.24522129, 123.09367397, 123.11675354, ..., 124.3268622 ,
        128.86083907, 132.11287904],
       [123.02809325, 123.52609522, 119.19766634, ..., 121.45162854,
        121.98163823, 128.75521498],
       [117.50754082, 116.60337139, 116.56676013, ..., 117.77602778,
        117.28467566, 122.94939686],
       ...,
       [128.36805345, 127.71596173, 120.30973894, ..., 131.47537736,
        128.81665996, 137.91502215],
       [121.24690387, 120.99482829, 118.72616612, ..., 120.18345776,
        124.99874792, 134.52654949],
       [119.82505325, 121.95549942, 118.43563334, ..., 122.7094102 ,
        121.61850346, 126.90456199]])>

In [6]:
# Element-wise arithmetic on NumPy arrays: (2a + 3b)^2.
a, b = np.array([5, 1, 2, 3, 10]), np.array([8, -3, 8, 15, 0])
result = (2 * a + 3 * b) ** 2

# One call, two lines of output: the values, then the container type.
print(result, type(result), sep = "\n")

[1156   49  784 2601  400]
<class 'numpy.ndarray'>


In [7]:
# The same (2a + 3b)^2 expression, this time on TensorFlow constants built
# from Python lists; the arithmetic operators work element-wise on tensors.
a, b = tf.constant([5, 1, 2, 3, 10]), tf.constant([8, -3, 8, 15, 0])
result = (2 * a + 3 * b) ** 2

# One call, two lines of output: the tensor, then its Python type.
print(result, type(result), sep = "\n")

tf.Tensor([1156   49  784 2601  400], shape=(5,), dtype=int32)
<class 'tensorflow.python.framework.ops.EagerTensor'>


In [8]:
# .numpy() converts the eager tensor back into a NumPy array.
type(result.numpy())

numpy.ndarray

In [9]:
# TensorFlow constants can also be created from NumPy arrays; the
# expression and its result are the same as with plain lists.
a, b = (
    tf.constant(np.array([5, 1, 2, 3, 10])),
    tf.constant(np.array([8, -3, 8, 15, 0])),
)
result = (2 * a + 3 * b) ** 2

# One call, two lines of output: the tensor, then its Python type.
print(result, type(result), sep = "\n")

tf.Tensor([1156   49  784 2601  400], shape=(5,), dtype=int32)
<class 'tensorflow.python.framework.ops.EagerTensor'>


In [10]:
# Scalars are tensors too (shape == ()); the same expression applies.
a, b = tf.constant(5), tf.constant(8)
result = (2 * a + 3 * b) ** 2

# One call, two lines of output: the tensor, then its Python type.
print(result, type(result), sep = "\n")

tf.Tensor(1156, shape=(), dtype=int32)
<class 'tensorflow.python.framework.ops.EagerTensor'>


In [11]:
# The operator syntax above is shorthand for explicit TensorFlow ops.
# Here (2a + 3b)^2 is spelled out step by step with tf.multiply / tf.add / tf.pow.
a, b = (
    tf.constant(np.array([5, 1, 2, 3, 10])),
    tf.constant(np.array([8, -3, 8, 15, 0])),
)

doubled_a = tf.multiply(2, a)
tripled_b = tf.multiply(3, b)
tf.pow(tf.add(doubled_a, tripled_b), 2)

<tf.Tensor: shape=(5,), dtype=int32, numpy=array([1156,   49,  784, 2601,  400])>

In [12]:
# Mean over all elements; integer input yields an integer (int32) result.
tf.reduce_mean([1, 2, 3])

<tf.Tensor: shape=(), dtype=int32, numpy=2>

In [13]:
# With no axis given, the mean is taken over every element of the 10x10
# array, so the result is a scalar tensor.
tf.reduce_mean(np.random.uniform(size = (10, 10)))

<tf.Tensor: shape=(), dtype=float64, numpy=0.4888455985020832>

In [14]:
# Reset Keras' global state before building the models below.
tf.keras.backend.clear_session()

In [15]:
# Load the raw Adult census file. It has no header row, and fields are
# separated by ", " (comma + space); a multi-character separator needs
# the Python parsing engine.
income_data = pd.read_csv(
    "./data/adult.data",
    header = None,
    sep = ", ",
    engine = "python",
)

In [16]:
# The file has no header, so name the 15 columns explicitly (in file order).
income_data.columns = [
    "age",
    "workclass",
    "fnlwgt",
    "education",
    "education-num",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
    "native-country",
    "income_class",
]

In [17]:
# Quick look at the loaded table (pandas truncates the display).
income_data

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income_class
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,27,Private,257302,Assoc-acdm,12,Married-civ-spouse,Tech-support,Wife,White,Female,0,0,38,United-States,<=50K
32557,40,Private,154374,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,0,40,United-States,>50K
32558,58,Private,151910,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
32559,22,Private,201490,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,<=50K


In [18]:
# Separate the predictors from the label column.
income_attributes = income_data.drop(columns = ["income_class"])
income_target = income_data["income_class"]

In [21]:
# One-hot encode every categorical (string) column.
# The dtype is pinned explicitly: pandas >= 2.0 would otherwise emit boolean
# dummy columns, and a frame mixing bool and integer columns converts to an
# object array that Keras cannot consume. uint8 matches the 0/1 values of
# the historical default.
income_attributes = pd.get_dummies(income_attributes, dtype = np.uint8)

In [30]:
# Remove the "fnlwgt" column from the predictors
# (presumably the census sampling weight, not a personal attribute — TODO confirm).
income_attributes = income_attributes.drop("fnlwgt", axis = "columns")

In [49]:
# Encode the two income classes as integers: 0 = "<=50K", 1 = ">50K".
income_target = income_target.replace({
    "<=50K": 0,
    ">50K": 1,
})

In [50]:
# Hold out 5000 rows for testing. Stratifying on the target keeps the class
# proportions the same in both parts; a fixed random_state makes the split
# (and every number computed from it) reproducible across runs.
income_attributes_train, income_attributes_test, income_target_train, income_target_test = train_test_split(
    income_attributes,
    income_target,
    stratify = income_target,
    test_size = 5000,
    random_state = 42,
)

In [51]:
# Share of each class in the training labels.
income_target_train.value_counts().div(len(income_target_train))

0    0.759189
1    0.240811
Name: income_class, dtype: float64

In [52]:
# Share of each class in the test labels (should match the training split).
income_target_test.value_counts().div(len(income_target_test))

0    0.7592
1    0.2408
Name: income_class, dtype: float64

In [53]:
# Inspect the training predictors after one-hot encoding (still unscaled).
income_attributes_train

Unnamed: 0,age,education-num,capital-gain,capital-loss,hours-per-week,workclass_?,workclass_Federal-gov,workclass_Local-gov,workclass_Never-worked,workclass_Private,...,native-country_Portugal,native-country_Puerto-Rico,native-country_Scotland,native-country_South,native-country_Taiwan,native-country_Thailand,native-country_Trinadad&Tobago,native-country_United-States,native-country_Vietnam,native-country_Yugoslavia
29467,30,13,0,0,45,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
19043,30,10,0,0,40,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
3478,35,13,0,0,40,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
21382,62,14,0,0,50,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
24538,32,13,0,0,30,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14725,65,13,10605,0,40,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
7757,50,10,0,0,60,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
8140,27,9,0,0,50,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
17832,66,4,0,0,20,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0


In [35]:
# Rescale every feature to [0, 1]. The scaler learns each column's min/max
# from the training rows only and then applies the same mapping to both
# splits, so nothing about the test set leaks into preprocessing.
scaller = MinMaxScaler().fit(income_attributes_train)
income_attributes_train_scalled = scaller.transform(income_attributes_train)
income_attributes_test_scalled = scaller.transform(income_attributes_test)

In [40]:
# Sanity check: per-column maximum of the scaled training data.
# Every column should top out at 1; a 0 presumably marks a dummy column that
# is constant (all zeros) in the training split — TODO confirm.
income_attributes_train_scalled.max(axis = 0)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1.])

In [41]:
# Number of input features (columns) the models will receive.
# NOTE: the variable name is misspelled ("attrubutes"); it is kept as-is
# because the model-definition cells below refer to it.
num_attrubutes = len(income_attributes.columns)
num_attrubutes

107

In [64]:
# Logistic regression expressed as a network: a single sigmoid unit that is
# fed directly by all input features.
logistic_regression = Sequential(name = "log_reg")
logistic_regression.add(Input(shape = (num_attrubutes, )))
logistic_regression.add(Dense(1, activation = "sigmoid"))

In [65]:
# Layer overview: one weight per input feature plus one bias.
logistic_regression.summary()

Model: "log_reg"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 1)                 108       
                                                                 
Total params: 108
Trainable params: 108
Non-trainable params: 0
_________________________________________________________________


In [56]:
# Binary cross-entropy is the loss matching a single sigmoid output; plain
# SGD is the optimizer. Accuracy is tracked as well, so the training log is
# directly comparable with the deeper network compiled later in the notebook.
logistic_regression.compile(
    loss = "binary_crossentropy",
    optimizer = "sgd",
    metrics = ["accuracy"],
)

In [63]:
# Train on the min-max scaled features, not the raw ones: the raw columns
# live on very different scales (e.g. capital-gain in the thousands next to
# 0/1 dummies), which makes plain SGD converge poorly.
logistic_regression.fit(income_attributes_train_scalled, income_target_train, epochs = 99)

Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
Epoch 25/99
Epoch 26/99
Epoch 27/99
Epoch 28/99
Epoch 29/99
Epoch 30/99
Epoch 31/99
Epoch 32/99
Epoch 33/99
Epoch 34/99
Epoch 35/99
Epoch 36/99
Epoch 37/99
Epoch 38/99
Epoch 39/99
Epoch 40/99
Epoch 41/99
Epoch 42/99
Epoch 43/99
Epoch 44/99
Epoch 45/99
Epoch 46/99
Epoch 47/99
Epoch 48/99
Epoch 49/99
Epoch 50/99
Epoch 51/99
Epoch 52/99
Epoch 53/99
Epoch 54/99
Epoch 55/99
Epoch 56/99
Epoch 57/99
Epoch 58/99
Epoch 59/99
Epoch 60/99
Epoch 61/99
Epoch 62/99
Epoch 63/99
Epoch 64/99
Epoch 65/99
Epoch 66/99
Epoch 67/99
Epoch 68/99
Epoch 69/99
Epoch 70/99
Epoch 71/99
Epoch 72/99
Epoch 73/99
Epoch 74/99
Epoch 75/99
Epoch 76/99
Epoch 77/99
Epoch 78/99
Epoch 79/99
Epoch 80/99
Epoch 81/99
Epoch 82/99
Epoch 83/99
Epoch 84/99
E

Epoch 96/99
Epoch 97/99
Epoch 98/99
Epoch 99/99


<keras.callbacks.History at 0x1797360c550>

In [98]:
# Fully connected classifier: three ReLU hidden layers that narrow
# 30 -> 20 -> 10, followed by one sigmoid unit for the binary output.
nn = Sequential(
    [
        Input(shape = (num_attrubutes, )),
        *(Dense(units, activation = "relu") for units in (30, 20, 10)),
        Dense(1, activation = "sigmoid"),
    ],
    name = "nn",
)

In [99]:
# Layer-by-layer overview with parameter counts.
nn.summary()

Model: "nn"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_41 (Dense)            (None, 30)                3240      
                                                                 
 dense_42 (Dense)            (None, 20)                620       
                                                                 
 dense_43 (Dense)            (None, 10)                210       
                                                                 
 dense_44 (Dense)            (None, 1)                 11        
                                                                 
Total params: 4,081
Trainable params: 4,081
Non-trainable params: 0
_________________________________________________________________


In [109]:
# Same loss and optimizer as for the logistic regression; accuracy is
# reported alongside the loss during training and evaluation.
nn.compile(
    optimizer = "sgd",
    loss = "binary_crossentropy",
    metrics = ["accuracy"],
)

In [110]:
nn.fit(income_attributes_train, income_target_train, epochs = 99)

Epoch 1/99
Epoch 2/99
Epoch 3/99
Epoch 4/99
Epoch 5/99
Epoch 6/99
Epoch 7/99
Epoch 8/99
Epoch 9/99
Epoch 10/99
Epoch 11/99
Epoch 12/99
Epoch 13/99
Epoch 14/99
Epoch 15/99
Epoch 16/99
Epoch 17/99
Epoch 18/99
Epoch 19/99
Epoch 20/99
Epoch 21/99
Epoch 22/99
Epoch 23/99
Epoch 24/99
194/862 [=====>........................] - ETA: 0s - loss: 0.3976 - accuracy: 0.7940

KeyboardInterrupt: 

In [111]:
nn.evaluate(income_attributes_train, income_target_train)



[0.3933012783527374, 0.806828498840332]

In [112]:
nn.evaluate(income_attributes_test, income_target_test)



[0.39848995208740234, 0.8004000186920166]