# Using Tensorflow to Train Simple NN

The purpose of the below is to classify days over years 2017-2018 by their corresponding mobility patterns between 10 zones in Taipei (quantified by an aggregated temporal network of subway ridership flows across the city)

In [1]:
#use Python 3.7
import numpy as np
import pandas as pd
import keras
from keras.datasets import mnist
from sklearn.preprocessing import MinMaxScaler
from keras.models import Model, Sequential
from keras.layers import Input, Dense
from keras.utils import np_utils

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
#read the data
workfolder='';
TNet=pd.read_csv(workfolder+'taipeiD_TNet2.csv',header=None);

In [3]:
TNet.head() 
#each row represents a 10x10 adjacency matrix of the normalized Taipei subway mobility network between 10 zones flattened into a 100x1 row corresponding to a single day
#days start at jan-1-2017

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,0.017943,0.005415,0.00359,0.008316,0.007859,0.012942,0.012196,0.019543,0.001196,0.003327,...,0.002529,0.001533,0.00186,0.002375,0.005408,0.008922,0.003945,0.011075,0.005073,0.012708
1,0.021283,0.005215,0.00353,0.009359,0.007803,0.014288,0.011185,0.019044,0.001382,0.003499,...,0.002803,0.001757,0.001783,0.002549,0.005515,0.00965,0.003596,0.009618,0.005946,0.013709
2,0.028988,0.006511,0.005591,0.01297,0.007816,0.015878,0.010973,0.015768,0.002252,0.005388,...,0.004649,0.002555,0.002672,0.004291,0.007385,0.009558,0.004293,0.008791,0.01004,0.016301
3,0.029534,0.006471,0.005615,0.013017,0.007717,0.016098,0.011182,0.015815,0.002325,0.005443,...,0.004611,0.002473,0.002636,0.004195,0.007255,0.009487,0.004316,0.008729,0.010296,0.016437
4,0.029333,0.006525,0.005727,0.013098,0.007692,0.016358,0.011,0.015677,0.002344,0.005527,...,0.004694,0.002515,0.002677,0.004222,0.007269,0.009921,0.004387,0.008923,0.010381,0.016914


In [4]:
#convert to an array and scale the data
X=np.array(TNet);

In [5]:
X=MinMaxScaler(feature_range=(0, 1), copy=True).fit_transform(X)

In [6]:
X.shape

(669, 100)

In [7]:
#define day of the week corresponding to each day of observation; 0-Sunday, 1-Monday,...,6-Saturday
y=np.array(range(669))%7; y[:10]

array([0, 1, 2, 3, 4, 5, 6, 0, 1, 2], dtype=int32)

In [8]:
yc=np_utils.to_categorical(y) #get categorical binary variables isSunday, isMonday,...
yc[:5]

array([[1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0.]], dtype=float32)

In [9]:
X_test=X[400:,:]; X_train=X[:400,:]; #split the data into training and test
y_test=yc[400:,:]; y_train=yc[:400,:]

## 1. Classify weekdays/weekends
Label the rows with ones for weekends, zeros for weekdays.
Train a neural network with 4 layers of 30,10,3 and 1 (output) neurons over the training sample against this label, evaluating its performance over the test sample. Report the acheived accuracy (categorical) over the test sample

First three layers use relu activation function, last one - sigmoid.
Use loss='binary_crossentropy', optimizer='adam', 100 epochs, batch_size=20. 

In [57]:
# Create your own labels 1 and 0, and report the accuracy 
y = []
for i in yc:
    if i[:5].sum() == 1:
        y.append(0)
    else:
        y.append(1)
test = y[400:]; train = y[:400]

In [58]:
dim = X_train.shape[1]

In [59]:
np.random.seed(2019)
model = Sequential()
model.add(Dense(30, activation='relu', input_dim=dim))
model.add(Dense(10, activation='relu'))
model.add(Dense(3, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, train, validation_data=(X_test, test), epochs=100, batch_size=20, verbose=2)
preds = model.predict(X_test)

Train on 400 samples, validate on 269 samples
Epoch 1/100
 - 1s - loss: 0.5989 - accuracy: 0.6900 - val_loss: 0.5283 - val_accuracy: 0.7249
Epoch 2/100
 - 0s - loss: 0.5272 - accuracy: 0.7225 - val_loss: 0.5057 - val_accuracy: 0.7732
Epoch 3/100
 - 0s - loss: 0.5041 - accuracy: 0.7350 - val_loss: 0.4817 - val_accuracy: 0.7695
Epoch 4/100
 - 0s - loss: 0.4786 - accuracy: 0.8000 - val_loss: 0.4682 - val_accuracy: 0.7361
Epoch 5/100
 - 0s - loss: 0.4499 - accuracy: 0.7975 - val_loss: 0.4306 - val_accuracy: 0.8141
Epoch 6/100
 - 0s - loss: 0.4104 - accuracy: 0.8250 - val_loss: 0.3914 - val_accuracy: 0.8178
Epoch 7/100
 - 0s - loss: 0.3643 - accuracy: 0.8625 - val_loss: 0.3500 - val_accuracy: 0.8401
Epoch 8/100
 - 0s - loss: 0.3192 - accuracy: 0.8825 - val_loss: 0.3126 - val_accuracy: 0.8773
Epoch 9/100
 - 0s - loss: 0.2769 - accuracy: 0.9400 - val_loss: 0.2823 - val_accuracy: 0.8959
Epoch 10/100
 - 0s - loss: 0.2411 - accuracy: 0.9475 - val_loss: 0.2654 - val_accuracy: 0.8736
Epoch 11/100


Epoch 87/100
 - 0s - loss: 0.0104 - accuracy: 1.0000 - val_loss: 0.1604 - val_accuracy: 0.9554
Epoch 88/100
 - 0s - loss: 0.0095 - accuracy: 1.0000 - val_loss: 0.1526 - val_accuracy: 0.9665
Epoch 89/100
 - 0s - loss: 0.0099 - accuracy: 0.9975 - val_loss: 0.1517 - val_accuracy: 0.9703
Epoch 90/100
 - 0s - loss: 0.0090 - accuracy: 1.0000 - val_loss: 0.1811 - val_accuracy: 0.9517
Epoch 91/100
 - 0s - loss: 0.0089 - accuracy: 1.0000 - val_loss: 0.1475 - val_accuracy: 0.9703
Epoch 92/100
 - 0s - loss: 0.0078 - accuracy: 1.0000 - val_loss: 0.1506 - val_accuracy: 0.9703
Epoch 93/100
 - 0s - loss: 0.0091 - accuracy: 1.0000 - val_loss: 0.1663 - val_accuracy: 0.9591
Epoch 94/100
 - 0s - loss: 0.0100 - accuracy: 0.9975 - val_loss: 0.1652 - val_accuracy: 0.9628
Epoch 95/100
 - 0s - loss: 0.0074 - accuracy: 1.0000 - val_loss: 0.1530 - val_accuracy: 0.9703
Epoch 96/100
 - 0s - loss: 0.0110 - accuracy: 0.9975 - val_loss: 0.1792 - val_accuracy: 0.9591
Epoch 97/100
 - 0s - loss: 0.0103 - accuracy: 0.99

In [67]:
preds = model.predict(X_test)

In [85]:
y_preds = []
for pred in preds:
    if pred[0]>0.5:
        y_preds.append(1)
    else:
        y_preds.append(0)

In [89]:
m = 0
for i in range(len(y_preds)):
    if test[i]==y_preds[i]:
        m += 1
print("The Accuracy is: {}".format(m/len(y_preds)))

The Accuracy is: 0.9702602230483272


## 2. Classify all days of the week
Train a neural network against the origial categorical label. Use 5 layers of 40,15,5 and 7 (outputs, representing probabilities for a current input to correspond to each of the weekdays) neurons over the training sample, evaluating its performance over the test sample (use 'categorical_accurary'). Report the acheived accuracy (categorical) over the test sample.

First three layers use relu activation function, last one - sigmoid.
Use loss='binary_crossentropy', optimizer='adam', 200 epochs, batch_size=20

In [101]:
np.random.seed(2019)
model = Sequential()
model.add(Dense(40, activation='relu', input_dim=dim))
model.add(Dense(15, activation='relu'))
model.add(Dense(5, activation='relu'))
model.add(Dense(7, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['categorical_accuracy'])
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=200, batch_size=20, verbose=2)
preds = model.predict(X_test)

Train on 400 samples, validate on 269 samples
Epoch 1/200
 - 1s - loss: 0.6796 - categorical_accuracy: 0.1900 - val_loss: 0.6571 - val_categorical_accuracy: 0.1301
Epoch 2/200
 - 0s - loss: 0.6340 - categorical_accuracy: 0.1700 - val_loss: 0.6197 - val_categorical_accuracy: 0.1450
Epoch 3/200
 - 0s - loss: 0.6110 - categorical_accuracy: 0.1425 - val_loss: 0.6029 - val_categorical_accuracy: 0.1450
Epoch 4/200
 - 0s - loss: 0.5949 - categorical_accuracy: 0.1425 - val_loss: 0.5883 - val_categorical_accuracy: 0.1450
Epoch 5/200
 - 0s - loss: 0.5808 - categorical_accuracy: 0.1425 - val_loss: 0.5746 - val_categorical_accuracy: 0.1450
Epoch 6/200
 - 0s - loss: 0.5676 - categorical_accuracy: 0.1425 - val_loss: 0.5612 - val_categorical_accuracy: 0.1450
Epoch 7/200
 - 0s - loss: 0.5540 - categorical_accuracy: 0.1450 - val_loss: 0.5483 - val_categorical_accuracy: 0.1487
Epoch 8/200
 - 0s - loss: 0.5408 - categorical_accuracy: 0.1650 - val_loss: 0.5355 - val_categorical_accuracy: 0.1673
Epoch 9/20

Epoch 70/200
 - 0s - loss: 0.2332 - categorical_accuracy: 0.5300 - val_loss: 0.2610 - val_categorical_accuracy: 0.5279
Epoch 71/200
 - 0s - loss: 0.2348 - categorical_accuracy: 0.5300 - val_loss: 0.2495 - val_categorical_accuracy: 0.5353
Epoch 72/200
 - 0s - loss: 0.2309 - categorical_accuracy: 0.5400 - val_loss: 0.2506 - val_categorical_accuracy: 0.5428
Epoch 73/200
 - 0s - loss: 0.2286 - categorical_accuracy: 0.5375 - val_loss: 0.2523 - val_categorical_accuracy: 0.5353
Epoch 74/200
 - 0s - loss: 0.2273 - categorical_accuracy: 0.5425 - val_loss: 0.2500 - val_categorical_accuracy: 0.5428
Epoch 75/200
 - 0s - loss: 0.2257 - categorical_accuracy: 0.5550 - val_loss: 0.2450 - val_categorical_accuracy: 0.5428
Epoch 76/200
 - 0s - loss: 0.2233 - categorical_accuracy: 0.5425 - val_loss: 0.2408 - val_categorical_accuracy: 0.5390
Epoch 77/200
 - 0s - loss: 0.2233 - categorical_accuracy: 0.5450 - val_loss: 0.2418 - val_categorical_accuracy: 0.5539
Epoch 78/200
 - 0s - loss: 0.2208 - categorical_

Epoch 139/200
 - 0s - loss: 0.1891 - categorical_accuracy: 0.5575 - val_loss: 0.2300 - val_categorical_accuracy: 0.5353
Epoch 140/200
 - 0s - loss: 0.1895 - categorical_accuracy: 0.5800 - val_loss: 0.2315 - val_categorical_accuracy: 0.5279
Epoch 141/200
 - 0s - loss: 0.1887 - categorical_accuracy: 0.6125 - val_loss: 0.2305 - val_categorical_accuracy: 0.5279
Epoch 142/200
 - 0s - loss: 0.1889 - categorical_accuracy: 0.5950 - val_loss: 0.2327 - val_categorical_accuracy: 0.5316
Epoch 143/200
 - 0s - loss: 0.1887 - categorical_accuracy: 0.6025 - val_loss: 0.2327 - val_categorical_accuracy: 0.5390
Epoch 144/200
 - 0s - loss: 0.1894 - categorical_accuracy: 0.5950 - val_loss: 0.2331 - val_categorical_accuracy: 0.5353
Epoch 145/200
 - 0s - loss: 0.1892 - categorical_accuracy: 0.5875 - val_loss: 0.2301 - val_categorical_accuracy: 0.5353
Epoch 146/200
 - 0s - loss: 0.1884 - categorical_accuracy: 0.5925 - val_loss: 0.2308 - val_categorical_accuracy: 0.5465
Epoch 147/200
 - 0s - loss: 0.1877 - cat

In [99]:
from sklearn.metrics import accuracy_score
print('Accuracy Rate: %f' % accuracy_score(y_test.argmax(axis=1), preds.argmax(axis=1)))

Accuracy Rate: 0.598513
