# NYC 311 practice from the book *Machine Learning with TensorFlow*, 2nd ed. (Manning Publications)

# Prologue

Extracting the required data

Dataset source for download: [NYC OpenData - 311.csv (View based on 311 Service Requests from 2010 to Present)](https://data.cityofnewyork.us/Social-Services/311-csv/i4zx-95p9)

In [None]:
from dateutil import parser as date_parser

import matplotlib.pyplot as plt
import numpy as np
import tensorflow.compat.v1 as tf

import csv
import functools
import json
import time
import pprint

# The notebook uses the TensorFlow 1.x graph API (placeholders, sessions)
# through the compat.v1 module, so TF2 behaviour is switched off up front.
tf.disable_v2_behavior()

# Shared pretty-printer; compact=True packs several items onto each line.
pp = pprint.PrettyPrinter(compact=True)

# Chapter 1: Utils for reading and dataset splitting

In [None]:


def read_and_process(filename, date_idx, year=None, bucket=7):
    """Count CSV rows per time bucket, keyed by bucket index within the year.

    Args:
        filename: Path of the CSV file; its first row is skipped as a header.
        date_idx: Zero-based index of the column holding each row's date
            string.
        year: If given, only rows dated in that year are counted and the
            result is a flat ``{bucket_index: count}`` dict. If ``None``,
            rows of every year are counted and the result is
            ``{year: {bucket_index: count}}``, with a year appearing once a
            row dated in it has been seen.
        bucket: Bucket width in days (the default 7 gives weekly counts).

    Returns:
        The frequency dict described above. Every histogram has
        ``365 // bucket`` entries, pre-initialised to 0.
    """
    days_in_year = 365
    num_periods = days_in_year // bucket

    freq = {} if year is None else dict.fromkeys(range(num_periods), 0)

    # newline='' lets the csv module do its own line-ending handling, so
    # quoted fields containing newlines are parsed correctly.
    with open(filename, 'r', newline='') as csvfile:
        csvreader = csv.reader(csvfile)

        # Skip the header row; the default keeps an empty file from raising.
        next(csvreader, None)
        for row in csvreader:
            # Blank lines, rows too short to hold the date column, and rows
            # with an empty date cell carry no usable date.
            if not row or len(row) <= date_idx or row[date_idx] == '':
                continue

            current_dt = date_parser.parse(row[date_idx])
            current_year_day = current_dt.timetuple().tm_yday  # 1-based
            current_year = current_dt.year
            period = current_year_day // bucket

            if year is None:
                if current_year not in freq:
                    freq[current_year] = dict.fromkeys(range(num_periods), 0)
                histogram = freq[current_year]
            elif current_year == year:
                histogram = freq
            else:
                continue

            # The first test is the original end-of-year cutoff; the second
            # keeps the index inside the histogram for bucket sizes where
            # that cutoff alone would land past the last period.
            if current_year_day < (days_in_year - 1) and period < num_periods:
                histogram[period] += 1
    return freq

In [None]:
# Path of the 311 CSV export (located under Kaggle's /kaggle/input directory).
filename = '/kaggle/input/bookpractice-mp-mlwtf2ed-ch4-311-csvdownload/311.csv'
# Request counts for 2014 in the default 7-day buckets; the date string is
# read from column index 1 of each row.
freq = read_and_process(filename, date_idx=1, year=2014)

In [None]:
# Persist the histogram so it can be reused without re-reading the CSV.
# JSON object keys are always strings, so non-string keys of `freq` are
# written out as strings.
with open('./freq.json', 'w') as freqjsonf:
    freqjsonf.write(json.dumps(freq))

# Echo the same histogram for a quick visual check.
pp.pprint(freq)

In [None]:
# Histogram keys become the inputs, the matching counts the targets.
X_train = np.array(list(freq))
Y_train = np.array([freq[key] for key in freq])

# Scale the targets by their maximum so the largest count maps to 1.
Y_max = Y_train.max()
Y_ntrain = Y_train / Y_max

print('Samples:', X_train.shape[0])
print('Max Y:', Y_max)

# Normalised counts plotted against their bucket keys.
plt.scatter(X_train, Y_ntrain)
plt.show()

In [None]:
# Hyperparameters for the per-sample gradient-descent loop below.
learning_rate = 1.5
training_epochs = 5000

# Placeholders without a fixed shape; one (x, y) sample is fed per step.
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)


def model(X, mu, sigma):
    """Return the unnormalised Gaussian exp(-(X - mu)^2 / (2 * sigma^2))."""
    return tf.math.exp(tf.div(tf.negative(tf.square(tf.subtract(X, mu))),
                              tf.multiply(2., tf.square(sigma))))


# Trainable parameters of the curve, both starting at 1.
mu = tf.Variable(1., name='mu')
sigma = tf.Variable(1., name='sigma')
y_model = model(X, mu, sigma)
cost = tf.square(Y - y_model)  # squared error of the fed sample
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

init = tf.global_variables_initializer()

# The context manager closes the session even when training raises or is
# interrupted, which a trailing sess.close() would not guarantee.
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(training_epochs):
        for x, y in zip(X_train, Y_ntrain):
            sess.run(train_op, feed_dict={X: x, Y: y})

    # Read both learned parameters back in a single run call.
    mu_val, sigma_val = sess.run([mu, sigma])

print(f"Mu: {mu_val}, Sigma: {sigma_val}")

In [None]:
# Evaluate the fitted Gaussian at every training input and multiply by Y_max
# to express the curve on the scale of Y_train.
Y_learned = Y_max * np.exp(-np.square(X_train - mu_val) / (2. * np.square(sigma_val)))

# First image: raw counts plus the learned curve only. It is saved before
# the week-35 markers below are drawn.
plt.scatter(X_train, Y_train)
plt.plot(X_train, Y_learned, 'r')
plt.savefig('./learned.png')

# Zero-based position 34 is the 35th sample of the training arrays.
week_35_id = 34
week_35_position = X_train[week_35_id]
week_35_prediction = Y_learned[week_35_id]
week_35_actual = Y_train[week_35_id]

# Mark predicted vs. actual value for that sample; each label is placed
# 2000 above its point on the y axis.
plt.scatter(week_35_position, week_35_prediction)
plt.scatter(week_35_position, week_35_actual)
plt.text(week_35_position, week_35_prediction + 2000, f'Week 35\'s Prediction:\n    ~{round(week_35_prediction)}', c='#ff8c00')
plt.text(week_35_position, week_35_actual + 2000, f'Week 35\'s Actual:\n    {week_35_actual}', c='g')

# Second image: saved after the week-35 annotations have been added.
plt.savefig('./week-35-graph.png')
plt.show()

In [None]:
# Per-sample error: square root of the squared residual, i.e. the magnitude
# of (Y_learned - Y_train) for each sample.
Y_error = np.power(np.square(Y_learned - Y_train), 0.5)
plt.bar(X_train, Y_error)
plt.savefig('./error.png')
plt.show()

# Sums the *signed* residuals, takes the absolute value of that sum and
# divides by the sample count.
# NOTE(review): residuals of opposite sign cancel here, so this is not the
# mean of Y_error (mean absolute error) - confirm this is the intended metric.
avg_error = np.abs(functools.reduce(lambda a, b: a+b, (Y_learned - Y_train))) / len(X_train)
# Average error expressed relative to the largest count, subtracted from 1.
accuracy = 1. - (avg_error / Y_max)

print(f'Average Error: {avg_error}, Accuracy: {accuracy}')

In [None]:
# Gather the learned parameters, the data and the error figures in a single
# dict. Array elements and scalars are converted with float() before they
# are handed to json.
result_to_save = {
    'mu': float(mu_val),
    'sigma': float(sigma_val),
    'frequency': freq,
    'xTrain': [float(x) for x in X_train],
    'yTrain': [float(y) for y in Y_train],
    'yLearned': [float(y) for y in Y_learned],
    'yError': [float(err) for err in Y_error],
    'averageError': float(avg_error),
    'accuracy': float(accuracy),
}

# Write the summary as indented JSON.
with open('./training-result.json', 'w') as training_result_json_f:
    json.dump(result_to_save, training_result_json_f, indent=2)