In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import xlrd

**Problem**: We often hear that insurance companies use factors such as the number of fires and
thefts in a neighborhood to estimate how dangerous that neighborhood is. My question is: are these
two factors redundant? Is there a relationship between the number of fires and the number of thefts
in a neighborhood, and if there is, can we find it?  

In other words, if $X$ is the number of fires and $Y$ is the number of thefts, can we find a
function $f$ such that $Y = f(X)$?

In [2]:
# Relative path of the Excel (.xls) workbook that holds the fire/theft samples.
DATA_FILE = "data/fire_theft.xls"

In [3]:
# Step 1: read the samples from the first sheet of the .xls workbook.
# Row 0 is skipped (presumably a header row - confirm against the file),
# so every remaining row becomes one entry of `data`.
book = xlrd.open_workbook(DATA_FILE, encoding_override='utf-8')
sheet = book.sheet_by_index(0)
data = np.asarray(list(map(sheet.row_values, range(1, sheet.nrows))))
# One fewer than the sheet's row count, because the first row is not a sample.
n_samples = sheet.nrows - 1

In [4]:
# Step 2: declare the graph inputs as float32 placeholders:
#   X - the feature (number of fires)
#   Y - the label   (number of thefts)
X = tf.placeholder(dtype=tf.float32, name="X")
Y = tf.placeholder(dtype=tf.float32, name="Y")

In [5]:
# Step 3: create the trainable weight and bias, both starting at 0.0
w = tf.Variable(initial_value=0.0, name="weights")
b = tf.Variable(initial_value=0.0, name="bias")

In [6]:
# Step 4: construct the model that predicts Y (number of thefts) from X (number of fires).
# The model is linear: Y_predicted = X * w + b.
Y_predicted = X * w + b

In [7]:
# Step 5: the loss is the squared difference between the label and the prediction
residual = Y - Y_predicted
loss = tf.square(residual, name="loss")

In [8]:
# Step 6: use gradient descent with a learning rate of 0.001 to minimize the loss.
# The rate is kept in a named constant so the comment and the code cannot drift apart.
LEARNING_RATE = 0.001
optimizer = tf.train.GradientDescentOptimizer(learning_rate=LEARNING_RATE).minimize(loss)

In [9]:
N_EPOCHS = 100   # number of full passes over `data`
LOG_EVERY = 10   # print the mean loss once every LOG_EVERY epochs

with tf.Session() as sess:
    # Step 7: initialize the necessary variables, in this case w and b
    sess.run(tf.global_variables_initializer())

    # Step 8: train the model, taking one optimizer step per (x, y) sample
    for i in range(N_EPOCHS):
        loss_val = 0  # sum of the per-sample losses seen during this epoch
        for x, y in data:
            # Running `optimizer` applies one update; `loss` is fetched in the
            # same call so it can be accumulated for reporting.
            _, loss_one = sess.run([optimizer, loss], feed_dict={X: x, Y: y})
            loss_val += loss_one

        if i % LOG_EVERY == 0:
            # Report the mean per-sample loss for this epoch
            print("{}/{} loss={}".format(i, N_EPOCHS, loss_val / len(data)))

    # Step 9: output the values of w and b
    w_value, b_value = sess.run([w, b])

    print("w = {}, b = {}".format(w_value, b_value))

0/100 loss=2069.6319333978354
10/100 loss=1924.5930823644712
20/100 loss=1773.1024853109072
30/100 loss=1666.1935385839038
40/100 loss=1589.668056331575
50/100 loss=1534.211797797609
60/100 loss=1493.600210891061
70/100 loss=1463.5996563179153
80/100 loss=1441.2782130186733
90/100 loss=1424.5748210840281
w = 1.7183812856674194, b = 15.789156913757324
