In [16]:
import sys
import tensorflow as tf

# Print the interpreter build, then let the cell echo the TensorFlow version
# as its last expression.
print(sys.version)
tf.__version__

3.6.4 |Anaconda, Inc.| (default, Jan 16 2018, 10:22:32) [MSC v.1900 64 bit (AMD64)]


'1.2.1'

# Ollie COMP188 Week 2 

My task this week was to follow the TensorFlow guide on [FIRST STEPS WITH TENSORFLOW](https://developers.google.com/machine-learning/crash-course/).

The task included a step-by-step dissection of the data, and using intuition to select the important pieces of data for a basic prediction.

## In detail:
### Step 1: Load dependencies

In [4]:
import math

from IPython import display
from matplotlib import cm
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
import tensorflow as tf

# Only surface TensorFlow log messages at ERROR level.
tf.logging.set_verbosity(tf.logging.ERROR)

# pandas display settings: show at most 10 rows per frame and render floats
# with a single decimal place.
pd.set_option("display.max_rows", 10)
pd.set_option("display.float_format", '{:.1f}'.format)

### Step 2: Examine the data

In [5]:
# Load the California housing training set from the course's public bucket.
cali_df = pd.read_csv("https://storage.googleapis.com/mledu-datasets/california_housing_train.csv", sep=",")

# Put the rows in a random order.
cali_df = cali_df.reindex(
    np.random.permutation(cali_df.index))
# Rescale median_house_value by 1/1000 (i.e. express it in thousands).
# The original line had stray characters after the literal ("1000.0ry"),
# which is a SyntaxError.
cali_df["median_house_value"] /= 1000.0
cali_df.describe()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
count,17000.0,17000.0,17000.0,17000.0,17000.0,17000.0,17000.0,17000.0,17000.0
mean,-119.6,35.6,28.6,2643.7,539.4,1429.6,501.2,3.9,207.3
std,2.0,2.1,12.6,2179.9,421.5,1147.9,384.5,1.9,116.0
min,-124.3,32.5,1.0,2.0,1.0,3.0,1.0,0.5,15.0
25%,-121.8,33.9,18.0,1462.0,297.0,790.0,282.0,2.6,119.4
50%,-118.5,34.2,29.0,2127.0,434.0,1167.0,409.0,3.5,180.4
75%,-118.0,37.7,37.0,3151.2,648.2,1721.0,605.2,4.8,265.0
max,-114.3,42.0,52.0,37937.0,6445.0,35682.0,6082.0,15.0,500.0


### Step 3: Building a Model
The guide uses the feature `total_rooms` to predict the label `median_house_value`. The model is a linear regressor, trained with gradient descent as implemented by TensorFlow.

In [None]:
# Describe the single numeric input feature to the estimator.
feature_columns = [tf.feature_column.numeric_column("total_rooms")]
# The label column the model is trained to predict.
targets = cali_df["median_house_value"]

# Plain gradient descent, wrapped so gradients are clipped to a norm of 5.0.
# The clipping helper lives in tf.contrib.estimator (not tf.estimator), and it
# must wrap the optimizer created on the previous line.
# NOTE(review): needs a TF 1.x build that ships tf.contrib.estimator -- confirm
# the installed version.
optim = tf.train.GradientDescentOptimizer(learning_rate=0.0000001)
optim = tf.contrib.estimator.clip_gradients_by_norm(optim, 5.0)

our_lin_reg = tf.estimator.LinearRegressor(
    feature_columns=feature_columns,
    optimizer=optim
)

### Step 4: Defining an input and training the model

The first argument of our regressor's `train()` method is an input function (a callback). This is where we preprocess the data so it is nice and squeaky clean, and where we configure details such as batching, shuffling and the number of epochs.

Our callback function returns features and labels, so we pass it the feature data (the `total_rooms` column) and `targets` as arguments.

In [None]:
def preprocess(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Input function that feeds (features, labels) batches to the estimator.

    Args:
        features: Mapping-like object (e.g. a pandas DataFrame) of feature
            columns; each column is converted to a NumPy array.
        targets: Labels, sliced together with `features`.
        batch_size: Number of examples per batch.
        shuffle: If True, shuffle the batched dataset with a 10000-element
            buffer.
        num_epochs: Number of times the data is repeated; None is passed
            straight through to `Dataset.repeat`.

    Returns:
        Tuple `(features, labels)` taken from a one-shot iterator over the
        dataset.
    """
    # Turn every feature column into a NumPy array keyed by column name.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a dataset, and configure batching/repeating.
    # `Dataset` was never imported in this notebook; reach it through the
    # already-imported `tf` module instead.
    ds = tf.data.Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)

    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels

We'll train it for 100 steps.

In [None]:
# Train for 100 steps. The input function defined in this notebook is
# `preprocess`, and the feature data is the `total_rooms` column of `cali_df`
# (the original call referenced the undefined names `my_input_fn` and
# `my_feature`).
our_lin_reg.train(
    input_fn=lambda: preprocess(cali_df[["total_rooms"]], targets),
    steps=100
)

### Step 5: Making predictions

The guide uses mean squared error (and its square root) as a measure of how well our model is doing.

In [None]:
# One un-shuffled pass over the data so each prediction stays in the same
# order as `targets`.
pred_fn = lambda: preprocess(cali_df[["total_rooms"]], targets, num_epochs=1, shuffle=False)

our_preds = our_lin_reg.predict(input_fn=pred_fn)

# Collect the first 'predictions' entry of every item into a NumPy array.
# The original iterated over an undefined name instead of `our_preds`.
pred = np.array([item['predictions'][0] for item in our_preds])

# scikit-learn's signature is (y_true, y_pred).
mse = metrics.mean_squared_error(targets, pred)
sqrt_mse = math.sqrt(mse)

print("Train MSE:\t%0.3f" % mse)
print("Train Root MSE:\t%0.3f" % sqrt_mse)

The guide now compares our model's predictions against the target values to see how far off we are.

In [None]:
# Put predictions and targets side by side, row for row.
# `pred` is the prediction array built in the evaluation cell (the original
# referenced an undefined name). `targets.values` drops the shuffled index so
# pandas pairs the two columns by position instead of re-aligning them by label.
calibration_data = pd.DataFrame({
    "pred": pred,
    "targets": targets.values,
})
calibration_data.describe()

We then draw a random sample of 300 rows so that the scatter plot isn't overcrowded.

In [None]:
# Draw 300 random rows of the housing data for plotting.
sple = cali_df.sample(300)

Then we plot the line our model learned, using its bias term and feature weight.

In [None]:
# Get the min and max total_rooms values of the plotting sample.
x_0 = sple["total_rooms"].min()
x_1 = sple["total_rooms"].max()

# Retrieve the final weight and bias generated during training.
# The trained estimator in this notebook is `our_lin_reg`.
weight = our_lin_reg.get_variable_value('linear/linear_model/total_rooms/weights')[0]
bias = our_lin_reg.get_variable_value('linear/linear_model/bias_weights')

# Get the predicted median_house_values for the min and max total_rooms values.
y_0 = weight * x_0 + bias
y_1 = weight * x_1 + bias

# Plot our regression line from (x_0, y_0) to (x_1, y_1).
plt.plot([x_0, x_1], [y_0, y_1], c='r')

# Label the graph axes.
plt.ylabel("median_house_value")
plt.xlabel("total_rooms")

# Plot a scatter plot from our data sample (the sampled frame is `sple`).
plt.scatter(sple["total_rooms"], sple["median_house_value"])

# Display graph.
plt.show()