## Primeros pasos con TensorFlow

#### Objetivos
* Aprender los conceptos básicos de TensorFlow
* Usar la clase LinearRegressor para predecir el valor mediano de las viviendas
* Evaluar el rendimiento del modelo mediante la raíz del error cuadrático medio (RMSE)
* Mejorar la exactitud del modelo ajustando los hiperparámetros

In [21]:
from __future__ import print_function

import math

from IPython import display
from matplotlib import cm
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
import tensorflow as tf
from tensorflow.python.data import Dataset

# Set TensorFlow's logging verbosity threshold to ERROR.
# NOTE(review): `tf.logging` looks like the TensorFlow 1.x logging API — confirm
# which TensorFlow version this notebook targets before upgrading.
tf.logging.set_verbosity(tf.logging.ERROR)
# Notebook-wide pandas display settings: cap rendered frames at 10 rows and
# format floats with a single decimal place.
pd.set_option("display.max_rows", 10)
pd.set_option("display.float_format", '{:.1f}'.format)

In [22]:
# Load the dataset: read california_housing_train.csv from its hosted URL
# into a DataFrame (comma-separated).
california_housing_dataframe = pd.read_csv(
    "https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv",
    sep=",",
)

In [23]:
# Preview the first five rows of the loaded frame.
california_housing_dataframe.head(n=5)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-114.3,34.2,15.0,5612.0,1283.0,1015.0,472.0,1.5,66900.0
1,-114.5,34.4,19.0,7650.0,1901.0,1129.0,463.0,1.8,80100.0
2,-114.6,33.7,17.0,720.0,174.0,333.0,117.0,1.7,85700.0
3,-114.6,33.6,14.0,1501.0,337.0,515.0,226.0,3.2,73400.0
4,-114.6,33.6,20.0,1454.0,326.0,624.0,262.0,1.9,65500.0


In [24]:
# Shuffle the rows into a random order.
# A dedicated, seeded RandomState makes the shuffle reproducible across
# "Restart & Run All" and leaves NumPy's global random state untouched.
SHUFFLE_SEED = 42
shuffled_index = np.random.RandomState(SHUFFLE_SEED).permutation(
    california_housing_dataframe.index
)

# Express median_house_value in thousands so the values are easier to work with.
# NOTE: this cell rebinds the same name it reads, so re-running it without
# re-running the load cell first divides the column by 1000 again.
california_housing_dataframe = california_housing_dataframe.reindex(
    shuffled_index
).assign(
    median_house_value=lambda frame: frame["median_house_value"] / 1000.0
)

# Display the shuffled, rescaled dataset.
california_housing_dataframe

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
13895,-122.0,38.0,45.0,1613.0,338.0,865.0,336.0,3.2,151.1
7446,-118.3,33.9,25.0,1769.0,440.0,1371.0,414.0,3.1,232.7
15607,-122.3,37.9,4.0,2851.0,798.0,1285.0,712.0,4.3,186.8
9078,-119.0,35.4,38.0,918.0,220.0,743.0,222.0,1.7,58.1
15727,-122.4,37.8,52.0,464.0,202.0,286.0,148.0,1.6,112.5
...,...,...,...,...,...,...,...,...,...
2994,-117.8,34.1,26.0,1664.0,344.0,1024.0,339.0,3.5,190.5
8157,-118.4,34.0,42.0,1528.0,244.0,634.0,242.0,8.2,500.0
15571,-122.3,37.9,42.0,2206.0,451.0,989.0,444.0,3.1,143.9
13079,-121.9,37.3,16.0,3298.0,451.0,1542.0,423.0,6.7,305.6


In [25]:
# Show a brief statistical summary of the dataset.
california_housing_dataframe.describe()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
count,17000.0,17000.0,17000.0,17000.0,17000.0,17000.0,17000.0,17000.0,17000.0
mean,-119.6,35.6,28.6,2643.7,539.4,1429.6,501.2,3.9,207.3
std,2.0,2.1,12.6,2179.9,421.5,1147.9,384.5,1.9,116.0
min,-124.3,32.5,1.0,2.0,1.0,3.0,1.0,0.5,15.0
25%,-121.8,33.9,18.0,1462.0,297.0,790.0,282.0,2.6,119.4
50%,-118.5,34.2,29.0,2127.0,434.0,1167.0,409.0,3.5,180.4
75%,-118.0,37.7,37.0,3151.2,648.2,1721.0,605.2,4.8,265.0
max,-114.3,42.0,52.0,37937.0,6445.0,35682.0,6082.0,15.0,500.0
