In [74]:
from __future__ import absolute_import, division, print_function, unicode_literals

import pathlib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tensorflow as tf
import numpy as np

from tensorflow import keras
from tensorflow.keras import layers

In [79]:
'''
  Download Dataset (to '/Users/10yung/.keras/datasets/auto-mpg.data' ) and set the column name
'''

dataset = pd.read_csv('./data/Predict4.csv')
# dataset.tail()

In [85]:
''' 
  Clean na data
'''

dataset.isna().sum()
dataset = dataset.dropna()

------
0
------
0


In [67]:
'''
  Split dataset into training and testing
'''

train_dataset = dataset.sample(frac=0.8,random_state=0)
test_dataset = dataset.drop(train_dataset.index)
print(train_dataset.shape)
print(test_dataset.shape)
print(test_dataset.head())

(1168, 131)
(292, 131)
    Unnamed: 0  MSZoning_C (all)  MSZoning_FV  MSZoning_RH  MSZoning_RL  \
11          11                 0            0            0            1   
23          23                 0            0            0            0   
24          24                 0            0            0            1   
25          25                 0            0            0            1   
28          28                 0            0            0            1   

    MSZoning_RM  LotShape_IR1  LotShape_IR2  LotShape_IR3  LotShape_Reg  ...  \
11            0             1             0             0             0  ...   
23            1             0             0             0             1  ...   
24            0             1             0             0             0  ...   
25            0             0             0             0             1  ...   
28            0             1             0             0             0  ...   

    SaleType_New  SaleType_Oth  SaleType_WD  

In [68]:
'''
  Overall statistic
'''

train_stats = train_dataset.describe()
train_stats.pop("SalePrice")
train_stats = train_stats.transpose()
train_stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Unnamed: 0,1168.0,729.949486,421.221360,0.0,368.5,720.5,1095.25,1459.0
MSZoning_C (all),1168.0,0.006849,0.082512,0.0,0.0,0.0,0.00,1.0
MSZoning_FV,1168.0,0.044521,0.206337,0.0,0.0,0.0,0.00,1.0
MSZoning_RH,1168.0,0.008562,0.092172,0.0,0.0,0.0,0.00,1.0
MSZoning_RL,1168.0,0.789384,0.407921,0.0,1.0,1.0,1.00,1.0
...,...,...,...,...,...,...,...,...
SaleCondition_AdjLand,1168.0,0.002568,0.050637,0.0,0.0,0.0,0.00,1.0
SaleCondition_Alloca,1168.0,0.008562,0.092172,0.0,0.0,0.0,0.00,1.0
SaleCondition_Family,1168.0,0.013699,0.116287,0.0,0.0,0.0,0.00,1.0
SaleCondition_Normal,1168.0,0.807363,0.394539,0.0,1.0,1.0,1.00,1.0


In [69]:
'''
  Split features from labels
'''

train_labels = train_dataset.pop('SalePrice')
test_labels = test_dataset.pop('SalePrice')

In [70]:
'''
  Normalize data
'''

def norm(x):
  return (x - train_stats['mean']) / train_stats['std']
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)
normed_train_stats = normed_train_data.describe()
normed_train_stats

Unnamed: 0.1,Unnamed: 0,MSZoning_C (all),MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,LotShape_IR1,LotShape_IR2,LotShape_IR3,LotShape_Reg,...,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
count,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,...,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0,1168.0
mean,-1.579787e-16,-3.135334e-16,-3.958496e-16,3.867008e-16,1.887759e-16,-4.025509e-16,-6.463627e-18,2.053627e-16,1.233555e-16,6.843841000000001e-17,...,-3.35372e-16,-1.220485e-16,4.246508e-17,1.2642090000000002e-17,1.099767e-16,2.486714e-16,3.118462e-16,-5.125989e-16,-2.4999030000000002e-17,1.103569e-16
std,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
min,-1.732936,-0.08300992,-0.2157663,-0.09288802,-1.935138,-0.4210314,-0.6995546,-0.1756843,-0.08808329,-1.314341,...,-0.06554041,-0.3174302,-0.05072379,-2.447219,-0.2818056,-0.05072379,-0.09288802,-0.1178007,-2.046344,-0.3223054
25%,-0.8580987,-0.08300992,-0.2157663,-0.09288802,0.5163166,-0.4210314,-0.6995546,-0.1756843,-0.08808329,-1.314341,...,-0.06554041,-0.3174302,-0.05072379,0.4082773,-0.2818056,-0.05072379,-0.09288802,-0.1178007,0.4882581,-0.3223054
50%,-0.02243354,-0.08300992,-0.2157663,-0.09288802,0.5163166,-0.4210314,-0.6995546,-0.1756843,-0.08808329,0.7601863,...,-0.06554041,-0.3174302,-0.05072379,0.4082773,-0.2818056,-0.05072379,-0.09288802,-0.1178007,0.4882581,-0.3223054
75%,0.8672412,-0.08300992,-0.2157663,-0.09288802,0.5163166,-0.4210314,1.428257,-0.1756843,-0.08808329,0.7601863,...,-0.06554041,-0.3174302,-0.05072379,0.4082773,-0.2818056,-0.05072379,-0.09288802,-0.1178007,0.4882581,-0.3223054
max,1.730801,12.03644,4.630676,10.75643,0.5163166,2.373086,1.428257,5.687153,11.34317,0.7601863,...,15.2447,3.147602,19.69774,0.4082773,3.545508,19.69774,10.75643,8.481648,0.4882581,3.099991


In [71]:
'''
  Initial tensor
'''

X = tf.constant( np.array(normed_train_data) , dtype=tf.float64 )
Y = tf.constant( np.array(train_labels) , dtype=tf.float64 ) 
                                   
test_X = tf.constant( np.array(normed_test_data) , dtype=tf.float64 ) 
test_Y = tf.constant( np.array(test_labels) , dtype=tf.float64)

'''
  Define linear regression model functions
'''


def mean_squared_error( Y , y_pred ):
    return tf.reduce_mean( tf.square( y_pred - Y ) )

def mean_squared_error_deriv( Y , y_pred ):
    return tf.reshape( tf.reduce_mean( 2 * ( y_pred - Y ) ) , [ 1 , 1 ] )
    
def h ( X , weights , bias ):
    return tf.tensordot( X , weights , axes=1 ) + bias


'''
  Initialize parameters
'''
num_epochs = 10
num_samples = X.shape[0]
batch_size = 10
learning_rate = 0.0001

dataset = tf.data.Dataset.from_tensor_slices(( X , Y )) 
dataset = dataset.shuffle( 500 ).repeat( num_epochs ).batch( batch_size )
iterator = dataset.__iter__()


num_features = X.shape[1]
weights = tf.random.normal( ( num_features , 1 ) ) 
bias = 0

epochs_plot = list()
loss_plot = list()

In [72]:
'''
  Run tensorflow model
'''

for i in range( num_epochs ) :
    
    epoch_loss = list()
    for b in range( int(num_samples/batch_size) ):
        x_batch , y_batch = iterator.get_next()
   
        output = h( x_batch , weights , bias ) 
        loss = epoch_loss.append( mean_squared_error( y_batch , output ).numpy() )
    
        dJ_dH = mean_squared_error_deriv( y_batch , output)
        dH_dW = x_batch
        dJ_dW = tf.reduce_mean( dJ_dH * dH_dW )
        dJ_dB = tf.reduce_mean( dJ_dH )
    
        weights -= ( learning_rate * dJ_dW )
        bias -= ( learning_rate * dJ_dB ) 
        
    loss = np.array( epoch_loss ).mean()
    epochs_plot.append( i + 1 )
    loss_plot.append( loss ) 
    
    print( 'Loss is {}'.format( loss ) ) 

InvalidArgumentError: cannot compute _MklMatMul as input #1(zero-based) was expected to be a double tensor but is a float tensor [Op:MatMul] name: Tensordot/MatMul/

In [63]:
'''
  Define accuracy with Mean Absolute Error
'''

output = h( test_X , weights , bias ) 
labels = test_Y

accuracy_op = tf.metrics.MeanAbsoluteError() 
accuracy_op.update_state( labels , output )
print( 'Mean Absolute Error = {}'.format( accuracy_op.result().numpy() ) )

InvalidArgumentError: cannot compute _MklMatMul as input #1(zero-based) was expected to be a double tensor but is a float tensor [Op:MatMul] name: Tensordot/MatMul/