In [0]:
# to avoid warning
import warnings
warnings.filterwarnings('ignore')


              
import pandas as pd   
import tensorflow as tf
import numpy as np               
# Visualization
import matplotlib.pyplot as plt  
%matplotlib inline 


# 1)-Loading and preparing dataset

In [0]:
dataframe = pd.read_csv("data.csv")

In [3]:
dataframe.head()

Unnamed: 0,index,area,bathrooms,price,sq_price
0,0,2104.0,3.0,399900.0,190.06654
1,1,1600.0,3.0,329900.0,206.1875
2,2,2400.0,3.0,369000.0,153.75
3,3,1416.0,2.0,232000.0,163.841808
4,4,3000.0,4.0,539900.0,179.966667


In [0]:
# we will only focus on area and bathrooms features.So, removing rest

dataframe = dataframe.drop(["index", "price", "sq_price"], axis=1)

In [5]:
dataframe.head()

Unnamed: 0,area,bathrooms
0,2104.0,3.0
1,1600.0,3.0
2,2400.0,3.0
3,1416.0,2.0
4,3000.0,4.0


In [6]:
dataframe.shape

(47, 2)

In [0]:
# focusing on 1st 10 rows
dataframe = dataframe[0:10]

In [8]:
dataframe

Unnamed: 0,area,bathrooms
0,2104.0,3.0
1,1600.0,3.0
2,2400.0,3.0
3,1416.0,2.0
4,3000.0,4.0
5,1985.0,4.0
6,1534.0,3.0
7,1427.0,3.0
8,1380.0,3.0
9,1494.0,3.0


In [0]:
#Adding labels
dataframe.loc[:, ("y1")] = [1, 1, 1, 0, 0, 1, 0, 1, 1, 1] # 0 shows a bad purchase and 1 shows a good and satisfactory buying of property

In [10]:
dataframe.head()

Unnamed: 0,area,bathrooms,y1
0,2104.0,3.0,1
1,1600.0,3.0,1
2,2400.0,3.0,1
3,1416.0,2.0,0
4,3000.0,4.0,0


In [0]:
dataframe.loc[:, ("y2")] = dataframe["y1"] == 0

In [12]:
dataframe.head()

Unnamed: 0,area,bathrooms,y1,y2
0,2104.0,3.0,1,False
1,1600.0,3.0,1,False
2,2400.0,3.0,1,False
3,1416.0,2.0,0,True
4,3000.0,4.0,0,True


In [0]:
dataframe.loc[:, ("y2")] = dataframe["y2"].astype(int) 

In [14]:
dataframe.head()

Unnamed: 0,area,bathrooms,y1,y2
0,2104.0,3.0,1,0
1,1600.0,3.0,1,0
2,2400.0,3.0,1,0
3,1416.0,2.0,0,1
4,3000.0,4.0,0,1


In [0]:
#matrices to feed it to TensorFlow. Need to convert features into tensor
inputX = dataframe.loc[:, ['area', 'bathrooms']].as_matrix()
inputY = dataframe.loc[:, ["y1", "y2"]].as_matrix()

**Vector is 1D tensor i.e list of number**

**Marix is a 2D tensor i.e list of list of numbers.**

In [16]:
inputX

array([[2.104e+03, 3.000e+00],
       [1.600e+03, 3.000e+00],
       [2.400e+03, 3.000e+00],
       [1.416e+03, 2.000e+00],
       [3.000e+03, 4.000e+00],
       [1.985e+03, 4.000e+00],
       [1.534e+03, 3.000e+00],
       [1.427e+03, 3.000e+00],
       [1.380e+03, 3.000e+00],
       [1.494e+03, 3.000e+00]])

In [17]:
inputY

array([[1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0]])

# 2)-Define tensorflow model

In [0]:
# Parameters
learning_rate = 0.000001
training_epochs = 2000
display_step = 50
n_samples = inputY.size

In [19]:
# define the TensorFlow operations
x = tf.placeholder(tf.float32, [None, 2])   

#use a 2 x 2 float matrix for the weight
            
W = tf.Variable(tf.zeros([2, 2]))  

#maintain two bias values
                                            
b = tf.Variable(tf.zeros([2]))   

#calculating the predictio

y_values = tf.add(tf.matmul(x, W), b)       
                                           
# use softmax which is activation function.It normalized and  translates numbers outputted by the previous layer into probability form
y = tf.nn.softmax(y_values)    

#For training purposes, we'll also feed you a matrix of label
                                           
    
y_ = tf.placeholder(tf.float32, [None,2]) 

Instructions for updating:
Colocations handled automatically by placer.


In [0]:
#cost function 

cost = tf.reduce_sum(tf.pow(y_ - y, 2))/(2*n_samples)

#define Gradient Descent

optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

In [0]:
# Initialize variabls and tensorflow session

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# Training model

In [22]:
for i in range(training_epochs):  
    sess.run(optimizer, feed_dict={x: inputX, y_: inputY}) 
    
    #display log in each epoch
    if (i) % display_step == 0:
        cc = sess.run(cost, feed_dict={x: inputX, y_:inputY})
        print ("Training step:", '%04d' % (i), "cost=", "{:.9f}".format(cc))
        
print ("Optimization Finished!")
training_cost = sess.run(cost, feed_dict={x: inputX, y_: inputY})
print ("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')

Training step: 0000 cost= 0.114958666
Training step: 0050 cost= 0.109539948
Training step: 0100 cost= 0.109539866
Training step: 0150 cost= 0.109539807
Training step: 0200 cost= 0.109539740
Training step: 0250 cost= 0.109539665
Training step: 0300 cost= 0.109539591
Training step: 0350 cost= 0.109539531
Training step: 0400 cost= 0.109539464
Training step: 0450 cost= 0.109539405
Training step: 0500 cost= 0.109539330
Training step: 0550 cost= 0.109539263
Training step: 0600 cost= 0.109539188
Training step: 0650 cost= 0.109539129
Training step: 0700 cost= 0.109539054
Training step: 0750 cost= 0.109538995
Training step: 0800 cost= 0.109538913
Training step: 0850 cost= 0.109538853
Training step: 0900 cost= 0.109538786
Training step: 0950 cost= 0.109538712
Training step: 1000 cost= 0.109538652
Training step: 1050 cost= 0.109538570
Training step: 1100 cost= 0.109538510
Training step: 1150 cost= 0.109538451
Training step: 1200 cost= 0.109538376
Training step: 1250 cost= 0.109538309
Training ste

Training cost= 0.10953728 

W= [[ 2.1414905e-04 -2.1415044e-04]
 [ 5.1274819e-05 -5.1274808e-05]] 
 
 b= [ 1.1915519e-05 -1.1915529e-05] 


# Test model

In [23]:
sess.run(y, feed_dict={x: inputX })

array([[0.7112522 , 0.28874776],
       [0.66498977, 0.33501023],
       [0.73657656, 0.26342347],
       [0.6471879 , 0.3528121 ],
       [0.78335613, 0.2166439 ],
       [0.7006948 , 0.29930523],
       [0.6586633 , 0.34133676],
       [0.6482863 , 0.35171372],
       [0.6436828 , 0.35631716],
       [0.65480113, 0.3451989 ]], dtype=float32)

**compare with original y1 and y2 label. In y1, there are seven '1' values meaning 7 purchases were good. In our model , we see most values are higher and close to one. So, we did pick up 7 values correct. But three values were not predicted correct.**

In [24]:
# checking how softmax is calculated
sess.run(tf.nn.softmax([1., 2.]))

array([0.26894143, 0.7310586 ], dtype=float32)