## Linear Classifier in TensorFlow 
Using Low Level API in Eager Execution mode

### Load tensorflow

In [0]:
import tensorflow as tf

### Collect Data

In [0]:
from google.colab import drive
drive.mount('gdrive')

Drive already mounted at gdrive; to attempt to forcibly remount, call drive.mount("gdrive", force_remount=True).


In [0]:
import pandas as pd

In [0]:
data = pd.read_csv('gdrive/My Drive/Colab Notebooks/R6/R6_Internal_Lab/prices.csv')

### Check all columns in the dataset

In [0]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 851264 entries, 0 to 851263
Data columns (total 7 columns):
date      851264 non-null object
symbol    851264 non-null object
open      851264 non-null float64
close     851264 non-null float64
low       851264 non-null float64
high      851264 non-null float64
volume    851264 non-null float64
dtypes: float64(5), object(2)
memory usage: 45.5+ MB


### Drop columns `date` and  `symbol`

In [0]:
data.drop(["date", "symbol"], axis=1, inplace=True)


In [0]:
data.head()

Unnamed: 0,open,close,low,high,volume
0,123.43,125.839996,122.309998,126.25,2163600.0
1,125.239998,119.980003,119.940002,125.540001,2386400.0
2,116.379997,114.949997,114.93,119.739998,2489500.0
3,115.480003,116.620003,113.5,117.440002,2006300.0
4,117.010002,114.970001,114.089996,117.330002,1408600.0


### Consider only first 1000 rows in the dataset for building feature set and target set
Target 'Volume' has very high values. Divide 'Volume' by 1000,000

In [0]:
df = data.loc[:999,:]
df['volume'] = df['volume']/1000000

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


### Divide the data into train and test sets

In [0]:

X = df.drop('volume',axis=1)
y = df[['volume']]



In [0]:

from sklearn.model_selection import train_test_split
X_train,  X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=10900)
print("X_Train : {}, X_test : {}, y_train : {}, y_test : {}".format(X_train.shape,  X_test.shape, y_train.shape, y_test.shape))


X_Train : (700, 4), X_test : (300, 4), y_train : (700, 1), y_test : (300, 1)


#### Convert Training and Test Data to numpy float32 arrays


In [0]:
import numpy as np
X_train_array = np.asarray(X_train, dtype=np.float32)
X_test_array = np.asarray(X_test, dtype=np.float32)
y_train_array = np.asarray(y_train, dtype=np.float32)
y_test_array = np.asarray(y_test, dtype=np.float32)


### Normalize the data
You can use Normalizer from sklearn.preprocessing

In [0]:
from sklearn.preprocessing import Normalizer
normalizer = Normalizer()
X_train_std = normalizer.fit_transform(X_train_array)
X_test_std = normalizer.fit_transform(X_test_array)

## Building the Model in tensorflow

1.Define Weights and Bias, use tf.zeros to initialize weights and Bias; Set placeholders for feature and target vectors 

In [0]:
sess = tf.Session()

In [0]:
n_samples = X_train.shape[0]
n_features = float(X_train.shape[1])
print(n_samples, n_features)
W = tf.Variable(tf.zeros([4,1]), tf.float32, name="weight") 
b = tf.Variable(tf.zeros(1), name="bias") 

X_tf = tf.placeholder(tf.float32, shape=([n_samples, n_features])) 

y_tf = tf.placeholder(tf.float32, shape=([n_samples, 1]))

(700, 4.0)


In [0]:
print(sess.run(tf.rank(W)))
print(sess.run(tf.shape(W)))

2
[4 1]


2.Define a linear model

In [0]:
linear_model = tf.matmul(X_tf , W) + b

*3*.Define Loss (Cost) [Mean square error]

In [0]:
# Mean squared error 
cost = tf.reduce_sum(tf.square(linear_model - y_tf)) / (2*n_samples)    # Operatinal tensor


4.Initialize the optimizer and global variables

In [0]:
learning_rate = 0.1
# Gradient descent 
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost) # opearational tensor

# Initializing the variables 
init = tf.global_variables_initializer() 


## Train the model for 100 epochs with TF session
1. Observe the training loss at every iteration
2. Observe Train loss at every 5th iteration
3. Model Prediction on 1st Examples in Test Dataset

In [0]:
# Model Parameters 

training_epochs = 100
display_step = 5

In [0]:
# Launch the graph 
with tf.Session() as sess:
    # Load initialized variables in current session
    sess.run(init)
    # Fit all training data
    for epoch in range(training_epochs):
        # perform gradient descent step
        sess.run(optimizer, feed_dict={X_tf: X_train_std, y_tf: y_train_array})
        if (epoch+1) % display_step == 0:
            c = sess.run(cost, feed_dict={X_tf: X_train_std, y_tf: y_train_array})
            print("---------Iteration-{}---------".format(epoch+1))
            print(sess.run(W))
            print("COST ", c)
    # Print final parameter values
    print("Optimization Finished!")
    training_cost = sess.run(cost, feed_dict={X_tf: X_train_std, y_tf: y_train_array}) 
    
    
    w = sess.run(W)
    intercept = sess.run(b)
    
    print("Final training cost:", training_cost, "W:", w, "b:",intercept, '\n') 
    
    


---------Iteration-5---------
[[0.86796784]
 [0.8777089 ]
 [0.86023515]
 [0.8839077 ]]
('COST ', 94.45406)
---------Iteration-10---------
[[1.1500865]
 [1.1675982]
 [1.140577 ]
 [1.1750749]]
('COST ', 93.16257)
---------Iteration-15---------
[[1.240229 ]
 [1.2648655]
 [1.230894 ]
 [1.2720077]]
('COST ', 93.023834)
---------Iteration-20---------
[[1.2674631]
 [1.2990128]
 [1.2589421]
 [1.305292 ]]
('COST ', 93.00885)
---------Iteration-25---------
[[1.2740828]
 [1.3124763]
 [1.2665855]
 [1.3177195]]
('COST ', 93.007195)
---------Iteration-30---------
[[1.2739474]
 [1.3191619]
 [1.2675425]
 [1.3233124]]
('COST ', 93.00696)
---------Iteration-35---------
[[1.2715985]
 [1.3236263]
 [1.2663083]
 [1.3266656]]
('COST ', 93.00688)
---------Iteration-40---------
[[1.2685243]
 [1.3273628]
 [1.2643563]
 [1.3292849]]
('COST ', 93.0068)
---------Iteration-45---------
[[1.2652124]
 [1.3308609]
 [1.2621689]
 [1.3316636]]
('COST ', 93.00673)
---------Iteration-50---------
[[1.2618227]
 [1.3342808]
 [1

In [0]:

firstSampleYhat = np.dot( X_test_std[0] , w) + intercept
print("First Sample Prediction :- yhat : {} y : {}".format(firstSampleYhat, y_test_array[0]))


First Sample Prediction :- yhat : [5.190363] y : [7.5171]


4. Print the values of Weight and bia

In [0]:
print("Weight: {}, Bias : {}".format(w, intercept))

Weight: [[1.2275673]
 [1.3681145]
 [1.2369053]
 [1.3565962]], Bias : [2.5958383]


## Classification using tf.Keras

In this exercise, we will build a Deep Neural Network using tf.Keras. We will use Iris Dataset for this exercise.

### Load the given Iris data using pandas (Iris.csv)

In [0]:
df = pd.read_csv("gdrive/My Drive/Colab Notebooks/R6/R6_Internal_Lab/Iris.csv")
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


### Target set has different categories. So, Label encode them. And convert into one-hot vectors using get_dummies in pandas.

In [0]:
df = pd.get_dummies(df)


### Splitting the data into feature set and target set

In [0]:
X = df.iloc[:, 1:5]
y = df.iloc[:, 5:8]
print(X.shape, y.shape)

((150, 4), (150, 3))


###  Building Model in tf.keras

Build a Linear Classifier model  <br>
1.  Use Dense Layer  with input shape of 4 (according to the feature set) and number of outputs set to 3<br> 
2. Apply Softmax on Dense Layer outputs <br>
3. Use SGD as Optimizer
4. Use categorical_crossentropy as loss function 

In [0]:
import keras 
from keras.datasets import mnist 
from keras.models import Sequential 
from keras.layers import Dense, Dropout 
from keras.optimizers import SGD 
from keras import utils 
import numpy as np 

Using TensorFlow backend.


### Model Training 

In [0]:
batch_size = 80
n_inputs = 4 
n_classes = 3 
n_epochs = 100


X_train,  X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=989898)

# build a sequential model 
model = Sequential() # the first layer has to specify the dimensions of the input vector 
model.add( Dense( units = 90, activation ='tanh', input_shape =( n_inputs,))) # add dropout layer for preventing overfitting 
model.add( Dropout( 0.1)) 

model.add( Dense( units = 90, activation ='tanh')) 
model.add( Dropout( 0.1)) # output layer can only have the neurons equal to the number of outputs 

model.add( Dense( units = n_classes, activation ='softmax')) # print the summary of our model 

model.summary() 
# compile the model 
model.compile( loss ='categorical_crossentropy', optimizer = SGD(), metrics =['accuracy']) 

# train the model 
model.fit( X_train, y_train, batch_size = batch_size, epochs = n_epochs) 


W0630 13:10:42.103975 140266104403840 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0630 13:10:42.107078 140266104403840 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0630 13:10:42.110883 140266104403840 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0630 13:10:42.127995 140266104403840 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0630 13:10:42.137440 

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 90)                450       
_________________________________________________________________
dropout_1 (Dropout)          (None, 90)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 90)                8190      
_________________________________________________________________
dropout_2 (Dropout)          (None, 90)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 273       
Total params: 8,913
Trainable params: 8,913
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 

<keras.callbacks.History at 0x7f91fb13da10>

### Model Prediction

In [0]:
scores = model.evaluate( X_test, y_test)

print('\ n loss:', scores[ 0]) 

print('\ n accuracy:', scores[ 1])

('\\ n loss:', 0.34040354556507535)
('\\ n accuracy:', 0.9777777777777777)


### Save the Model

In [0]:
model.save("/content/gdrive/My Drive/Colab Notebooks/R6/R6_Internal_Lab/KerasModel1.sav")

### Build and Train a Deep Neural network with 2 hidden layer  - Optional - For Practice

Does it perform better than Linear Classifier? What could be the reason for difference in performance?