<img height=200 width=200 align='left' src='https://avatars2.githubusercontent.com/u/365630?v=3&s=400'>
<img height=200 width=200 align='right' src='https://upload.wikimedia.org/wikipedia/en/7/74/TensorFlow.png'>
<img height=50 width=50 align='middle' src='http://bowriversolutions.com/wp-content/uploads/2014/04/VS-Icon-315x400.png'>

<img src='http://orig13.deviantart.net/a0c9/f/2009/339/f/4/shadic_vs_nazo_by_oskarmandude.gif'>

In this notebook we will run a logistic regression from sklearn and compare its perfomance on the iris data set 
with that of a tensorflow model

In [187]:
from sklearn import datasets
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn import cross_validation
from matplotlib import pyplot as plt
from sklearn.utils import shuffle
from sklearn.preprocessing import scale
from sklearn.metrics import accuracy_score
from __future__ import division
from sklearn.preprocessing import OneHotEncoder

import tensorflow as tf

<h2 style="color:#2980b9"> Preparing the data </h2>

In [152]:
data = datasets.load_iris()
random_indices = np.random.choice(range(len(data.data)),replace=False,size=len(data.data))

In [153]:
X = data.data[random_indices]
X = scale(X)
y = data.target[random_indices]

In [154]:
X = X.astype(np.float32)
y = y.astype(np.float32)

In [155]:
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, train_size=.8, test_size=.2)

<h2 style="color:#16a085"> Le Regression Logistique by SKLEARN </h2>

In [156]:
clf = LogisticRegression(fit_intercept=False)

In [157]:
clf.fit(X_train, y_train)
preds = clf.predict(X_test)

In [158]:
print "Accuracy score: {0:.2f}".format(accuracy_score(y_test, preds))

Accuracy score: 0.93


<h2 style="color:#27ae60"> La Random Forest by SKLEARN </h2>

In [189]:
clf = RandomForestClassifier(n_estimators=70)
clf.fit(X_train, y_train)
preds = clf.predict(X_test)
print "Accuracy score: {0:.2f}".format(accuracy_score(y_test, preds))

Accuracy score: 0.97


<h2 style="color:#9b5ab6"> Das Deep Network by TENSORFLOW </h2>

In [163]:
enc = OneHotEncoder()
enc.fit([[0],[1],[2]])

OneHotEncoder(categorical_features='all', dtype=<type 'float'>,
       handle_unknown='error', n_values='auto', sparse=True)

In [164]:
y_train_hot = enc.transform(np.vstack(y_train)).toarray().astype(np.float32)
y_test_hot = enc.transform(np.vstack(y_test)).toarray().astype(np.float32)

In [165]:
print y_train[0:5]
print '===> \n'
print y_train_hot[0:5]
print '\n'
print y_test[0:5]
print '===>\n'
print y_test_hot[0:5]

[ 2.  2.  0.  1.  0.]
===> 

[[ 0.  0.  1.]
 [ 0.  0.  1.]
 [ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 1.  0.  0.]]


[ 2.  1.  1.  2.  1.]
===>

[[ 0.  0.  1.]
 [ 0.  1.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]
 [ 0.  1.  0.]]


### Initiating the Graph

In [183]:
num_labels = np.shape(y_train_hot)[1]

graph = tf.Graph()
with graph.as_default():

    tf_train_dataset = tf.constant(X_train)
    tf_train_labels = tf.constant(y_train_hot)
    tf_valid_dataset = tf.constant(X_test)
  
    # Variables
    v_1 = tf.Variable(
      tf.truncated_normal([np.shape(X_train)[1], 5]))
    b_1 = tf.Variable(tf.zeros([5]))
    
    v_2 = tf.Variable(
      tf.truncated_normal([5, num_labels]))
    b_2 = tf.Variable(tf.zeros([num_labels]))
    
    beta = 0.001
  
    # Training computation
    layer_1 = tf.nn.tanh(tf.matmul(tf_train_dataset, v_1) + b_1)
    logits = tf.matmul(layer_1, v_2) + b_2
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))+beta*tf.nn.l2_loss(v_2)
  
    # Optimizer
    optimizer = tf.train.GradientDescentOptimizer(0.001).minimize(loss)
  
    # Predictions for the training, validation, and test data
    valid_prediction = tf.nn.softmax(
    tf.matmul((tf.matmul(tf_valid_dataset, v_1) + b_1), v_2)) + b_2

### Training the Graph

In [184]:
num_steps = 25000

def accuracy(predictions, labels):
    return (np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))/ predictions.shape[0])

saver = tf.train.Saver([v_1, v_2],keep_checkpoint_every_n_hours=2)

with tf.Session(graph=graph) as session: 
    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        _, l = session.run([optimizer, loss])
        if (step % 2000 == 0):
            print('Loss at step %d: %f' % (step, l))
            print('Validation accuracy: %.2f%%' % accuracy(valid_prediction.eval(), y_test_hot))
            preds_tf = valid_prediction.eval()
    saver.save(session, 'vs_sklearn', global_step = step)
    
    

Initialized
Loss at step 0: 1.530424
Validation accuracy: 0.47%
Loss at step 2000: 0.564675
Validation accuracy: 0.80%
Loss at step 4000: 0.441082
Validation accuracy: 0.80%
Loss at step 6000: 0.378277
Validation accuracy: 0.93%
Loss at step 8000: 0.332872
Validation accuracy: 0.97%
Loss at step 10000: 0.295824
Validation accuracy: 0.97%
Loss at step 12000: 0.264068
Validation accuracy: 1.00%
Loss at step 14000: 0.236660
Validation accuracy: 1.00%
Loss at step 16000: 0.213305
Validation accuracy: 1.00%
Loss at step 18000: 0.193681
Validation accuracy: 1.00%
Loss at step 20000: 0.177327
Validation accuracy: 1.00%
Loss at step 22000: 0.163727
Validation accuracy: 1.00%
Loss at step 24000: 0.152392
Validation accuracy: 1.00%


<h2 align=middle> And the winner is ... </h2>

<p align='middle'><img src='https://upload.wikimedia.org/wikipedia/en/7/74/TensorFlow.png'></p>

<img src='http://orig06.deviantart.net/bfe7/f/2012/172/f/7/kawaii_plum_by_nyanqueen-d54az71.gif'>