# Font type Recognition with <font color= #70e514>Logistic Regression</font> using  [TensorFlow](https://www.tensorflow.org/)

### Example from:
![Deep Learning with TensorFlow by Dan Van Boxel (Packt Video)](https://img1.od-cdn.com/ImageType-400/6135-1/67E/936/4A/%7B67E9364A-5248-4174-917C-549ED505ABC6%7DImg400.jpg)

# * [MSTC](http://mstc.ssr.upm.es/big-data-track) and MUIT: <font size=5 color='green'>Deep Learning with Tensorflow & Keras</font>

In [0]:
import tensorflow as tf
import numpy as np


## ... remember $data$ and $shape$ in Python

In [0]:
a = np.arange(15)
a

In [0]:
a.shape

In [0]:
a.reshape(3, 5)


In [0]:
image=np.array([[1,2,3,4],
                [5,6,7,8],
                [9,10,11,12]])

In [0]:
image.shape

In [0]:
# Import matplotlib for plots

import matplotlib.pyplot as plt
%matplotlib inline

plt.pcolor(image, cmap=plt.cm.gray)
plt.colorbar()


In [0]:
vector=image.reshape((12,))
print('Vector shape: ',vector.shape)
print('Vector values: \n',vector)

In [0]:
# Undo the flattening: the 12-element vector becomes a 3x4 array again
image_back = vector.reshape(3, 4)

# Original and restored arrays side by side, one panel each
fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(6, 4))

ax_left.pcolor(image, cmap=plt.cm.gray)
ax_left.set_title('image')

restored_mesh = ax_right.pcolor(image_back, cmap=plt.cm.gray)
ax_right.set_title('image_back')

# The colorbar is attached to the right-hand panel only
fig.colorbar(restored_mesh, ax=ax_right)

![tqdm](https://raw.githubusercontent.com/tqdm/tqdm/master/images/logo.gif)
[tqdm](https://github.com/tqdm/tqdm) means "progress" in Arabic (taqadum, تقدّم) and is an abbreviation for "I love you so much" in Spanish (te quiero demasiado)

In [0]:
 !pip install tqdm

In [0]:
from tqdm import tqdm

# Dummy workload whose only purpose is to keep the progress bar busy:
# every outer step rebuilds the list of square roots of 0..999.
for i in tqdm(np.arange(10000)):
    mylist = [np.sqrt(x) for x in range(1000)]

# Loading data...



In [0]:
"""
Load data
"""

import os
from six.moves import urllib

file_url = 'https://github.com/bloolizard/PlayWithTensorFlow/raw/master/data_with_labels.npz'
file_name = 'data_with_labels.npz'

# Download only when no local copy exists, so re-running the cell is cheap
if not os.path.exists(file_name):
    urllib.request.urlretrieve(file_url, file_name)

# Load data: npz is a simple zip archive, which contains numpy files.
# Load through file_name (not a repeated literal) so the download target
# and the file that is read cannot drift apart.
data = np.load(file_name)

# 'arr_0' holds the images and 'arr_1' the labels.
# NOTE(review): dividing by 255 presumably rescales 8-bit pixel values
# to the range [0, 1] -- confirm against the data set.
train = data['arr_0']/255.
labels = data['arr_1']


In [0]:
ls

In [0]:
type(data)

In [0]:
data.keys()

In [0]:
type(data['arr_0'])

### Data: 2790 36x36 images

In [0]:
train.shape

## ... let's see the $labels$

In [0]:
plt.plot(labels)

## ... and now let's see one image...

In [0]:
plt.pcolor(train[40], cmap=plt.cm.gray)
plt.colorbar()


In [0]:

# Show one sample image per font, stacked vertically.
# NOTE(review): the stride of 558 presumably is the number of images per
# font (2790 / 5) -- confirm against the data set.
f, plts = plt.subplots(5, figsize=(4,14), sharex=True)
c = 20
sample_rows = [c + font * 558 for font in range(5)]
for panel, row in zip(plts, sample_rows):
    panel.pcolor(train[row], cmap=plt.cm.gray)

In [0]:
def to_onehot(labels,nclasses):
    '''
    Build a "one-hot" matrix from a sequence of integer class labels.

    Row i of the result is all zeros except for a single 1 in column
    labels[i].

    Parameters
    ----------
    labels : sequence of int
        Class index of every sample; must support len() and iteration.
    nclasses : int
        Number of columns of the result.

    Returns
    -------
    numpy.ndarray
        Float array of shape (len(labels), nclasses).

    >>> a = [0,1,2,3]
    >>> to_onehot(a,5)
    array([[ 1.,  0.,  0.,  0.,  0.],
           [ 0.,  1.,  0.,  0.,  0.],
           [ 0.,  0.,  1.,  0.,  0.],
           [ 0.,  0.,  0.,  1.,  0.]])
    '''
    encoded = np.zeros((len(labels), nclasses))
    # Iterating a 2-D array yields row views, so writing into `row`
    # fills `encoded` in place.
    for row, cls in zip(encoded, labels):
        row[cls] = 1
    return encoded

onehot = to_onehot(labels,5)

In [0]:
onehot.shape

In [0]:
print(onehot[800])

### Permutation! + split data into training / validation

In [0]:
np.random.seed(100)

indices = np.random.permutation(train.shape[0])

In [0]:
indices[:10]

In [0]:
# Split data into training (90%) and validation (10%).
# The validation part is stored in the variables named `test` / `onehot_test`.
#
# The seeded permutation `indices` computed in the previous cells is reused
# as is: drawing a fresh permutation here would make the split differ from
# the indices that were just displayed.

# `train` is overwritten below, so running this cell a second time would
# index the already-shrunk array and pair images with the wrong labels.
# `onehot` still has one row per original sample, which makes the mismatch
# detectable.
if train.shape[0] != onehot.shape[0]:
    raise RuntimeError(
        'train has already been split; reload the data before '
        're-running this cell')

valid_cnt = int(train.shape[0] * 0.1)

# First 10% of the shuffled indices -> validation, the rest -> training
test_idx, training_idx = indices[:valid_cnt],\
                         indices[valid_cnt:]

test, train = train[test_idx,:],\
              train[training_idx,:]

onehot_test, onehot_train = onehot[test_idx,:],\
                        onehot[training_idx,:]

In [0]:
print('Train shape: ',train.shape)
print('Test shape: ', test.shape)

In [0]:
onehot_test[0:5]

In [0]:
36*36

## **Complete Graph Definition:**

In [0]:
# These will be inputs
## Input pixels, flattened (36*36 = 1296 values per image)
x = tf.placeholder("float", [None, 1296])
## Known labels, one-hot encoded over the 5 classes
y_label = tf.placeholder("float", [None,5])

# Variables
W = tf.Variable(tf.random_normal([1296, 5], stddev=0.1), name="weight")
b = tf.Variable(tf.zeros([5]), name="bias")

# Define model: y holds the raw class scores (logits), no softmax applied
y = tf.add(tf.matmul(x,W), b)


### End model specification, begin training code


# Loss function: cross-entropy
# The op applies softmax internally and expects raw logits. No epsilon is
# added: softmax is invariant to a constant shift of the logits, so an
# offset such as 1e-50 cannot change the result.
cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=y, labels=y_label))


# How we train
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
# Alternative optimizer to experiment with:
#starter_learning_rate=0.01
#train_step = tf.train.AdamOptimizer(starter_learning_rate).minimize(cross_entropy)

# Just initialize
init = tf.global_variables_initializer()

# Obtain the predictions: index of the largest logit in every row
predictions=tf.argmax(y,1)

# Define accuracy: fraction of rows whose prediction matches the label.
# Reuses `predictions` instead of building a second, identical argmax op.
correct_prediction = tf.equal(predictions,tf.argmax(y_label,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))


# **...Training...**

In [0]:
# for saving the model (see below)
import os

# Actually train
epochs = 1000

# aux arrays to store train and test accuracy results
# once every 10 epochs (at i = 0, 10, 20, ...).
# Rounding up keeps the arrays large enough when `epochs` is not a
# multiple of 10; for epochs = 1000 the size is unchanged (100).
n_records = (epochs + 9)//10
train_acc = np.zeros(n_records)
test_acc = np.zeros(n_records)

# The feeds are identical in every iteration, so build them once
train_feed = {x: train.reshape([-1,1296]), y_label: onehot_train}
test_feed = {x: test.reshape([-1,1296]), y_label: onehot_test}

with tf.Session() as sess:

    sess.run(init)

    for i in tqdm(range(epochs)):
        # Record the accuracy every 10th epoch, *before* that epoch's
        # update; the last recorded value therefore precedes the final
        # training steps.
        if i % 10 == 0:
            # Check accuracy on train set
            train_acc[i//10] = accuracy.eval(feed_dict=train_feed)
            # And now the validation set
            test_acc[i//10] = accuracy.eval(feed_dict=test_feed)

        # ALL data is used in every run (i.e. no minibatches)
        train_step.run(feed_dict=train_feed)

    # Get the final values of W
    W_out = W.eval()

    # Get the test logits
    y_logits = y.eval(feed_dict=test_feed)

    # Save the model in the Logist_TF_Model directory for later use
    file_path= './Logist_TF_Model/'
    if not os.path.exists(file_path):
        os.mkdir(file_path)

    saver = tf.train.Saver()
    saver.save(sess, file_path+ 'model.checkpoint')

    print('Model saved')

# No explicit sess.close(): leaving the `with` block closes the session.



---


## It is very important to store and check the $History$

In [0]:
# Note train_acc[-1] and test_acc[-1] are the last *recorded* values
print('Train Accuracy: ',np.round(train_acc[-1],2))
print('Test_Accuracy: ',np.round(test_acc[-1],2))

# Plot the accuracy curves
# Notice that accuracy flattens out
# Accuracy was sampled once every 10 epochs, so rebuild the epoch axis
# instead of plotting against the bare sample index.
recorded_epochs = 10 * np.arange(len(train_acc))
plt.plot(recorded_epochs, train_acc, 'bo', label='train')
plt.plot(recorded_epochs, test_acc, 'rX', label='test')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend();




## ... you can feed one image and get output

In [0]:
# image index
im_index=4

with tf.Session() as sess:

    # Restoring assigns every saved variable, so running the initializer
    # first is not needed.
    saver = tf.train.Saver()
    saver.restore(sess, file_path+ 'model.checkpoint')

    print('Model loaded')

    # Feed a single image (reshaped to a batch of one) and fetch the
    # logits, the predicted class and whether the prediction is correct.
    y_out, predictions_out, corr_pred_out = sess.run(
        [y, predictions, correct_prediction],
        feed_dict={
            x: test[im_index].reshape([-1,1296]),
            y_label: onehot_test[im_index].reshape([-1,5])})

# y_out contains raw logits, not probabilities: apply softmax to get the
# class probabilities. Subtracting the row maximum before exponentiating
# avoids overflow and does not change the result.
exp_scores = np.exp(y_out - y_out.max(axis=1, keepdims=True))
class_probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)

print('\nLogits (y_out)             : ',y_out)
print('Class probabilities        : ',class_probs)
print('One hot label              : ',onehot_test[im_index],'\n')
print('Class predicted: ', predictions_out[0], 'Truth:',np.argmax(onehot_test[im_index]),'\n')
print('Correct prediction?:', corr_pred_out, '\n')

plt.pcolor(test[im_index], cmap=plt.cm.gray)
plt.colorbar()


## ... find errors to inspect them...

In [0]:
Errors=np.argmax(onehot_test,axis=1) != np.argmax(y_logits,axis=1)

In [0]:
# Fraction of misclassified samples: the mean of the boolean error mask
MeanClassError=Errors.mean()
print('MCE :', MeanClassError)

In [0]:
# Positions of the misclassified samples (True entries of the error mask)
np.nonzero(Errors)

# Exploring the information in Weights Matrix


---

## [The Building Blocks of Interpretability](https://distill.pub/2018/building-blocks/)

## * <font color=red>you should consider following [Distill](https://distill.pub/) </font>

In [0]:
plt.pcolor(W_out[:,4].reshape([36,36]),cmap='RdBu')
plt.colorbar()

In [0]:
# Look at a subplot of the weights for each font
f, plts = plt.subplots(5, figsize=(6,20), sharex=True)
for i in range(5):
    plts[i].pcolor(W_out[:,i].reshape([36,36]),cmap='RdBu')
