## Retraining the InceptionV3 model in the Google Colab environment

The goal is to have new files classified based on the already existing model. \
All of the resources should be confined within the Google environment - both the training itself and the images to train on.

In [None]:
import os,sys
import h5py
import pandas as pd
import numpy as np
from keras.preprocessing.image import ImageDataGenerator,array_to_img, img_to_array, load_img
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
import matplotlib.pyplot as plt
import seaborn as sns
import math
%matplotlib inline
from tqdm import tqdm
from PIL import Image
from google.colab import drive

# Mount Google Drive at /content/gdrive; every project path below is rooted there.
drive.mount('/content/gdrive')

#### Testing connection to the Google Drive 

In [None]:
# Write a small probe file into the project folder on the mounted Drive...
with open('/content/gdrive/My Drive/Project/foo.txt', 'w') as f:
  f.write('Hello Google Drive!')
# ...and print it back through the shell to confirm the mount is readable and writable.
!cat /content/gdrive/My\ Drive/Project/foo.txt

In [None]:
data_root = "."
# Load the train/test tables; pandas opens and closes the files itself when
# it is given a path, so no explicit open() wrapper is needed.
train = pd.read_csv('/content/gdrive/My Drive/Project/train.csv')
test = pd.read_csv('/content/gdrive/My Drive/Project/test.csv')

# Number of distinct labels (printed once), then the number of training rows
# per label. label_counts is reused by the plotting and augmentation cells.
print(train.label.nunique(),'labels')
label_counts=train.label.value_counts()
print(label_counts)


In [None]:
# Bar chart of the number of training images per label.
plt.figure(figsize = (12,6))
# The data vectors are passed by keyword: current seaborn releases only accept
# x/y as keyword arguments and raise a TypeError for positional vectors.
sns.barplot(x=label_counts.index, y=label_counts.values, alpha = 0.9)
plt.xticks(rotation = 'vertical')
plt.xlabel('Image Labels', fontsize =12)
plt.ylabel('Counts', fontsize = 12)
plt.show()

In [None]:
# Convert every training PNG listed in train.csv to an RGB JPEG and store it
# under a per-label sub-folder, once in train/ and once in train_new1/.
project_dir = '/content/gdrive/My Drive/Project'
for row in tqdm(train.values):
  # The first two columns of each row are used as file name (without
  # extension) and label, as in the original loop.
  filename = row[0]
  label = row[1]
  # tqdm.write keeps the per-file log from breaking the progress bar.
  tqdm.write('{} {}'.format(filename, label))
  # The context manager closes the PNG file handle as soon as the pixels have
  # been converted, instead of leaving one open handle per image.
  with Image.open(os.path.join(project_dir, 'train_img', filename + '.png')) as im:
    rgb_im = im.convert('RGB')
  for out_root in ('train', 'train_new1'):
    label_dir = os.path.join(project_dir, out_root, label)
    # exist_ok makes the cell safe to re-run.
    os.makedirs(label_dir, exist_ok=True)
    rgb_im.save(os.path.join(label_dir, filename + '.jpg'))
  

In [None]:
# Random augmentation settings: rotations, shifts, shears, zooms and
# horizontal flips, with fill_mode='nearest' for pixels exposed by a transform.
datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

# Approximate number of images every class should have after augmentation.
class_size = 600

src_train_dir = ('/content/gdrive/My Drive/Project/train/')
src_dest_dir = ('/content/gdrive/My Drive/Project/train_new1/')
for label_name, count in label_counts.items():
  # Augmented copies to create per source image. The "- 1" accounts for the
  # un-augmented JPEG that the conversion cell already stored in train_new1,
  # so each class ends up with the count*(ratio+1) images printed below.
  # Clamped at 0 for classes that already exceed class_size.
  ratio = max(class_size // count - 1, 0)
  print(count,count*(ratio+1))
  src_lab_dir = os.path.join(src_train_dir, label_name)
  dest_lab_dir = os.path.join(src_dest_dir, label_name)
  os.makedirs(dest_lab_dir, exist_ok=True)
  for file in os.listdir(src_lab_dir):
    img = load_img(os.path.join(src_lab_dir,file))
    x = img_to_array(img)
    # flow() expects a batch dimension in front of the image array.
    x=x.reshape((1,) + x.shape)
    # save_prefix ties each saved file to its source image so that copies of
    # different source images cannot overwrite each other in dest_lab_dir.
    flow = datagen.flow(x, batch_size=1, save_to_dir=dest_lab_dir,
                        save_prefix=os.path.splitext(file)[0], save_format='jpg')
    # Each next() produces (and saves) exactly one augmented image; drawing
    # exactly `ratio` batches avoids the extra copy of a break-after-generate loop.
    for _ in range(ratio):
      next(flow)

In [None]:
# Count the JPEG files in every label folder of train_new1/, the directory the
# conversion and augmentation cells write to (nothing in this notebook creates
# train_new/, and os.walk silently yields nothing for a missing directory).
for dirpath, dirnames, filenames in os.walk('/content/gdrive/My Drive/Project/train_new1/'):
    jpg_count = sum(1 for f in filenames if f.endswith(".jpg"))
    # Skip folders without JPEGs (e.g. the root itself) instead of printing
    # an empty label with a zero count.
    if jpg_count:
        print(os.path.split(dirpath)[1], jpg_count)

In [None]:
# Run from the project folder: retrain.py, the image directory and the output
# files are all given as relative paths.
os.chdir('/content/gdrive/My Drive/Project')
# Writes the retrained graph to model-new-8.pb and its labels to labels-new-8.txt.
# NOTE(review): --image_dir points at train/, not at the augmented train_new1/ — confirm this is intended.
!python retrain.py --image_dir train --output_graph model-new-8.pb --output_labels labels-new-8.txt --how_many_training_steps 5000 --learning_rate 0.01 

In [None]:
os.chdir('/tmp')
!rm -r imagenet

In [None]:
# Show retrain.py's command-line options. The script is resolved relative to
# the current working directory, so this has to run from the project folder.
!python retrain.py -h

In [None]:
# Work from the project folder: the cells below refer to models, labels and
# test.csv by relative path.
os.chdir('/content/gdrive/My Drive/Project')

In [None]:
IMAGE_SIZE=224
!tflite_convert --graph_def_file=model-new-8.pb --output_file=model-new-8-v1.lite --input_format=TENSORFLOW_GRAPHDEF --output_format=TFLITE --input_shape=1,299,299,3 --input_array=Mul --output_array=final_result --inference_type=FLOAT --input_data_type=FLOAT --post_training_quantize

In [None]:
import tensorflow as tf
# model-new-7.pb is a single .pb graph file, whereas from_saved_model expects a
# SavedModel *directory*; a frozen graph is loaded with from_frozen_graph.
# Input/output tensor names and the input shape mirror the tflite_convert
# command used for model-new-8.pb (assumes both models share the same
# architecture — confirm).
converter = tf.lite.TFLiteConverter.from_frozen_graph(
    'model-new-7.pb',
    input_arrays=['Mul'],
    output_arrays=['final_result'],
    input_shapes={'Mul': [1, 299, 299, 3]})
converter.post_training_quantize = True
tflite_quantized_model = converter.convert()
# Write through a context manager so the file is flushed and closed.
with open("quantized_model-new-7.tflite", "wb") as f:
    f.write(tflite_quantized_model)

In [None]:
IMAGE_SIZE=224
!tflite_convert --graph_def_file=model.pb --output_file=model.lite --input_format=TENSORFLOW_GRAPHDEF --output_format=TFLITE --input_shape=1,${IMAGE_SIZE},${IMAGE_SIZE},3 --input_array=input --output_array=final_result --inference_type=FLOAT --input_data_type=FLOAT

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pandas as pd
import argparse
import sys,os
import tensorflow as tf
from PIL import Image
from tqdm import tqdm

In [None]:
# Build `test`: the first-column value of every row in test.csv, in file
# order, with a tqdm progress bar over the rows. `i` ends up holding the
# number of collected entries.
t=tqdm(pd.read_csv('test.csv').values)
test=[row[0] for row in t]
i=len(test)

In [None]:
def load_image(filename):
    """Read in the image data to be classified.

    Args:
        filename: Path of the image file.

    Returns:
        The raw, undecoded file contents.
    """
    # The context manager closes the file handle once the bytes are read.
    with tf.gfile.GFile(filename, 'rb') as f:
        return f.read()

def load_labels(filename):
    """Read in labels, one label per line.

    Args:
        filename: Path of the text file holding the labels.

    Returns:
        A list with one entry per line, trailing whitespace removed.
    """
    # The context manager closes the file handle once all lines are read.
    with tf.gfile.GFile(filename) as f:
        return [line.rstrip() for line in f]

def load_graph(filename):
    """Unpersist a graph from file into the default graph.

    Args:
        filename: Path of the serialized GraphDef (.pb) file.
    """
    # GFile is used here as in load_labels; FastGFile is a deprecated alias.
    with tf.gfile.GFile(filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    # name='' imports the nodes without a name prefix, so tensors keep the
    # names stored in the file.
    tf.import_graph_def(graph_def, name='')

In [None]:
def run_graph(src, dest, labels, input_layer_name, output_layer_name, num_top_predictions):
    """Classify every image in `src` and write the results to submit.csv.

    Each image is converted to an RGB JPEG in `dest`, fed to the graph that is
    currently loaded as the default graph, and its top predictions are printed
    and appended to submit.csv as `image_id,label,score` lines.

    Args:
        src: Directory holding the images to classify.
        dest: Directory that receives the JPEG copies (created if missing).
        labels: List of class labels, indexed by output node id.
        input_layer_name: Name of the tensor that is fed the raw JPEG bytes.
        output_layer_name: Name of the tensor holding the per-class scores.
        num_top_predictions: Number of best-scoring classes reported per image.
    """
    os.makedirs(dest, exist_ok=True)
    with tf.Session() as sess, open('submit.csv', 'w') as outfile:
        # The output tensor does not change between images: look it up once.
        softmax_tensor = sess.graph.get_tensor_by_name(output_layer_name)
        # os.listdir returns names in arbitrary order, so an image must not be
        # matched to an id by position. The id is taken from the file's own
        # name (assumes files are named <image id>.<ext>, as the training
        # images are — confirm), and sorting makes the output order stable.
        for f in sorted(os.listdir(src)):
            image_id = os.path.splitext(f)[0]
            jpg_path = os.path.join(dest, image_id + '.jpg')
            with Image.open(os.path.join(src, f)) as im:
                im.convert('RGB').save(jpg_path)
            image_data = load_image(jpg_path)
            # sess.run returns one row of class scores per input image;
            # unpack the single row for this image.
            predictions, = sess.run(softmax_tensor, {input_layer_name: image_data})

            # Sort to show labels in order of confidence
            top_k = predictions.argsort()[-num_top_predictions:][::-1]
            for node_id in top_k:
                line = image_id + ',' + labels[node_id] + ',' + str(predictions[node_id])
                print(line)
                outfile.write(line + '\n')

In [None]:
# Classify the images in test_img with the model5.pb graph and its label file;
# JPEG copies of the inputs go to test_img2.
src='test_img'
dest='test_img2'
graph='model5.pb'
input_layer='DecodeJpeg/contents:0'
output_layer='final_result:0'
num_top_predictions=1

labels=load_labels('labels5.txt')
load_graph(graph)
run_graph(src, dest, labels,
          input_layer_name=input_layer,
          output_layer_name=output_layer,
          num_top_predictions=num_top_predictions)

#### Converts the "xxxxxxx" model into the mobile version of it 

In [None]:
# Convert the model5.pb graph into xxxxxxx.tflite, using `Mul` as the input
# array and `final_result` as the output array.
!tflite_convert \
    --output_file=xxxxxxx.tflite \
    --graph_def_file=model5.pb \
    --input_arrays=Mul \
    --output_arrays=final_result

In [None]:
!lscp