<a href="https://colab.research.google.com/github/ArthiyaD/TroubleShooters/blob/Development/ACG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gensim==3.8.3

In [None]:
import nltk
nltk.download('stopwords')
import tensorflow as tf
import pandas as pd
import numpy as np
import pickle as pkl
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from gensim.models import Word2Vec,KeyedVectors
from IPython.display import Image, display
from nltk.corpus import stopwords
from tensorflow.keras.preprocessing import image, sequence
from tensorflow.keras.applications.resnet50 import ResNet50

In [None]:
# Fetch five Google Drive files by file ID (presumably into the current
# working directory, which the later cells assume is /content).
# NOTE(review): later cells read six paths under /content (the word2vec
# binary, two Keras models, two pickles and ModelCaptions.csv) while only
# five IDs are downloaded here - confirm that every file is really provided.
!gdown 1Y6v9wfUkibbYBTso534eK9uWJ3JzXKO9
!gdown 1Z4C6sVs5EhnwhW6I8m-DKhYJw3e7A6WX
!gdown 1Xv3CVAUuQPZbgQJqWJbuiPx14hcApcj5
!gdown 1YN4m-gece5itGTzIUdZC1t8pn1sWWZOc
!gdown 1jNFcFytMGUnG3oTgIYttilXD5B7ORjlD

In [None]:
# Paths of the artefacts this notebook expects under /content.
word_vect = '/content/GoogleNews-vectors-negative300.bin.gz'
model_text = '/content/model_text_categorize.h5'
model_image = '/content/saved_model.hp5'

# Keras models: `loaded_model` scores the caption categories and
# `saved_model` generates the captions (see the prediction cell).
loaded_model = tf.keras.models.load_model(model_text)
saved_model = tf.keras.models.load_model(model_image)

# Word2vec embeddings in binary format; `limit` caps how many vectors are read.
model = KeyedVectors.load_word2vec_format(word_vect, binary=True, limit=1000000)

In [None]:
# Headless ResNet50 with ImageNet weights and average pooling; get_encoding
# reshapes its prediction for one image to a 2048-element vector.
resnet = ResNet50(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(224, 224, 3),
)

In [None]:
# Vocabulary lookups used by predict_captions (word -> index, index -> word).
# NOTE: unpickling can execute arbitrary code - only load trusted files.
with open('/content/word_2_indices.pickle', 'rb') as handle:
    word_2_indices = pkl.load(handle)

with open('/content/indices_2_word.pickle', 'rb') as handle:
    indices_2_word = pkl.load(handle)

In [None]:
def preprocessing(img_path):
    """Load an image file and return it as a batch containing one image.

    Args:
        img_path: Path of the image file to load.

    Returns:
        The image resized to 224x224, converted to an array and given a
        leading batch axis.
    """
    # load_img documents target_size as (height, width); the channel count
    # is not part of it.
    im = image.load_img(img_path, target_size=(224, 224))
    im = image.img_to_array(im)
    # NOTE(review): no resnet50.preprocess_input is applied here - presumably
    # this matches how the captioning model was trained; confirm.
    return np.expand_dims(im, axis=0)

def get_encoding(model1, img):
    """Encode one image file as a flat 2048-element feature vector.

    Args:
        model1: Model whose ``predict`` output for a single image holds
            exactly 2048 values (the notebook passes the ResNet50 extractor).
        img: Path of the image file.

    Returns:
        The prediction reshaped to shape ``(2048,)``.
    """
    batch = preprocessing(img)
    features = model1.predict(batch, verbose=0)
    return features.reshape(2048)

def predict_captions(image, model, max_len=40):
    """Greedily decode a caption for an encoded image.

    Starting from ``"<start>"``, the partial caption is padded to ``max_len``
    indices and passed to ``model`` together with the image encoding; the
    highest-scoring word is appended until ``"<end>"`` is produced or the
    length limit is reached.

    Args:
        image: Image encoding (the notebook passes the get_encoding output).
        model: Captioning model taking ``[image_batch, caption_batch]``.
        max_len: Padded caption length, which also bounds the number of
            decoding steps. Defaults to 40.

    Returns:
        The caption as a space-separated string without the ``"<start>"``
        and ``"<end>"`` markers.
    """
    start_word = ["<start>"]
    while True:
        par_caps = [word_2_indices[i] for i in start_word]
        par_caps = sequence.pad_sequences([par_caps], maxlen=max_len,
                                          padding='post')
        preds = model.predict([np.array([image]), np.array(par_caps)], verbose=0)
        word_pred = indices_2_word[np.argmax(preds[0])]
        start_word.append(word_pred)

        if word_pred == "<end>" or len(start_word) > max_len:
            break

    words = start_word[1:]
    # Only strip the terminator when it is really there: if decoding stopped
    # on the length limit, the last entry is a genuine caption word.
    if words and words[-1] == "<end>":
        words.pop()
    return ' '.join(words)

def word_vector(tokens,size,model):
    """Average the embedding vectors of ``tokens``.

    Args:
        tokens: Iterable of words to look up.
        size: Dimensionality of each embedding vector.
        model: Mapping-like object returning the vector for ``model[word]``
            and raising ``KeyError`` for unknown words.

    Returns:
        An array of shape ``(1, size)`` holding the mean of the vectors that
        were found, or all zeros when none of the tokens is known.
    """
    vec = np.zeros((1, size))
    count = 0
    for word in tokens:
        try:
            vec += np.asarray(model[word]).reshape((1, size))
        except KeyError:
            # Out-of-vocabulary token: skip it instead of aborting the average.
            continue
        count += 1
    if count:
        vec /= count
    return vec

def token_check(x,model):
    """Return the tokens of ``x`` that are usable for embedding lookup.

    A token is kept when it is longer than three characters and is present
    in ``model.vocab``; the input order is preserved.
    """
    return [token for token in x if len(token) > 3 and token in model.vocab]

def clean_description_text(description, stop_words=None):
    """Normalise a caption for the text classifier.

    Digit runs are removed, the listed special characters are replaced by
    spaces, the text is lower-cased and stop words are dropped.

    Args:
        description: Raw caption text.
        stop_words: Optional iterable of words to drop. Defaults to the NLTK
            English stop-word list.

    Returns:
        The remaining words joined by single spaces.
    """
    import re  # local import: `re` is not among the notebook-level imports

    # str.replace() treats its argument literally, so a regular expression is
    # required to strip digit runs.
    description = re.sub(r'\d+', '', description)
    # NOTE(review): the last entry looks like a mis-encoded en dash; it is
    # kept as-is - confirm against the data the classifier was trained on.
    spec_chars = ["!",'"',"#","%","&","'","(",")",
                  "*","+",",","-",".","/",":",";","<",
                  "=",">","?","@","[","\\","]","^","_",
                  "`","{","|","}","~","â€“"]
    for char in spec_chars:
        description = description.replace(char, ' ')

    # Build the stop-word set once instead of once per word.
    if stop_words is None:
        stop_words = stopwords.words('english')
    stop_words = set(stop_words)

    filtered_words = [word for word in description.lower().split()
                      if word not in stop_words]
    return ' '.join(filtered_words)

def FrameCapture(path, out_path="frame.jpg"):
    """Save the first frame of a video as an image file.

    Args:
        path: Path of the video to read.
        out_path: Where the frame is written. Defaults to ``"frame.jpg"``,
            relative to the current working directory.

    Returns:
        ``out_path``, for convenience.

    Raises:
        ValueError: If no frame could be read from ``path``.
        IOError: If the frame could not be written to ``out_path``.
    """
    vidObj = cv2.VideoCapture(path)
    try:
        success, frame = vidObj.read()
    finally:
        # Always free the underlying capture handle.
        vidObj.release()

    if not success or frame is None:
        raise ValueError("Could not read a frame from video: {!r}".format(path))
    # imwrite reports a failed write through its return value.
    if not cv2.imwrite(out_path, frame):
        raise IOError("Could not write frame to: {!r}".format(out_path))
    return out_path


In [None]:
option = input("Enter Video or Image")
# Compare case-insensitively and ignore surrounding whitespace.
choice = option.strip().lower()

if choice == "video":
    # Input the video path
    img = input("Video Path: ")

    # Save the first frame of the video as frame.jpg
    FrameCapture(img)
    frame_path = '/content/frame.jpg'

elif choice == "image":
    # Input the image path
    frame_path = input("Image Path: ")

else:
    # Stop here instead of failing later with a NameError on frame_path.
    raise ValueError(
        "Unsupported option {!r}: enter 'Video' or 'Image'".format(option))


# Encode the selected image with the ResNet50 feature extractor
test_img = get_encoding(resnet, frame_path)

# Generate the caption from the image encoding
caption = predict_captions(test_img, saved_model)

# Clean the caption and keep only the tokens known to the word2vec model
clean_description = clean_description_text(caption)
description_tokens = clean_description.split(" ")
description_tokens_filtered = token_check(description_tokens, model)

# Mean token embedding, stored as a single-row frame for the classifier
_arrays = np.zeros((1, 300))
_arrays[0, :] = word_vector(description_tokens_filtered, 300, model)
vectorized_array = pd.DataFrame(_arrays)

# Score the categories and take the index of the highest score.
# NOTE(review): the label order is assumed to match the classifier's output
# indices - confirm.
pred = loaded_model.predict([vectorized_array.iloc[:, 0:300]], verbose=0)
value = np.argmax(pred, axis=-1)
labels = [
    'adventure',
    'art and music',
    'food',
    'history',
    'manufacturing',
    'nature',
    'science and technology',
    'sports',
    'travel',
]

# Show the image, then the predicted caption and label
img = mpimg.imread(frame_path)
imgplot = plt.imshow(img)
plt.show()

print("\nPredicted Caption : ", caption)
print("Predicted Label   : ", labels[value.item()])

In [None]:
# Recommend stored captions whose category matches the predicted label.
predicted_label = labels[value.item()]

captions = pd.read_csv('/content/ModelCaptions.csv')
is_category = captions['Category'].str.lower() == predicted_label
df = captions.loc[is_category]
caption_list = df['Description']

# Print the header before the list of recommendations.
result = 'Recommended Captions for the label:'
print(result, predicted_label)

# Show at most five recommendations. A dedicated loop variable keeps the
# predicted `caption` variable intact.
for recommended_caption in caption_list.head(5):
    print(recommended_caption)