## Archive

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting Keras-Preprocessing
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 KB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Keras-Preprocessing
Successfully installed Keras-Preprocessing-1.1.2


In [None]:
!pip install -U --no-cache-dir gdown --pre

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gdown
  Downloading gdown-4.6.4-py3-none-any.whl (14 kB)
Installing collected packages: gdown
  Attempting uninstall: gdown
    Found existing installation: gdown 4.4.0
    Uninstalling gdown-4.4.0:
      Successfully uninstalled gdown-4.4.0
Successfully installed gdown-4.6.4


In [None]:
!gdown 1zPH9Xna1RxwGVe4XVUyP6HfnMCaQv023

Downloading...
From: https://drive.google.com/uc?id=1zPH9Xna1RxwGVe4XVUyP6HfnMCaQv023
To: /content/model.h5
100% 43.9M/43.9M [00:00<00:00, 138MB/s]


In [None]:
!gdown 1QRxMhSC-1ooDc1xrZl8lE5gA8TBxDhem

Downloading...
From: https://drive.google.com/uc?id=1QRxMhSC-1ooDc1xrZl8lE5gA8TBxDhem
To: /content/model_gen.h5
  0% 0.00/950k [00:00<?, ?B/s]100% 950k/950k [00:00<00:00, 117MB/s]


In [None]:
!gdown 1AGiFSgNPfoCSquAYaV1kuCw6iu_TXS3v

Downloading...
From: https://drive.google.com/uc?id=1AGiFSgNPfoCSquAYaV1kuCw6iu_TXS3v
To: /content/tokenizer.pkl
  0% 0.00/352k [00:00<?, ?B/s]100% 352k/352k [00:00<00:00, 99.9MB/s]


In [None]:
from keras import preprocessing
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.utils import load_img, img_to_array, pad_sequences
from keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Flatten
from numpy import argmax

import os
import pickle
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [None]:
def extract_features(filename):
    """Encode the image stored at ``filename`` as a feature array.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis and passed through ``preprocess_input``. It is then fed to a
    model made of the VGG16 base (ImageNet weights, ``include_top=False``)
    followed by ``Flatten`` and a 4096-unit ReLU ``Dense`` layer.

    Args:
        filename: Path of the image file to encode.

    Returns:
        The output of ``predict`` for the single-image batch.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    pixels_in = Input(shape=(224, 224, 3))
    conv_maps = backbone(pixels_in)
    flattened = Flatten()(conv_maps)
    # NOTE(review): this Dense layer is created anew on every call and no
    # trained weights are loaded into it in this function -- confirm that is
    # what the captioning model expects.
    projected = Dense(4096, activation='relu')(flattened)
    extractor = Model(inputs=pixels_in, outputs=projected)

    pixels = img_to_array(load_img(filename, target_size=(224, 224)))
    # Prepend a batch axis of size 1: (h, w, c) -> (1, h, w, c).
    batch = preprocess_input(pixels.reshape((1,) + pixels.shape))

    return extractor.predict(batch, verbose=0)

def word_for_id(integer, tokenizer):
    """Return the word whose vocabulary index equals ``integer``.

    Args:
        integer: Index to look up (for example the argmax of a prediction).
        tokenizer: Object exposing a ``word_index`` mapping of word -> index.

    Returns:
        The first word mapped to ``integer``, or ``None`` if no word has
        that index.
    """
    for word, index in tokenizer.word_index.items():
        # Compare the loop variable; the earlier code compared an undefined
        # name (``word_index``) and raised NameError on the first iteration.
        if index == integer:
            return word
    return None

def generate_desc(model, tokenizer, photo, max_length):
    """Greedily decode a caption for ``photo``, one word per step.

    Starting from ``'startseq'``, each step encodes the text so far with
    ``tokenizer.texts_to_sequences``, pads it to ``max_length``, asks
    ``model`` for a prediction from ``[photo, sequence]`` and maps the argmax
    back to a word with ``word_for_id``.

    Args:
        model: Object whose ``predict`` accepts ``[photo, sequence]``.
        tokenizer: Tokenizer used for encoding and for the reverse lookup.
        photo: Image features passed to the model unchanged.
        max_length: Maximum number of decoding steps and the padding length.

    Returns:
        The generated text, beginning with ``'startseq'`` and ending with
        ``'endseq'`` when that word was predicted.
    """
    in_text = 'startseq'
    for _ in range(max_length):
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        sequence = pad_sequences([sequence], maxlen=max_length)
        yhat = model.predict([photo, sequence], verbose=0)
        word = word_for_id(argmax(yhat), tokenizer)
        # The per-word debug print was removed so the function only returns
        # the caption, like the later definitions in this notebook.
        if word is None:
            # Predicted index has no word in the vocabulary; stop decoding.
            break
        in_text += ' ' + word
        if word == 'endseq':
            break
    return in_text

In [None]:
def format_caption(raw_description):
    """Turn raw decoder output into a display caption.

    Removes the 'startseq' / 'endseq' markers, trims surrounding whitespace,
    upper-cases the first character and appends a full stop.

    Args:
        raw_description: Text returned by ``generate_desc``.

    Returns:
        The formatted caption, or ``''`` when nothing but markers was generated.
    """
    text = raw_description.replace("startseq", "").replace("endseq", "").strip()
    if not text:
        # Only markers were produced; indexing text[0] would raise IndexError.
        return ""
    # Keep the remainder verbatim: stripping it would glue a one-letter first
    # word to the next one ("a dog" -> "Adog").
    return text[0].upper() + text[1:] + '.'


print('=================================================')
print('============= Image Captioning Tool =============')
print('=================================================\n')

max_length = 34
image = '/content/20150319102252817.jpg' #input('Enter image path: ')

# NOTE(review): `model` and `tokenizer` are not assigned in this cell; they
# must already exist in the kernel -- confirm a loading cell runs before it.
if os.path.isfile(image):

    # Extract features
    photo = extract_features(image)

    # Generate descriptions
    description = format_caption(generate_desc(model, tokenizer, photo, max_length))

    # Load the image
    img = mpimg.imread(image)
    print()

    # Plot the image
    plt.imshow(img)
    plt.show()

    print("\nCaption:", description or "(none generated)")
else:
    print("Path does not exist!")

## Test 2

In [None]:
from keras import preprocessing
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.utils import load_img, img_to_array, pad_sequences
from keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Flatten
from numpy import argmax

import os
import pickle
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [None]:
!pip install Keras-Preprocessing

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting Keras-Preprocessing
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 KB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Keras-Preprocessing
Successfully installed Keras-Preprocessing-1.1.2


In [None]:
!gdown 1e_AlIo230Ja9Ogv6PDOY0053O7a8c_bf 

Downloading...
From: https://drive.google.com/uc?id=1e_AlIo230Ja9Ogv6PDOY0053O7a8c_bf
To: /content/model_9.h5
100% 66.4M/66.4M [00:01<00:00, 51.8MB/s]


In [None]:
# Locations of the saved captioning model and the pickled tokenizer.
# NOTE(review): the download cell just above this one reports saving
# /content/model_9.h5, not model2.h5 -- confirm this path exists on a fresh runtime.
model_source, tokenizer_source = "/content/model2.h5", "/content/tokenizer.pkl"

model = load_model(model_source)

# pickle.load can execute arbitrary code; only unpickle files you trust.
with open(tokenizer_source, mode='rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

In [None]:
def word_for_id(integer, tokenizer):
    """Map an integer index back to its word.

    Scans ``tokenizer.word_index`` (word -> index) in iteration order and
    returns the first word whose index equals ``integer``; returns ``None``
    when there is no such word.
    """
    hits = (
        candidate
        for candidate, position in tokenizer.word_index.items()
        if position == integer
    )
    return next(hits, None)

def generate_desc(model, tokenizer, photo, max_length):
    """Generate a textual description for a prepared photo with a trained model.

    Decoding starts from 'startseq'. Each step integer-encodes the text so
    far, pads it to ``max_length``, asks the model for a prediction, takes the
    argmax and maps that integer back to a word with ``word_for_id()``.
    Decoding stops after ``max_length`` steps, when the integer cannot be
    mapped to a word, or once 'endseq' has been appended.

    Returns the accumulated text, which begins with 'startseq'.
    """
    words = ['startseq']
    for _ in range(max_length):
        so_far = ' '.join(words)
        # Integer-encode the text generated so far, then pad it for the model.
        encoded = tokenizer.texts_to_sequences([so_far])[0]
        padded = pad_sequences([encoded], maxlen=max_length)
        # predict() returns scores; argmax turns them into a single integer.
        scores = model.predict([photo, padded], verbose=0)
        next_word = word_for_id(argmax(scores), tokenizer)
        if next_word is None:
            # The integer has no word in the vocabulary.
            break
        words.append(next_word)
        if next_word == 'endseq':
            break
    return ' '.join(words)
    

In [None]:
def extract_features(filename):
    """Encode the image stored at ``filename`` as a feature array.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis and passed through ``preprocess_input``. It is then fed to a
    model made of the VGG16 base (ImageNet weights, ``include_top=False``)
    followed by ``Flatten`` and a 1000-unit ReLU ``Dense`` layer.

    An earlier variant, previously kept here as unreachable commented-out
    code, predicted with a stock ``VGG16()`` model instead.

    Args:
        filename: Path of the image file to encode.

    Returns:
        The output of ``predict`` for the single-image batch.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    pixels_in = Input(shape=(224, 224, 3))
    conv_maps = backbone(pixels_in)
    flattened = Flatten()(conv_maps)
    # NOTE(review): this Dense layer is created anew on every call and no
    # trained weights are loaded into it in this function -- confirm that is
    # what the captioning model expects.
    projected = Dense(1000, activation='relu')(flattened)
    extractor = Model(inputs=pixels_in, outputs=projected)

    pixels = img_to_array(load_img(filename, target_size=(224, 224)))
    # Prepend a batch axis of size 1: (h, w, c) -> (1, h, w, c).
    batch = preprocess_input(pixels.reshape((1,) + pixels.shape))

    return extractor.predict(batch, verbose=0)

def word_for_id(integer, tokenizer):
    """Look up the word stored under index ``integer`` in the tokenizer.

    Walks ``tokenizer.word_index`` in iteration order and returns the first
    word whose index equals ``integer``, or ``None`` if there is none.
    """
    vocabulary = tokenizer.word_index
    matches = (token for token in vocabulary if vocabulary[token] == integer)
    return next(matches, None)

def generate_desc(model, tokenizer, photo, max_length):
    """Greedily build a caption for ``photo`` one predicted word at a time.

    Begins with 'startseq'; every step encodes and pads the caption so far,
    takes the argmax of ``model.predict([photo, sequence])`` and converts it
    to a word with ``word_for_id``. Returns as soon as the index has no word
    or 'endseq' has been appended, and otherwise after ``max_length`` steps.
    """
    caption = 'startseq'
    for _ in range(max_length):
        token_ids = tokenizer.texts_to_sequences([caption])[0]
        padded_ids = pad_sequences([token_ids], maxlen=max_length)
        predicted_id = argmax(model.predict([photo, padded_ids], verbose=0))
        word = word_for_id(predicted_id, tokenizer)
        if word is None:
            return caption
        caption = ' '.join((caption, word))
        if word == 'endseq':
            return caption
    return caption

# Re-read the pickled tokenizer from the path held in tokenizer_source.
# pickle.load can execute arbitrary code; only unpickle files you trust.
with open(tokenizer_source, mode='rb') as tokenizer_stream:
    tokenizer = pickle.load(tokenizer_stream)

# Passed to generate_desc as both the step limit and the padding length.
# NOTE(review): 2114 is far larger than the 34 used in the Archive section --
# confirm it matches the sequence length the loaded model expects.
max_length = 2114

In [None]:
# Image to caption; extract_features() loads it from this path and returns
# the feature array that is later passed to generate_desc().
path = '/content/1.jpeg'
photo = extract_features(path)

In [None]:
# Decode a caption for the extracted photo features. The printed text is the
# raw decoder output, so it still starts with the 'startseq' marker (and ends
# with 'endseq' when that word was predicted).
description = generate_desc(model, tokenizer, photo, max_length)
print(description)

startseq endseq
