<a href="https://colab.research.google.com/github/KirtiNayak11/NLP/blob/main/satellite_image_caption_generation_using_vgg16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

In [None]:
import os   # handling the files
import pickle # storing numpy features
import numpy as np
from tqdm.notebook import tqdm # how much data is process till now

from tensorflow.keras.applications.vgg16 import VGG16 , preprocess_input # extract features from image data.
from tensorflow.keras.preprocessing.image import load_img , img_to_array
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.layers import Input , Dense , LSTM , Embedding , Dropout , add

In [None]:
# Build the image feature extractor: VGG16 cut off at its second-to-last
# layer, so it outputs that layer's activations instead of class scores.
model = VGG16()
model = Model(inputs = model.inputs,outputs = model.layers[-2].output)
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the cell output.
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
[1m553467096/553467096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


None


In [None]:
# Root of the dataset: the image sub-directories read by feature_mapping
# and the caption CSV files live under this path.
BASE_DIR = "/kaggle/input/satellite-image-caption-generation"
# Directory where the pickled feature caches are written and read back.
WORKING_DIR = "/kaggle/working/"

In [None]:
def feature_mapping(dir):
    """Extract a feature array for every image file in one dataset split.

    Relies on the module-level ``model`` being the truncated VGG16 feature
    extractor at call time. A later cell rebinds ``model`` to the captioning
    network, so this function must be run before that cell.

    Args:
        dir: Name of the sub-directory of ``BASE_DIR`` that holds the images
            (for example ``'test'``). The name shadows the ``dir`` builtin
            but is kept so existing callers keep working.

    Returns:
        dict mapping image id (the file name up to its first ``'.'``) to the
        array returned by ``model.predict`` for that single image.
    """
    features = {}
    directory = os.path.join(BASE_DIR, dir)

    for img_name in tqdm(os.listdir(directory)):
        img_path = os.path.join(directory, img_name)
        # Skip sub-directories and other non-file entries; load_img cannot
        # open them and would abort the whole extraction run.
        if not os.path.isfile(img_path):
            continue
        image = load_img(img_path, target_size=(224, 224))
        # Add a leading batch axis of size 1 before preprocessing.
        image = np.expand_dims(image, axis=0)
        image = preprocess_input(image)
        feature = model.predict(image, verbose=0)
        # Same id rule as the one applied to the CSV file paths, so features
        # and captions can be joined on it.
        image_id = img_name.split('.')[0]
        features[image_id] = feature
    return features

In [None]:
# Run the feature extractor over every image in the 'test' sub-directory.
test_features = feature_mapping('test')

  0%|          | 0/1093 [00:00<?, ?it/s]

In [None]:
# Run the feature extractor over the 'train' and 'valid' sub-directories.
train_features = feature_mapping('train')
valid_features = feature_mapping('valid')

  0%|          | 0/8734 [00:00<?, ?it/s]

In [None]:
# Cache the test-split features on disk so they can be reloaded later.
test_features_path = os.path.join(WORKING_DIR, 'test_features.pkl')
with open(test_features_path, 'wb') as features_file:
    pickle.dump(test_features, features_file)

In [None]:
# Reload cached validation features when a cache file is available.
# The cache is only written by a later cell, so on a fresh session the file
# does not exist yet; in that case keep the features computed above instead
# of failing with FileNotFoundError.
valid_features_path = os.path.join(WORKING_DIR, 'valid_features.pkl')
if os.path.exists(valid_features_path):
    with open(valid_features_path, 'rb') as f:
        valid_features = pickle.load(f)

In [None]:
# Write the train and validation feature dictionaries to WORKING_DIR,
# one pickle file per split.
for cache_name, split_features in (
    ('train_features.pkl', train_features),
    ('valid_features.pkl', valid_features),
):
    with open(os.path.join(WORKING_DIR, cache_name), 'wb') as features_file:
        pickle.dump(split_features, features_file)

In [None]:
# Read both feature caches back from WORKING_DIR.
train_features_path = os.path.join(WORKING_DIR, 'train_features.pkl')
with open(train_features_path, 'rb') as train_file:
    train_features = pickle.load(train_file)

valid_features_path = os.path.join(WORKING_DIR, 'valid_features.pkl')
with open(valid_features_path, 'rb') as valid_file:
    valid_features = pickle.load(valid_file)

In [None]:
# Caption table for the training split. The path is built from BASE_DIR so
# the dataset location is defined in exactly one place.
train_df = pd.read_csv(os.path.join(BASE_DIR, 'train.csv'))

In [None]:
# Caption table for the validation split. The path is built from BASE_DIR so
# the dataset location is defined in exactly one place.
valid_df = pd.read_csv(os.path.join(BASE_DIR, 'valid.csv'))

In [None]:
# Caption table for the test split. The path is built from BASE_DIR so the
# dataset location is defined in exactly one place.
test_df = pd.read_csv(os.path.join(BASE_DIR, 'test.csv'))

In [None]:
# Preview the first rows of the raw training table.
train_df.head()

In [None]:
# Preview the first rows of the raw validation table.
valid_df.head()

In [None]:
# Preview the first rows of the raw test table.
test_df.head()

In [None]:
import re

In [None]:
def preprocess_caption(caption):
    """Normalize one caption to lowercase words separated by single spaces.

    The value is converted with ``str()`` first, so non-string input (for
    example ``None`` or NaN) is cleaned as its string form.

    Args:
        caption: Raw caption value.

    Returns:
        str containing only the letters a-z and single spaces, with no
        leading or trailing whitespace.
    """
    lowered = str(caption).lower()
    # Apostrophes are outside [a-z\s], so this single pass drops them together
    # with digits, punctuation and every other non-letter symbol.
    letters_only = re.sub(r'[^a-z\s]', '', lowered)
    # Collapse each run of whitespace into one space, then trim the ends.
    return re.sub(r'\s+', ' ', letters_only).strip()


In [None]:
# Clean the caption column of every split with preprocess_caption.
for captions_df in (train_df, valid_df, test_df):
    captions_df['captions'] = captions_df['captions'].map(preprocess_caption)

In [None]:
# Wrap every caption in start/end markers. The markers must be separated from
# the caption text by spaces: the Tokenizer built below uses filters='' and
# splits on spaces, so without them the markers fuse with the first and last
# words ("<start>a", "boat<end>") and never become tokens of their own.
train_df['captions'] = train_df['captions'].apply(lambda x:'<start> ' + x + ' <end>')
valid_df['captions'] = valid_df['captions'].apply(lambda x:'<start> ' + x + ' <end>')
test_df['captions'] = test_df['captions'].apply(lambda x:'<start> ' + x + ' <end>')

In [None]:
# Preview the training table after caption cleaning and marker insertion.
train_df.head()

In [None]:
# Preview the test table after caption cleaning and marker insertion.
test_df.head()

In [None]:
# Preview the validation table after caption cleaning and marker insertion.
valid_df.head()

In [None]:
# Image id = base file name of `filepath` up to its first '.'.
train_df['image_id'] = train_df['filepath'].map(
    lambda path: os.path.basename(path).split(".")[0]
)

# image id -> list of every caption recorded for that image
train_captions = {
    image_id: caption_series.tolist()
    for image_id, caption_series in train_df.groupby('image_id')['captions']
}


In [None]:
# Image id = base file name of `filepath` up to its first '.'.
valid_df['image_id'] = valid_df['filepath'].map(
    lambda path: os.path.basename(path).split('.')[0]
)

# image id -> list of every caption recorded for that image
valid_captions = {
    image_id: caption_series.tolist()
    for image_id, caption_series in valid_df.groupby('image_id')['captions']
}

In [None]:
# Image id = base file name of `filepath` up to its first '.'.
test_df['image_id'] = test_df['filepath'].map(
    lambda path: os.path.basename(path).split('.')[0]
)

# image id -> list of every caption recorded for that image
test_captions = {
    image_id: caption_series.tolist()
    for image_id, caption_series in test_df.groupby('image_id')['captions']
}

In [None]:
# Flatten each {image id: [captions]} mapping into one flat list of captions.
train_all_captions = []
for caption_list in train_captions.values():
    train_all_captions.extend(caption_list)

valid_all_captions = []
for caption_list in valid_captions.values():
    valid_all_captions.extend(caption_list)

test_all_captions = []
for caption_list in test_captions.values():
    test_all_captions.extend(caption_list)

In [None]:
# Fit the vocabulary on the training captions only. filters='' disables the
# default punctuation filter, which would otherwise strip the '<' and '>' of
# the start/end markers.
tokenizer = Tokenizer(filters='')
tokenizer.fit_on_texts(train_all_captions)
# +1 because word indices start at 1; index 0 is left for padding.
total_words = len(tokenizer.word_index)+1
# Encode all captions in a single call instead of once per caption, and use
# default=0 so an empty caption list does not make max() raise.
max_length = max(
    (len(seq) for seq in tokenizer.texts_to_sequences(train_all_captions)),
    default=0,
)

In [None]:
def tokenize_caption(tokenizer,total_words,max_length,feature_map,all_captions,caption_map):
    """Collect the training arrays for every image in ``caption_map``.

    Calls ``generate_sequence`` once per image and appends the elements of
    each of its three return values to three running lists.

    Args:
        tokenizer: Tokenizer forwarded to ``generate_sequence``.
        total_words: Vocabulary size forwarded to ``generate_sequence``.
        max_length: Sequence length forwarded to ``generate_sequence``.
        feature_map: Mapping of image id to image features.
        all_captions: Not used by this function.
        caption_map: Mapping of image id to that image's captions.

    Returns:
        Tuple ``(x1, x2, y, total_words, tokenizer)`` where the first three
        items are the collected lists converted with ``np.array`` and the
        last two are the arguments passed through unchanged.
    """
    image_inputs = []
    text_inputs = []
    targets = []

    for image_id, image_captions in caption_map.items():
        image_part, text_part, target_part = generate_sequence(
            feature_map, image_id, image_captions, tokenizer, total_words, max_length
        )
        image_inputs.extend(image_part)
        text_inputs.extend(text_part)
        targets.extend(target_part)

    return (
        np.array(image_inputs),
        np.array(text_inputs),
        np.array(targets),
        total_words,
        tokenizer,
    )

In [None]:
def generate_sequence(features_map,image_id,captions,tokenizer,total_words,max_length):
    """Build every (image, caption prefix) -> next word sample for one image.

    Each caption of ``n`` tokens yields ``n - 1`` samples: the first ``i``
    tokens (left-padded to ``max_length``) paired with the one-hot encoding
    of token ``i``.

    Args:
        features_map: Mapping of image id to the image's feature array.
        image_id: Key of the image in ``features_map``.
        captions: Iterable of caption strings for this image.
        tokenizer: Fitted tokenizer used to encode the captions.
        total_words: Number of classes for the one-hot targets.
        max_length: Length every input sequence is padded to.

    Returns:
        Tuple ``(x1, x2, y)`` of arrays with one row per sample: flattened
        image features, padded input sequences and one-hot next-word targets.
        All three are empty when no caption has at least two tokens.
    """
    # Stored features carry a leading batch axis of size 1 (output of
    # model.predict on one image); flatten so each sample is a 1-D vector.
    image_feature = np.asarray(features_map[image_id]).reshape(-1)

    x1, x2, y = [], [], []
    for caption in captions:
        seq = tokenizer.texts_to_sequences([caption])[0]

        # Accumulate one sample per prefix; captions shorter than two tokens
        # contribute nothing.
        for i in range(1,len(seq)):
            in_seq,out_seq = seq[:i],seq[i]
            in_seq = pad_sequences([in_seq],maxlen=max_length)[0]
            out_seq = to_categorical([out_seq],num_classes=total_words)[0]
            x1.append(image_feature)
            x2.append(in_seq)
            y.append(out_seq)

    # Return only after ALL captions have been processed.
    return np.array(x1),np.array(x2),np.array(y)

In [None]:
# tokenize_caption is declared as
#   (tokenizer, total_words, max_length, feature_map, all_captions, caption_map)
# and returns five values; keyword arguments keep the call sites aligned with
# that signature, and the two passthrough return values are discarded.
train_x1,train_x2,train_y,_,_ = tokenize_caption(
    tokenizer=tokenizer,total_words=total_words,max_length=max_length,
    feature_map=train_features,all_captions=train_all_captions,caption_map=train_captions)
valid_x1,valid_x2,valid_y,_,_ = tokenize_caption(
    tokenizer=tokenizer,total_words=total_words,max_length=max_length,
    feature_map=valid_features,all_captions=valid_all_captions,caption_map=valid_captions)
test_x1,test_x2,test_y,_,_ = tokenize_caption(
    tokenizer=tokenizer,total_words=total_words,max_length=max_length,
    feature_map=test_features,all_captions=test_all_captions,caption_map=test_captions)

In [None]:
# Image branch: 4096-d feature vector -> dropout -> 256-d dense projection.
inputs1 = Input(shape=(4096,))
fe1 = Dropout(0.4)(inputs1)
fe2 = Dense(256,activation = 'relu')(fe1)

# Text branch: padded token ids -> 256-d embedding (index 0 masked as
# padding) -> dropout -> LSTM summary vector.
inputs2 = Input(shape=(max_length,))
se1 = Embedding(total_words,256,mask_zero = True)(inputs2)
se2 = Dropout(0.4)(se1)
se3 = LSTM(256)(se2)

# Decoder: sum both 256-d branches, then predict a distribution over the
# vocabulary. The output width must equal total_words, the same class count
# used for the one-hot targets.
decoder1 = add([fe2,se3])
decoder2 = Dense(256,activation ='relu')(decoder1)
outputs = Dense(total_words,activation = 'softmax')(decoder2)

# NOTE: this rebinds `model`, which until now referred to the VGG16 feature
# extractor used by feature_mapping.
model = Model(inputs=[inputs1,inputs2],outputs=outputs)
model.compile(loss='categorical_crossentropy',optimizer='adam')

plot_model(model,show_shapes=True)

In [None]:
# Train the captioning model, evaluating on the validation split each epoch.
model.fit(
    x=[train_x1, train_x2],
    y=train_y,
    validation_data=([valid_x1, valid_x2], valid_y),
    epochs=20,
    batch_size=64,
    verbose=1,
)

In [None]:
def generate_caption(model,toknizer,image_features,max_length):
    """Greedily decode a caption for one image.

    Starting from ``'<start>'``, repeatedly feeds the text generated so far
    to the model and appends the highest-scoring next word, until
    ``'<end>'`` or an unknown index is predicted or ``max_length`` words
    have been produced.

    Args:
        model: Captioning model taking ``[image_features, sequence]``.
        toknizer: Fitted tokenizer used for encoding and decoding. The
            misspelled name is kept so existing callers are unaffected.
        image_features: Feature array for a single image, already shaped
            with a leading batch axis.
        max_length: Padded sequence length and maximum number of steps.

    Returns:
        The generated caption with the start/end markers removed.
    """
    # Use the tokenizer that was passed in rather than whatever global
    # `tokenizer` happens to exist.
    tokenizer = toknizer

    in_text = '<start>'

    for _ in range(max_length):
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        # Pad with the pad_sequences default (zeros in front), exactly as the
        # training samples are padded in generate_sequence.
        sequence = pad_sequences([sequence],maxlen=max_length)
        y_pred = model.predict([image_features,sequence],verbose =0)
        next_word_index =np.argmax(y_pred)
        word = tokenizer.index_word.get(next_word_index,None)

        if word is None or word =='<end>' :
            break

        in_text += ' ' + word

    return in_text.replace('<start>','').replace('<end>','').strip()

In [None]:
import random

import cv2
import matplotlib.pyplot as plt

# Pick a random test image and caption it with the trained model, using the
# tokenizer and maximum length that were fitted on the training captions.
random_image_id = random.choice(list(test_features.keys()))
random_image_features = np.array(test_features[random_image_id]).reshape(1,4096)

caption = generate_caption(model,tokenizer,random_image_features,max_length)

# Locate the image file with the same rule feature_mapping used to derive
# ids: the file name up to its first '.'.
test_dir = os.path.join(BASE_DIR, 'test')
image_name = next(
    name for name in os.listdir(test_dir)
    if name.split('.')[0] == random_image_id
)
image_path = os.path.join(test_dir, image_name)

image = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
# OpenCV loads images as BGR; matplotlib expects RGB.
image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)

plt.imshow(image)
plt.axis('off')
plt.title(caption)
plt.show()