In [None]:
### import kagglehub

# Download latest version
###path = kagglehub.dataset_download("adityajn105/flickr8k")

###print("Path to dataset files:", path)

In [None]:
%pip install tensorflow numpy pandas matplotlib nltk


# loading dataset

In [2]:
import pandas as pd

captions_path = 'captions.txt'
captions_data = pd.read_csv(captions_path, delimiter=',', header=None, names=['image', 'caption'])

print(captions_data.head())


                       image  \
0                      image   
1  1000268201_693b08cb0e.jpg   
2  1000268201_693b08cb0e.jpg   
3  1000268201_693b08cb0e.jpg   
4  1000268201_693b08cb0e.jpg   

                                             caption  
0                                            caption  
1  A child in a pink dress is climbing up a set o...  
2              A girl going into a wooden building .  
3   A little girl climbing into a wooden playhouse .  
4  A little girl climbing the stairs to her playh...  


# re-arrange the dataset since it could be multiple captions to one image

In [3]:
from collections import defaultdict

image_captions = defaultdict(list)
for i, row in captions_data.iterrows():
    image_id, caption = row['image'], row['caption']
    image_captions[image_id].append(caption)

# Example of image ID and corresponding captions
for key, value in list(image_captions.items())[:3]:
    print(f"Image ID: {key}, Captions: {value}")

Image ID: image, Captions: ['caption']
Image ID: 1000268201_693b08cb0e.jpg, Captions: ['A child in a pink dress is climbing up a set of stairs in an entry way .', 'A girl going into a wooden building .', 'A little girl climbing into a wooden playhouse .', 'A little girl climbing the stairs to her playhouse .', 'A little girl in a pink dress going into a wooden cabin .']
Image ID: 1001773457_577c3a7d70.jpg, Captions: ['A black dog and a spotted dog are fighting', 'A black dog and a tri-colored dog playing with each other on the road .', 'A black dog and a white dog with brown spots are staring at each other in the street .', 'Two dogs of different breeds looking at each other on the road .', 'Two dogs on pavement moving toward each other .']


# Split the captions into train and test

In [4]:
import random

# Get unique image IDs
unique_image_ids = list(image_captions.keys())

# Shuffle and split
random.seed(42)
random.shuffle(unique_image_ids)
split_index = int(len(unique_image_ids) * 0.8)

train_image_ids = unique_image_ids[:split_index]
test_image_ids = unique_image_ids[split_index:]

# Split captions dictionary
train_image_captions = {img_id: image_captions[img_id] for img_id in train_image_ids}
test_image_captions = {img_id: image_captions[img_id] for img_id in test_image_ids}

print(f"Training images: {len(train_image_captions)}, Testing images: {len(test_image_captions)}")


Training images: 6473, Testing images: 1619


In [5]:
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt

In [6]:
base_model = InceptionV3(weights='imagenet')
model = Model(inputs=base_model.input, outputs=base_model.layers[-2].output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels.h5


# Extracting Features

In [9]:
import os
import numpy as np
import pandas as pd
images_directory = 'Images/'
from tqdm import tqdm
# Extract features for images
def extract_features(image_ids, directory):
    features = {}
    for img_name in tqdm(image_ids, desc="Extracting Features"):
        img_path = os.path.join(directory, img_name)
        try:
            img = load_img(img_path, target_size=(299, 299))
            img = img_to_array(img)
            img = np.expand_dims(img, axis=0)
            img = preprocess_input(img)

            feature = model.predict(img, verbose=0)
            features[img_name] = feature.flatten()
        except Exception as e:
            print(f"Error processing {img_name}: {e}")
    return features

# Extract features for train and test sets
train_image_features = extract_features(train_image_ids, images_directory)
test_image_features = extract_features(test_image_ids, images_directory)

Extracting Features:  44%|████▍     | 2871/6473 [04:18<04:45, 12.62it/s]

Error processing image: [Errno 2] No such file or directory: 'Images/image'


Extracting Features: 100%|██████████| 6473/6473 [09:59<00:00, 10.80it/s]
Extracting Features: 100%|██████████| 1619/1619 [02:54<00:00,  9.29it/s]
