<a href="https://colab.research.google.com/github/Rohan-14-1/RD_INFRO_TECHNOLOGY/blob/main/IP_Task_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow keras numpy pillow




In [None]:
import os
import zipfile
import requests
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, Dropout, add
import numpy as np
import string
from PIL import Image  # Fixes UnidentifiedImageError

# Download URLs for the two Flickr8k archives: the images and the caption/text files
IMAGE_DATASET_URL = "https://github.com/jbrownlee/Datasets/releases/download/Flickr8k/Flickr8k_Dataset.zip"
CAPTION_DATASET_URL = "https://github.com/jbrownlee/Datasets/releases/download/Flickr8k/Flickr8k_text.zip"

# Working directory and the local paths each archive is saved to.
# Both archives are later extracted into BASE_DIR as well.
BASE_DIR = "/content"
IMAGE_ZIP_PATH = os.path.join(BASE_DIR, "Flickr8k_Dataset.zip")
CAPTION_ZIP_PATH = os.path.join(BASE_DIR, "Flickr8k_text.zip")

# Function to download files
def download_file(url, save_path, timeout=60):
    """Download ``url`` to ``save_path`` unless that file already exists.

    Args:
        url: HTTP(S) address of the file to fetch.
        save_path: Destination path on disk.
        timeout: Seconds to wait for the server to connect/send data before
            giving up (passed straight to ``requests.get``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    if os.path.exists(save_path):
        return

    # Stream into a sibling ".part" file and rename it only once the transfer
    # has finished. Otherwise an interrupted download would leave a truncated
    # archive at save_path, and the existence check above would treat it as
    # complete on the next run.
    partial_path = save_path + ".part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Fail loudly instead of saving an HTML error page as the archive.
            response.raise_for_status()
            with open(partial_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        file.write(chunk)
        os.replace(partial_path, save_path)
    finally:
        # After a successful rename the partial file no longer exists; this
        # only removes leftovers from a failed or interrupted transfer.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print(f"Downloaded: {save_path}")

# Fetch both archives; download_file skips any file that is already on disk
for _url, _destination in (
    (IMAGE_DATASET_URL, IMAGE_ZIP_PATH),
    (CAPTION_DATASET_URL, CAPTION_ZIP_PATH),
):
    download_file(_url, _destination)

def _already_extracted(member, extract_path):
    """Return True when ``member`` is already on disk under ``extract_path``."""
    target = os.path.join(extract_path, member.filename)
    if member.is_dir():
        return os.path.isdir(target)
    # Comparing sizes catches files truncated by an interrupted extraction.
    return os.path.isfile(target) and os.path.getsize(target) == member.file_size


# Function to extract ZIP files
def extract_zip(zip_path, extract_path):
    """Extract ``zip_path`` into ``extract_path``, skipping members already there.

    A member counts as already extracted when a file of the same uncompressed
    size (or, for directory entries, a directory) exists at its destination.
    Re-running the notebook therefore does not unpack the whole archive again.

    Args:
        zip_path: Path of the ZIP archive to read.
        extract_path: Directory the archive is unpacked into.
    """
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        pending = [
            member
            for member in zip_ref.infolist()
            if not _already_extracted(member, extract_path)
        ]
        if pending:
            zip_ref.extractall(extract_path, members=pending)

    if pending:
        print(f"Extracted {zip_path} to {extract_path}")
    else:
        print(f"Already extracted: {zip_path}")

# Unpack both archives into BASE_DIR
for _archive in (IMAGE_ZIP_PATH, CAPTION_ZIP_PATH):
    extract_zip(_archive, BASE_DIR)

# Locate the extracted image folder dynamically, ignoring __MACOSX
# The archive unpacks to "Flicker8k_Dataset" (sic); both spellings are tried first.
_KNOWN_IMAGE_DIRS = ("Flickr8k_Dataset", "Flicker8k_Dataset")

IMAGE_DIR = None
for root, dirs, files in os.walk(BASE_DIR):
    # Prune macOS metadata folders in place so os.walk never descends into
    # them. Visit the expected dataset folder names first and the rest in
    # sorted order, which keeps the search reproducible.
    dirs[:] = sorted(
        (d for d in dirs if d != "__MACOSX"),
        key=lambda d: (d not in _KNOWN_IMAGE_DIRS, d),
    )
    # AppleDouble sidecars ("._name.jpg") end in .jpg but are not images, so
    # they must not qualify a folder as the image directory.
    if any(fname.endswith('.jpg') and not fname.startswith('._') for fname in files):
        IMAGE_DIR = root
        break

if IMAGE_DIR is None:
    raise FileNotFoundError("Flickr8k image dataset not found! Check extraction path.")

print(f"Found image dataset at: {IMAGE_DIR}")

# Find the first directory under BASE_DIR that holds the caption token file
_TOKEN_FILE_NAME = "Flickr8k.token.txt"
CAPTION_FILE = next(
    (
        os.path.join(folder, _TOKEN_FILE_NAME)
        for folder, _subdirs, names in os.walk(BASE_DIR)
        if _TOKEN_FILE_NAME in names
    ),
    None,
)

if CAPTION_FILE is None:
    raise FileNotFoundError("Flickr8k.token.txt not found! Check extraction path.")

print(f"Found caption file at: {CAPTION_FILE}")

# Load and preprocess captions
def load_captions(caption_file):
    """Parse a caption token file into ``{image_id: [caption, ...]}``.

    Each usable line is ``<image file>#<index>``, a TAB, then the caption.
    Captions are lower-cased, stripped of ASCII punctuation and wrapped in
    the ``startseq`` / ``endseq`` markers.

    Blank lines and lines without a TAB separator are skipped instead of
    raising. Only the first TAB splits the line, so a caption that itself
    contains a TAB is kept whole.

    Args:
        caption_file: Path to the token file (read as UTF-8).

    Returns:
        dict mapping the image id (the part before ``#``, file extension
        included) to its list of processed captions, in file order.
    """
    # Build the punctuation-removal table once rather than once per line.
    strip_punctuation = str.maketrans('', '', string.punctuation)

    caption_dict = {}
    with open(caption_file, 'r', encoding='utf-8') as file:
        for line in file:
            image_ref, separator, caption = line.strip().partition('\t')
            if not separator:
                continue
            image_id = image_ref.split('#')[0]
            cleaned = caption.lower().translate(strip_punctuation)
            caption_dict.setdefault(image_id, []).append(
                "startseq " + cleaned + " endseq"
            )
    return caption_dict

# Parse the caption file into {image id: [processed captions]}
captions = load_captions(CAPTION_FILE)

# ImageNet-pretrained ResNet50, cut at its second-to-last layer so that it
# outputs a feature vector instead of class scores
base_model = ResNet50(weights='imagenet')
feature_layer = base_model.layers[-2]
model = Model(inputs=base_model.input, outputs=feature_layer.output)

# Extract features from images
def extract_features(image_folder, extractor=None):
    """Run every ``.jpg`` in ``image_folder`` through the feature extractor.

    Args:
        image_folder: Directory whose ``.jpg`` files are processed
            (non-recursive).
        extractor: Object with a Keras-style ``predict`` method. Defaults to
            the module-level ``model`` as bound at call time; pass it
            explicitly if ``model`` has since been rebound to another network.

    Returns:
        dict mapping each file name up to its first ``.`` to the array
        returned by ``extractor.predict`` for that image.

    Raises:
        FileNotFoundError: If the folder has no candidate ``.jpg`` files, or
            if none of them could be read as an image.
    """
    # macOS AppleDouble sidecars ("._name.jpg") carry a .jpg suffix but are
    # metadata, not images; drop them up front. Sorting makes the processing
    # order reproducible.
    image_files = sorted(
        name
        for name in os.listdir(image_folder)
        if name.endswith('.jpg') and not name.startswith('._')
    )

    if not image_files:
        raise FileNotFoundError(f"No image files found in {image_folder}.")

    if extractor is None:
        extractor = model

    features = {}
    for img_name in image_files:
        img_path = os.path.join(image_folder, img_name)

        # PIL raises several unrelated exception types for damaged files, so
        # the broad catch is deliberate: report the file and move on.
        try:
            with Image.open(img_path) as probe:
                probe.verify()  # integrity check; the handle is unusable afterwards
            with Image.open(img_path) as opened:
                # Force 3 channels so grayscale/CMYK/RGBA files still match
                # the network's expected input shape.
                img = opened.convert('RGB').resize((224, 224))
        except Exception as e:
            print(f"Skipping invalid image file: {img_path}, Error: {e}")
            continue  # Skip non-image files

        # Convert to a single-image batch and apply ResNet50 preprocessing
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)

        # Extract features
        feature = extractor.predict(img_array, verbose=0)
        features[img_name.split('.')[0]] = feature

    if not features:
        # Every candidate was rejected: fail here rather than let an empty
        # dict produce empty training data further down.
        raise FileNotFoundError(f"No valid images could be read from {image_folder}.")

    return features

# Compute a feature vector for every image in IMAGE_DIR
features = extract_features(IMAGE_DIR)

# Flatten the per-image caption lists and fit the tokenizer on all of them
all_captions = []
for caption_list in captions.values():
    all_captions.extend(caption_list)
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_captions)

# One more than the number of distinct words (index 0 is not assigned to a word)
vocab_size = len(tokenizer.word_index) + 1
# Length, in whitespace-separated tokens, of the longest caption
max_length = max(map(len, map(str.split, all_captions)))

# Prepare training data
def create_sequences(tokenizer, max_length, captions, features):
    """Expand captions into (image feature, caption prefix, next word) samples.

    For a caption encoded as ``[w1, ..., wn]`` this emits one sample per
    ``i`` in ``1..n-1``: the prefix ``w1..wi`` left-padded to ``max_length``
    as input and ``w(i+1)`` one-hot encoded as target.

    Args:
        tokenizer: Fitted tokenizer providing ``texts_to_sequences`` and
            ``word_index``.
        max_length: Length every input prefix is padded to.
        captions: dict of image id -> list of caption strings.
        features: dict of image key -> array whose first row is the image's
            feature vector.

    Returns:
        Tuple ``(X1, X2, y)`` of NumPy arrays: image features, padded
        prefixes and one-hot next-word targets, row-aligned.
    """
    # Derive the number of classes from the tokenizer that was passed in
    # instead of relying on a module-level ``vocab_size`` being defined.
    num_classes = len(tokenizer.word_index) + 1

    # NOTE: every sample carries a dense one-hot row of width num_classes, so
    # memory grows as (number of samples x vocabulary size).
    X1, X2, y = [], [], []
    for img_id, caps in captions.items():
        # Caption ids keep the file extension ("name.jpg") while feature keys
        # are the file name up to its first "." ("name"). Try the id as given,
        # then the stem; without this no image ever matches and the returned
        # arrays are empty.
        key = img_id if img_id in features else img_id.split('.')[0]
        if key not in features:
            continue
        image_vector = features[key][0]
        for cap in caps:
            seq = tokenizer.texts_to_sequences([cap])[0]
            for i in range(1, len(seq)):
                in_seq = pad_sequences([seq[:i]], maxlen=max_length)[0]
                out_seq = to_categorical([seq[i]], num_classes=num_classes)[0]
                X1.append(image_vector)
                X2.append(in_seq)
                y.append(out_seq)
    return np.array(X1), np.array(X2), np.array(y)

# Build the training arrays: X1 = image features, X2 = padded caption prefixes,
# y = one-hot encoded next word for each prefix
X1, X2, y = create_sequences(tokenizer, max_length, captions, features)

# Define the model
def define_model(vocab_size, max_length):
    """Build and compile the two-input captioning network.

    One branch takes a 2048-dimensional image feature vector, the other a
    caption prefix of ``max_length`` word indices. Both are reduced to 256
    units, summed, and decoded into a softmax over ``vocab_size`` words.

    Args:
        vocab_size: Size of the embedding table and of the output layer.
        max_length: Length of the word-index input sequence.

    Returns:
        The compiled Keras model (categorical cross-entropy loss, Adam).
    """
    # Image branch: dropout, then projection to 256 units
    image_input = Input(shape=(2048,))
    image_dropped = Dropout(0.5)(image_input)
    image_encoded = Dense(256, activation='relu')(image_dropped)

    # Text branch: embedding (index 0 masked) followed by an LSTM
    text_input = Input(shape=(max_length,))
    text_embedded = Embedding(vocab_size, 256, mask_zero=True)(text_input)
    text_encoded = LSTM(256)(text_embedded)

    # Decoder: element-wise sum of both branches, hidden layer, word softmax
    merged = add([image_encoded, text_encoded])
    hidden = Dense(256, activation='relu')(merged)
    word_probabilities = Dense(vocab_size, activation='softmax')(hidden)

    caption_model = tf.keras.models.Model(
        inputs=[image_input, text_input],
        outputs=word_probabilities,
    )
    caption_model.compile(optimizer='adam', loss='categorical_crossentropy')
    return caption_model

# NOTE: this rebinds the global `model`, which until now held the ResNet50
# feature extractor that extract_features reads by default; image features
# must therefore be extracted before this line runs.
model = define_model(vocab_size, max_length)
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" to the output.
model.summary()


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Skipping invalid image file: /content/__MACOSX/Flicker8k_Dataset/._3178005751_fca19815ac.jpg, Error: cannot identify image file '/content/__MACOSX/Flicker8k_Dataset/._3178005751_fca19815ac.jpg'
Skipping invalid image file: /content/__MACOSX/Flicker8k_Dataset/._3563461991_de05537878.jpg, Error: cannot identify image file '/content/__MACOSX/Flicker8k_Dataset/._3563461991_de05537878.jpg'
Skipping invalid image file: /content/__MACOSX/Flicker8k_Dataset/._2756591658_3ca6db1595.jpg, Error: cannot identify image file '/content/__MACOSX/Flicker8k_Dataset/._2756591658_3ca6db1595.jpg'
Skipping invalid image file: /content/__MACOSX/Flicker8k_Dataset/._2752809449_632cd991b3.jpg, Error: cannot identify image file '/content/__MACOSX/Flicker8k_Dataset/._2752809449_632cd991b3.jpg'
Skipping invalid image file: /content/__MACOSX/Flicker8k_Dataset/._3029715635_43ab414dfb.jpg, Error: cannot identify image file '/content/__MACOSX/Flicker8k_Da

None
