In [2]:
import os

In [3]:
# Step one directory up so the relative "artifacts/..." paths used in later cells resolve
# against the parent folder.
# NOTE(review): not idempotent — running this cell a second time climbs one more level.
os.chdir("../")

In [4]:
%pwd

'c:\\Users\\Hp\\Desktop\\Image_Captioning_End_to_End_Deployment'

In [5]:
import tensorflow as tf

In [5]:
# Ad-hoc load of the trained model file. None of the cells shown below read `model`;
# Evaluation.evaluation() loads the model itself from its config.
model = tf.keras.models.load_model("artifacts/training/best_model.keras")

In [6]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class EvaluationConfig:
    """Immutable (frozen) bundle of settings passed to the evaluation stage."""
    # Model file that the evaluation stage loads.
    path_of_model: Path
    # Data directory recorded by the configuration manager.
    training_data: Path
    # The complete parameters object as read from the params YAML file.
    all_params: dict
    # Value of IMAGE_SIZE from the params file.
    params_image_size: list
    # Value of BATCH_SIZE from the params file.
    params_batch_size: int

In [7]:
from src.Model.constants import *
from src.Model.utils.common import read_yaml, create_directories, save_json

In [12]:
class ConfigurationManager:
    """Reads the project's YAML files and builds the config object for evaluation."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and request creation of the artifacts root directory.

        Args:
            config_filepath: YAML file read into ``self.config``.
            params_filepath: YAML file read into ``self.params``.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    def get_validation_config(self) -> EvaluationConfig:
        """Build the ``EvaluationConfig`` used by the evaluation stage.

        The two path fields are wrapped in ``Path`` so their runtime type matches
        the ``Path`` annotations declared on ``EvaluationConfig``.

        Returns:
            EvaluationConfig: fixed artifact locations plus the values of
            ``IMAGE_SIZE`` and ``BATCH_SIZE`` taken from the params file.
        """
        return EvaluationConfig(
            path_of_model=Path("artifacts/training/best_model.keras"),
            # NOTE(review): this directory name looks unrelated to image captioning —
            # confirm whether it is a leftover from another project.
            training_data=Path("artifacts/data_ingestion/Chicken-fecal-images"),
            all_params=self.params,
            params_image_size=self.params.IMAGE_SIZE,
            params_batch_size=self.params.BATCH_SIZE,
        )

In [9]:
from urllib.parse import urlparse

In [1]:
import os
import pickle
import re
import time
import urllib.request as request
from zipfile import ZipFile

import numpy as np
import tensorflow as tf
from nltk.translate.bleu_score import corpus_bleu
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical, plot_model
from tqdm import tqdm

In [23]:
class Evaluation:
    """Scores the trained captioning model with corpus-level BLEU-1 and BLEU-2.

    Usage: ``Evaluation(config).evaluation()`` computes the scores and
    ``save_score()`` writes them to ``scores.json``.
    """

    # Artifact locations, resolved against the current working directory.
    FEATURES_FILE = "artifacts/data_ingestion/features.pkl"
    TOKENIZER_FILE = "artifacts/training/tokenize.pkl"
    CAPTIONS_FILE = "captions.txt"

    # Maximum number of decoding steps and the padded sequence length.
    # NOTE(review): presumably has to equal the max caption length used in training — confirm.
    MAX_CAPTION_LENGTH = 35

    # Leading share of image ids that is skipped; only the remaining tail is scored.
    TRAIN_FRACTION = 0.90

    # Anything that is neither a lowercase letter nor whitespace. Whitespace must
    # survive this step, otherwise all words of a caption would be fused together.
    _NON_LETTERS = re.compile(r'[^a-z\s]')

    def __init__(self, config: "EvaluationConfig"):
        """Store the evaluation settings; nothing is loaded until ``evaluation()``."""
        self.config = config

    def idx_to_word(self, integer, tokenizer=None):
        """Translate a vocabulary index back into its word.

        Args:
            integer: index to look up.
            tokenizer: object exposing a ``word_index`` mapping (word -> index).
                Defaults to ``self.tokenizer``.

        Returns:
            The matching word, or ``None`` when the index is not in the vocabulary.
        """
        tokenizer = self.tokenizer if tokenizer is None else tokenizer
        # Build the reverse map once per tokenizer instead of scanning the whole
        # vocabulary for every generated token.
        cached = getattr(self, "_index_word_cache", None)
        if cached is None or cached[0] is not tokenizer:
            reverse = {index: word for word, index in tokenizer.word_index.items()}
            cached = (tokenizer, reverse)
            self._index_word_cache = cached
        return cached[1].get(integer)

    def predict_caption(self, image, tokenizer, max_length):
        """Generate a caption by repeatedly appending the most probable next word.

        Requires ``self.model`` to be set (see ``evaluation()``).

        Args:
            image: image input passed to ``self.model.predict`` as the first input.
                # assumes this is the pre-extracted feature entry for one image — TODO confirm
            tokenizer: tokenizer used to encode the partial caption and to map the
                predicted index back to a word.
            max_length: maximum number of words to generate and the padded length.

        Returns:
            str: the generated text, beginning with ``'startseq'``. Generation stops
            after ``'endseq'`` is appended, when the predicted index has no word,
            or after ``max_length`` steps.
        """
        in_text = 'startseq'
        for _ in range(max_length):
            # Encode the caption generated so far and pad it to a fixed length.
            sequence = tokenizer.texts_to_sequences([in_text])[0]
            sequence = pad_sequences([sequence], max_length, padding='post')
            # Take the highest-scoring vocabulary index as the next word.
            yhat = self.model.predict([image, sequence], verbose=0)
            yhat = np.argmax(yhat)
            word = self.idx_to_word(yhat, tokenizer)

            if word is None:
                break
            in_text += " " + word
            if word == 'endseq':
                break
        return in_text

    def cleaning(self, mapping):
        """Normalise every caption in ``mapping`` in place.

        Each caption is lower-cased, stripped of every character that is not a
        letter or whitespace, reduced to words longer than one character, and
        wrapped in ``'startseq'`` / ``'endseq'`` markers.

        The character stripping uses ``re``: ``str.replace`` matches its argument
        literally, so passing a regex pattern to it removes nothing.

        Args:
            mapping: dict of image id -> list of caption strings (mutated in place).
        """
        for captions in mapping.values():
            for i, caption in enumerate(captions):
                caption = self._NON_LETTERS.sub('', caption.lower())
                # split() + join also collapses any run of whitespace to one space.
                words = [word for word in caption.split() if len(word) > 1]
                captions[i] = 'startseq ' + " ".join(words) + ' endseq'

    def _load_caption_mapping(self):
        """Parse the captions file into a dict of image id -> list of raw captions.

        The first line of the file is skipped. Every other line is split on commas:
        the first field (up to its first ``'.'``) is the image id, and the remaining
        fields are joined with spaces to form the caption.
        """
        with open(self.CAPTIONS_FILE, 'r') as f:
            next(f, None)  # skip the first line; tolerate an empty file
            captions_doc = f.read()

        mapping = {}
        for line in tqdm(captions_doc.split('\n')):
            if len(line) < 2:
                continue
            image_name, *caption_parts = line.split(',')
            image_id = image_name.split('.')[0]
            mapping.setdefault(image_id, []).append(" ".join(caption_parts))
        return mapping

    def _valid_generator(self):
        """Load the artifacts, caption the held-out images and compute BLEU scores.

        Sets ``self.features``, ``self.tokenizer``, ``self.mapping``, ``self.bleu1``
        and ``self.bleu2`` (also available under the older name ``self.blue2``).
        """
        # NOTE: pickle.load can execute arbitrary code contained in the file; only
        # load artifacts produced by this project's own pipeline.
        with open(self.FEATURES_FILE, 'rb') as f:
            self.features = pickle.load(f)

        with open(self.TOKENIZER_FILE, 'rb') as f:
            self.tokenizer = pickle.load(f)

        self.mapping = self._load_caption_mapping()
        self.cleaning(self.mapping)

        # Score only the tail of the image ids (in first-seen order).
        image_ids = list(self.mapping.keys())
        split = int(len(image_ids) * self.TRAIN_FRACTION)
        test = image_ids[split:]

        actual, predicted = [], []
        for key in tqdm(test):
            y_pred = self.predict_caption(
                self.features[key], self.tokenizer, self.MAX_CAPTION_LENGTH
            )
            # corpus_bleu expects token lists: several references per hypothesis.
            actual.append([caption.split() for caption in self.mapping[key]])
            predicted.append(y_pred.split())

        self.bleu1 = corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0))
        self.bleu2 = corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0))
        # Keep the previously used (misspelled) attribute name working.
        self.blue2 = self.bleu2

    @staticmethod
    def load_model(path: Path) -> "tf.keras.Model":
        """Load and return the Keras model stored at ``path``."""
        return tf.keras.models.load_model(path)

    def evaluation(self):
        """Load the model named in the config, then run the BLEU evaluation."""
        self.model = self.load_model(self.config.path_of_model)
        self._valid_generator()

    def save_score(self):
        """Write the BLEU scores to ``scores.json``; call ``evaluation()`` first."""
        scores = {"BLEU1": self.bleu1, "BLEU2": self.bleu2}
        save_json(path=Path("scores.json"), data=scores)

In [24]:
# Run the evaluation stage end to end: build the config, score the model, save the scores.
# No try/except wrapper: a handler that only re-raises adds nothing but an extra
# traceback frame, so failures propagate directly.
config = ConfigurationManager()
val_config = config.get_validation_config()
evaluation = Evaluation(val_config)
evaluation.evaluation()
evaluation.save_score()

[2025-06-08 12:18:38,105 - INFO - common - yaml file: config\config.yaml loaded successfully]
[2025-06-08 12:18:38,109 - INFO - common - yaml file: params.yaml loaded successfully]
[2025-06-08 12:18:38,110 - INFO - common - created directory at: artifacts]


100%|██████████| 40456/40456 [00:00<00:00, 902679.36it/s]
100%|██████████| 810/810 [11:36<00:00,  1.16it/s]


[2025-06-08 12:30:15,403 - INFO - common - json file saved at: scores.json]
