In [2]:
!pip install nltk googletrans==4.0.0-rc1



In [15]:
import os
import json
from googletrans import Translator
from sentence_transformers import SentenceTransformer, util

In [4]:
input_file = './Dataset/flickr8k/captions.txt'
output_file = './Dataset/flickr8k/caption_fa.txt'

In [16]:
model = SentenceTransformer('distiluse-base-multilingual-cased-v1')

# Preparing mini-coco Dataset

In [None]:
def convert_json_to_text(json_file, output_file):
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    with open(output_file, 'w', encoding='utf-8') as f:
        for entry in data:
            
            image_name = f"{entry['image_id']}.jpg"
            caption = entry['caption']
            
            
            f.write(f"{image_name},{caption}\n")

In [None]:
def Preparing_minicoco():
    directory = "./Dataset/minicoco/images"  
    prefix = "COCO_train2014_"
    for filename in os.listdir(directory):
        if filename.startswith(prefix):
            
            new_name = filename[len(prefix):].lstrip("0")
            
            old_path = os.path.join(directory, filename)
            new_path = os.path.join(directory, new_name)
            
            os.rename(old_path, new_path)
            print(f"Renamed: {filename} -> {new_name}")
    convert_json_to_text('captions.json', 'output.txt')

In [None]:
Preparing_minicoco()

# Translate and Evaluate Captions  

In [13]:
def load_system_translations(output_file):
    system_translations = []
    with open(output_file, 'r', encoding='utf-8') as sys_file:
        for line in sys_file:
            line = line.strip()
            if not line:
                continue
            parts = line.split(',', 1)
            if len(parts) > 1:
                system_translations.append(parts[1])
            else:
                print(f"Skipping malformed line: {line}")
    return system_translations


In [18]:
def evaluate_distiluse(system_translations):
    scores = []
    for i, translation in enumerate(system_translations):
        print(f"Sentence processing{i+1}/{len(system_translations)}: {translation[:50]}...")
        reference = translation
        candidate = translation
        embeddings = model.encode([reference, candidate])
        similarity_score = util.pytorch_cos_sim(embeddings[0], embeddings[1]).item()
        scores.append(similarity_score)
        print(f"Score: {similarity_score:.4f}")
    return scores


In [None]:
def translate(input_file, output_file):
    translator = Translator()
    try:
        with open(output_file, 'r', encoding='utf-8') as outfile:
            i = len(outfile.readlines())
    except FileNotFoundError:
        i = 0
    start_line = 0
    with open(input_file, 'r', encoding='utf-8') as infile, open(output_file, 'a', encoding='utf-8') as outfile:
        for line_num, line in enumerate(infile, 1):
            if line_num < start_line:
                continue

            filename, caption = line.strip().split(',', 1)
            translated_caption = translator.translate(caption, src='en', dest='fa').text
            outfile.write(f"{filename},{translated_caption}\n")
            print(f"Processed line: {line_num}, Current i: {i}")
            i += 1

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Processed line: 11557, Current i: 7609
Processed line: 11558, Current i: 7610
Processed line: 11559, Current i: 7611
Processed line: 11560, Current i: 7612
Processed line: 11561, Current i: 7613
Processed line: 11562, Current i: 7614
Processed line: 11563, Current i: 7615
Processed line: 11564, Current i: 7616
Processed line: 11565, Current i: 7617
Processed line: 11566, Current i: 7618
Processed line: 11567, Current i: 7619
Processed line: 11568, Current i: 7620
Processed line: 11569, Current i: 7621
Processed line: 11570, Current i: 7622
Processed line: 11571, Current i: 7623
Processed line: 11572, Current i: 7624
Processed line: 11573, Current i: 7625
Processed line: 11574, Current i: 7626
Processed line: 11575, Current i: 7627
Processed line: 11576, Current i: 7628
Processed line: 11577, Current i: 7629
Processed line: 11578, Current i: 7630
Processed line: 11579, Current i: 7631
Processed line: 11580, Current i: 7632

In [7]:
def evaluate(output_file):
    system_translations = load_system_translations(output_file)
    similarity_scores = evaluate_distiluse(system_translations)
    for i, score in enumerate(similarity_scores):
        print(f"Sentence {i+1} Similarity Score: {score:.4f}")
        print("-" * 50)
    average_similarity = sum(similarity_scores) / len(similarity_scores) if similarity_scores else 0
    print(f"Average Similarity Score: {average_similarity:.4f}")

In [None]:
translate(input_file, output_file)

In [None]:
evaluate(output_file)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Score: 1.0000
Sentence processing1035/16639: دختر پیراهن صورتی با نوشتن سفید در چمن....
Score: 1.0000
Sentence processing1036/16639: مردی با دوچرخه سوار یک تپه صخره ای....
Score: 1.0000
Sentence processing1037/16639: یک مرد دوچرخه خاکی خود را در یک مسیر صخره ای سوار ...
Score: 1.0000
Sentence processing1038/16639: یک شخص دوچرخه سواری را به پایین تپه صخره ای سوار م...
Score: 1.0000
Sentence processing1039/16639: تصویر مبهم از دوچرخه سواری کوهستانی که دنباله صخره...
Score: 1.0000
Sentence processing1040/16639: دوچرخه سوار کوهستانی با آبی در یک مسیر صخره ای سوا...
Score: 1.0000
Sentence processing1041/16639: سه کودک در شن و ماسه در نزدیکی ساحل بازی می کنند....
Score: 1.0000
Sentence processing1042/16639: سه کودک در ساحل در ماسه بازی می کنند...
Score: 1.0000
Sentence processing1043/16639: سه دختر در ماسه بازی می کنند....
Score: 1.0000
Sentence processing1044/16639: "سه دختر بور کوچک ، دو به رنگ آبی ، یکی به رنگ صور...
Score: 

# Creating CSV File of Datsets

In [2]:
import csv

input_file = "./Dataset/flickr8k/caption_fa.txt"  
output_file = "Captions.csv"  

with open(input_file, "r", encoding="utf-8") as infile, open(output_file, "w", newline="", encoding="utf-8") as outfile:
    csv_writer = csv.writer(outfile)
    csv_writer.writerow(["Filename", "Caption"])
    for line in infile:
        parts = line.strip().split(",", 1)  
        
        if len(parts) == 2:  
            csv_writer.writerow(parts)