# **Suryakanta Karan (M22AIE207) m22aie207@iitj.ac.in**


In [2]:
# Install Hugging Face's transformers library (this does not require authentication)
!pip install transformers
!pip install nltk

# Import necessary libraries
from transformers import pipeline
import numpy as np
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# Download NLTK data for BLEU
nltk.download('punkt')

# Step 1: Initialize public LLMs from Hugging Face without authentication
llms = {
    'm2m_100': pipeline("translation", model="facebook/m2m100_418M"),  # Meta's M2M-100 translation model
    'mbart-large': pipeline("translation", model="facebook/mbart-large-50-many-to-many-mmt"),  # mBART for multilingual translation
    't5-base': pipeline("translation", model="t5-base")  # T5 for multilingual tasks (supports translation)
}

# Step 2: Define the target languages for dubbing
languages = {
    'French': {'src': 'en', 'tgt': 'fr'},
    'Spanish': {'src': 'en', 'tgt': 'es'}
}

# Step 3: Provide the first 100 sentences from the script (can be loaded from a file or hardcoded here)
script = """Do you know 'Naatu'?

What is Naatu?

Like an aggressive bull jumping
in the dust of the fields

Like the lead dancer dancing
at a local goddesss festival

Like playing with the stick while wearing
wooden slippers

Like a g*ng of young boys assembling
under the shade of a banyan tree

Like eating a jowar roti with a chilli

Listen to my song.
Listen to my song

Listen to my song

Naatu Naatu Naatu

Crazy Naatu

Naatu Naatu Naatu

Wild Naatu

Naatu like a green chilli

Naatu like a sharp dagger

Ive had enough of this nonsense!

You two! Out!

No Jake. Ive had enough of your bullying.

Go away!

Go!

Like beating a drum which makes
your heart beat faster

Like the shrill voice of a bird
which can make your ears ring

Like singing a song which can
make your fingers snap in rhythm

Like the wild dance when there is a fast rhythm

Like dancing which makes your body sweat

Listen to my song

Listen to my song

Listen to my song

Naatu Naatu Naatu

Crazy Naatu

Naatu Naatu Naatu

Wild Naatu

Naatu like a green chilli

Naatu like a sharp dagger

This is disgusting! This is filthy!

Jenny!

Dance in such a way that the blood
in your body

Jumps violently which will make
the earth tremble

Naatu naatu naatu

The life force inside you should dance with joy

So jump until the dust rises into the air

Naatu Naatu Naatu

-Ram! Ram! Ram!
-No.

Come on, Akhtar.

Come on, Akhtar.

Yes!

Oh God!

Get down now.
You are so heavy.

How much further should I carry you?
-Don't say that brother! I beg you.

If I take another step my legs will fall off.
Please carry me a little longer.

Why did you dance so much
if you were in so much pain?

I don't know.

When you looked at me like that
I just felt like competing with you.

Are you hurt? Whats happened?
Is he alright?

Hes got a bad cramp.
Can you drop him at New Bazaar?

Of course.

Get in.

Would you like to come to my place
for a coffee?

Before I drop you home.

She is inviting you to her house
for a cup of coffee.

I would offer to drop you too,
but its only a two seater.

No problem, I am waiting for a friend.

Alright, bye.

The first time we met him,
he rested his hand on my shoulder.

I remember clearly.

I noticed paint in his nails.
He is definitely a painter.

Search that way, uncle.

Listen, did you see him?
-No.

Have you seen him anywhere?
-No.

Good morning, Thomas.
How are you today?

I'm very good. Thank you so much.
How about you?

I'm good, thank you.

Open the gates.

The second gate only opens
once the first is closed.

I dont understand the need for
such elaborate security measures.

Overwhelming, huh?

Uncle Scott is coming back
from London in two days.

he has been knighted by the King.

Aunt Cathy wants to throw a grand party
on his arrival.

Everything is checked, sir.

Have you checked every door?

Hundreds of people are working round
the clock to make sure everything is smooth-

-I don't want any hangups, officer.
-Yes, sir.

See these lights? They were commissioned-
-What the hell are you doing here?

Robert, what are you doing?
He is with me.

He is here by my invitation.
Let him go.

Servants cant use the main entrance, ma'am.

Ill show him the back entrance.

Robert! Robert!

He is not a servant.

He is my friend.
Let him go.

As you wish, ma'am.

That was appalling behavior.
I apologize.

I'm so sorry.

It's okay.

You're fine, right?

Okay

let's forget about him.

I was really looking forward to this.

I have so much that
I wanted to show you.
"""

# Ground truth translations (replace these with actual translations)
ground_truth_french = "Translation of the script into French."  # Replace with actual French translation
ground_truth_spanish = "Translation of the script into Spanish."  # Replace with actual Spanish translation

# Step 4: Functions to calculate BLEU and TER

# Function to calculate BLEU score using NLTK with smoothing
def calculate_bleu(reference, translation):
    reference_tokens = [reference.split()]
    translation_tokens = translation.split()

    # Apply SmoothingFunction to avoid BLEU score of 0 due to lack of higher-order n-grams
    smoothie = SmoothingFunction().method4

    return sentence_bleu(reference_tokens, translation_tokens, smoothing_function=smoothie)

# Function to calculate TER (Placeholder, replace this with a real TER calculation if necessary)
def calculate_ter(reference, translation):
    # TER metric calculation (for illustration purposes, returning a random value)
    return np.random.uniform(0.1, 1.0)  # Placeholder value

# Step 5: Translate the script to the target languages using the LLMs
translations = {}
for model_name, model in llms.items():
    translations[model_name] = {}
    for lang, lang_code in languages.items():
        # Here we specify the source and target languages explicitly for models like M2M-100
        result = model(script, src_lang=lang_code['src'], tgt_lang=lang_code['tgt'])

        # Ensure translation is extracted as a string (sometimes the output is a list)
        if isinstance(result, list):
            translation_text = result[0]['translation_text']  # Extract translation from the list
        else:
            translation_text = result['translation_text']

        translations[model_name][lang] = translation_text  # Store the translation text

# Step 6: Evaluate the translations with BLEU and TER
results = {}
for model_name, lang_translations in translations.items():
    results[model_name] = {}
    for lang, translation in lang_translations.items():
        # Select ground truth based on language
        if lang == 'French':
            reference = ground_truth_french
        elif lang == 'Spanish':
            reference = ground_truth_spanish

        # Calculate BLEU and TER scores
        bleu_score = calculate_bleu(reference, translation)
        ter_score = calculate_ter(reference, translation)

        # Store the results
        results[model_name][lang] = {'BLEU': bleu_score, 'TER': ter_score}

# Step 7: Function to calculate Mean Win Rate (MWR)
def calculate_mean_win_rate(results):
    win_counts = {model: 0 for model in results}
    total_comparisons = 0

    for lang in languages:
        for metric in ['BLEU', 'TER']:
            # Get scores for all models for the current language and metric
            scores = [(model_name, results[model_name][lang][metric]) for model_name in results]

            # Sort models by the current metric (BLEU: higher is better, TER: lower is better)
            if metric == 'BLEU':
                sorted_scores = sorted(scores, key=lambda x: x[1], reverse=True)
            else:  # TER
                sorted_scores = sorted(scores, key=lambda x: x[1])

            # Calculate win rates (how often each model performs best)
            for i, (model_name, _) in enumerate(sorted_scores):
                if i == 0:  # Best-performing model
                    win_counts[model_name] += 1
            total_comparisons += 1

    # Calculate MWR for each model
    mwr_scores = {model: win_counts[model] / total_comparisons for model in results}
    return mwr_scores

# Step 8: Calculate Mean Win Rate (MWR) for the models
mwr_scores = calculate_mean_win_rate(results)

# Step 9: Output rankings based on MWR
ranked_models = sorted(mwr_scores.items(), key=lambda x: x[1], reverse=True)
print("Model Rankings (by Mean Win Rate):")
for model, score in ranked_models:
    print(f"{model}: MWR = {score:.2f}")

# Step 10: Display Results for Each Model
print("\nDetailed Results:")
for model_name, lang_scores in results.items():
    print(f"\nResults for {model_name}:")
    for lang, metrics in lang_scores.items():
        print(f"  {lang}: BLEU = {metrics['BLEU']:.2f}, TER = {metrics['TER']:.2f}")




[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Your input_length: 983 is bigger than 0.9 * max_length: 200. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)
Your input_length: 983 is bigger than 0.9 * max_length: 200. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)
Your input_length: 940 is bigger than 0.9 * max_length: 200. You might consider increasing your max_length manually, e.g. translator('...',

Model Rankings (by Mean Win Rate):
mbart-large: MWR = 0.50
m2m_100: MWR = 0.25
t5-base: MWR = 0.25

Detailed Results:

Results for m2m_100:
  French: BLEU = 0.00, TER = 0.49
  Spanish: BLEU = 0.00, TER = 0.77

Results for mbart-large:
  French: BLEU = 0.00, TER = 0.41
  Spanish: BLEU = 0.01, TER = 0.54

Results for t5-base:
  French: BLEU = 0.00, TER = 0.55
  Spanish: BLEU = 0.00, TER = 0.47
