In [None]:
############################### FEW SHOT GEMMA MODEL [EN-HI]  ######################################

In [None]:
!pip install transformers accelerate --upgrade
!pip install sentencepiece
!pip install bitsandbytes

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.wh

In [None]:
import os
from getpass import getpass

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from huggingface_hub import login

# Authenticate with Hugging Face without storing the secret in the notebook:
# read the token from the HF_TOKEN environment variable, or prompt for it.
# NOTE(review): an access token was previously committed in this cell; it
# should be treated as leaked and revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
login(token=hf_token)

# Correct model identifier
model_id = "google/gemma-7b-it"

# Load tokenizer and model (`token=` replaces the deprecated `use_auth_token=`)
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto",
    token=hf_token,
)
# Create a text generation pipeline; each call generates at most 100 new tokens
# and pads with the tokenizer's EOS token id.
qe_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id)


KeyboardInterrupt: 

In [None]:
def build_few_shot_qe_prompt(src, hyp) -> str:
  """Build the few-shot quality-estimation prompt for one EN->HI pair.

  The prompt consists of an instruction, the required JSON response format,
  eight worked examples (source, translation, JSON response) and finally the
  pair to evaluate, ending with an open "Response:" line for the model to
  complete.

  Args:
    src: English source text, interpolated as-is after "Source (English):".
    hyp: Hindi translation, interpolated as-is after "Translation (Hindi):".

  Returns:
    The complete prompt string.
  """
  # NOTE(review): the format block names the category placeholder
  # "between_1_and_5", but the worked examples use categories 0-4 -- confirm
  # which scale is intended. Another function in this notebook matches on the
  # exact placeholder text, so change both together.
  # NOTE(review): the example sentences are quoted and truncated with "...",
  # while src/hyp are inserted unquoted and in full.
  # Doubled braces ({{ and }}) are f-string escapes that render as literal
  # braces in the prompt.
  return f"""
  You are a machine translation quality estimator.
  Given a source sentence in English and its translation in Hindi,
  evaluate the quality of the translation.

  Your response must be a JSON object in the following format:
  {{
    "score": <numeric_score_between_0_and_100>,
    "category": <integer_category_between_1_and_5>,
    "justification": "<brief_justification>"
  }}

### Examples:

Source (English): "The Santiago de Compostela Cathedral (Spanish ... "
Translation (Hindi): "सांतिआगो दे कंपोस्तेला बड़ा गिरजाघर (स्पेनी भा..."
Response: {{
  "score": 15.25,
  "category": 0,
  "justification": "Translation is vague and lacks clarity; significant information may be missing or incorrect."
}}

Source (English): "Football Club Shakhtar Donetsk (Ukrainian: Фут..."
Translation (Hindi): "फुटबॉल क्लब शख्तर डोनेत्स्क (यूक्रेनीयानः skht..."
Response: {{
  "score": 14.25,
  "category": 0,
  "justification": "Incomplete transliteration and poor fidelity to the source."
}}

Source (English): "Baffin Island (Inuktitut: ᕿᑭᖅᑖᓗᒃ, Qikiqtaaluk ..."
Translation (Hindi): "बाफिन द्वीप (Baffin Island) कनाडा के नूनावुत क..."
Response: {{
  "score": 40.0,
  "category": 1,
  "justification": "Basic structure is preserved but lacks fluency and naturalness."
}}

Source (English): "Candlemas (La Chandeleur) is celebrated with c..."
Translation (Hindi): "कैंडलमास (ला चैंडेलर) को क्रेम्प्स के साथ मनाय..."
Response: {{
  "score": 44.75,
  "category": 1,
  "justification": "Adequate meaning but somewhat awkward phrasing."
}}

Source (English): "Hodal, Palwal, Vrindavan, Mathura are the citi..."
Translation (Hindi): "होडल, पलवल, वृंदावन, मथुरा निकटवर्ती शहर हैं।"
Response: {{
  "score": 59.25,
  "category": 2,
  "justification": "Meaning mostly preserved and fairly fluent with only minor errors."
}}

Source (English): "Raghunathpur is a village in Uttar Pradesh, In..."
Translation (Hindi): "रघुनाथपुर (Raghunathpur) भारत के उत्तर प्रदेश ..."
Response: {{
  "score": 62.5,
  "category": 2,
  "justification": "Accurate and understandable with small fluency issues."
}}

Source (English): "Jauhar was committed in the houses of Patta, A..."
Translation (Hindi): "जौहर पट्टा, ऐसर दास और साहिब खान के घरों में प..."
Response: {{
  "score": 71.75,
  "category": 3,
  "justification": "Mostly accurate and fluent; minor nuances may be slightly off."
}}

Source (English): "Spices are traditionally ground in a ghotna (a..."
Translation (Hindi): "मसालों को पारंपरिक रूप से घोटना (मसाले और अन्य..."
Response: {{
  "score": 86.0,
  "category": 4,
  "justification": "Excellent fluency and accurate expression of traditional context."
}}

### Now evaluate this:

Source (English): {src}
Translation (Hindi): {hyp}
Response:
"""

In [None]:
import json
import re

def predict_qe_score(src, hyp):
    """Ask the model to rate one translation and parse its JSON answer.

    The pipeline's ``generated_text`` starts with the prompt itself, and the
    prompt contains a JSON-shaped format template plus worked examples. The
    prompt is therefore stripped before parsing, so that only text the model
    actually generated can be read as the prediction.

    Args:
        src: Source sentence (also sliced to 30 characters for log messages).
        hyp: Candidate translation.

    Returns:
        ``(score, category, justification)`` taken from the first JSON object
        in the model's completion (a missing key yields ``None`` for that
        slot), or ``(None, None, None)`` when generation or parsing fails.
    """
    prompt = build_few_shot_qe_prompt(src, hyp)
    try:
        output = qe_pipeline(prompt)[0]['generated_text']
        print("Model Output:\n", output)  # Debugging

        # Keep only the model's continuation; if the prompt is not echoed,
        # the output is already just the continuation.
        completion = output[len(prompt):] if output.startswith(prompt) else output

        # Try each '{' in turn until one starts a decodable JSON object.
        # raw_decode understands strings and nesting, so a '}' inside the
        # justification text cannot cut the object short.
        decoder = json.JSONDecoder()
        prediction = None
        brace = completion.find('{')
        while brace != -1:
            try:
                prediction, _ = decoder.raw_decode(completion, brace)
                break
            except json.JSONDecodeError:
                brace = completion.find('{', brace + 1)

        if prediction is None:
            raise ValueError("No JSON block found in output.")

        score = prediction.get("score")
        category = prediction.get("category")
        justification = prediction.get("justification")

        return score, category, justification

    except Exception as e:
        # Best effort: report the failure and let the caller carry on.
        print(f"General error for input: {src[:30]}... ->", e)

    return None, None, None



In [None]:
import json
import re
import random
import pandas as pd

# Function to replace placeholders with dynamic values
def replace_placeholders(output):
    """Fill the prompt's format placeholders with stand-in values.

    The score stand-in is a random integer in 60..100 and the category
    stand-in a random integer in 1..5 (both inclusive, drawn on every call);
    the justification stand-in is a fixed sentence. None of the substituted
    values comes from the model.

    Args:
        output: Text that may contain the three placeholder tokens.

    Returns:
        The text with every occurrence of each placeholder substituted.
    """
    # Chained so the random draws happen in the same order as the
    # substitutions: score first, then category.
    return (
        output
        .replace('<numeric_score_between_0_and_100>', str(random.randint(60, 100)))
        .replace('<integer_category_between_1_and_5>', str(random.randint(1, 5)))
        .replace('<brief_justification>', 'This is a sample justification based on the content.')
    )

# Function to predict Quality Estimation (QE) score
def predict_qe_score(src, hyp):
    """Ask the model to rate one translation and parse its JSON answer.

    The pipeline's ``generated_text`` starts with the prompt itself, and the
    prompt contains a JSON-shaped format template plus worked examples.
    Parsing the full text therefore reads the prompt rather than the model's
    answer, so the prompt is stripped first and no placeholder values are
    substituted: only text the model generated can become a prediction.

    Args:
        src: Source sentence (also sliced to 30 characters for log messages).
        hyp: Candidate translation (also sliced for log messages).

    Returns:
        ``(score, category, justification)`` taken from the first JSON object
        in the model's completion (a missing key yields ``None`` for that
        slot), or ``(None, None, None)`` when the output is empty, contains
        no parseable JSON object, or generation raises.
    """
    prompt = build_few_shot_qe_prompt(src, hyp)
    try:
        # Get model output
        output = qe_pipeline(prompt)[0]['generated_text']
        print("Model Output:\n", output)  # Debugging

        # Check if output is empty or invalid
        if not output.strip():
            print(f"Empty output for input: {src[:30]}... -> {hyp[:30]}...")
            return None, None, None

        # Keep only the model's continuation; if the prompt is not echoed,
        # the output is already just the continuation.
        completion = output[len(prompt):] if output.startswith(prompt) else output

        # Try each '{' in turn until one starts a decodable JSON object.
        # raw_decode understands strings and nesting, so a '}' inside the
        # justification text cannot cut the object short.
        decoder = json.JSONDecoder()
        prediction = None
        brace = completion.find('{')
        while brace != -1:
            try:
                prediction, _ = decoder.raw_decode(completion, brace)
                break
            except json.JSONDecodeError:
                brace = completion.find('{', brace + 1)

        if prediction is None:
            raise ValueError("No JSON block found in output.")
        print("Parsed JSON:", prediction)  # Debugging

        # Extract prediction values (with default None if missing)
        score = prediction.get("score")
        category = prediction.get("category")
        justification = prediction.get("justification")

        # Return extracted values
        return score, category, justification

    except Exception as e:
        # Best effort: report the failure and let the caller carry on.
        print(f"General error for input: {src[:30]}... -> {e}")
        return None, None, None

# Load your dataset
dev_df = pd.read_csv("dev.enhi.df.short.tsv", sep="\t")

# Both text columns must be present before any prediction is attempted
assert 'original' in dev_df.columns and 'translation' in dev_df.columns, "Expected columns: original, translation"

# Parallel accumulators, one entry per dev-set row
scores, categories, justifications = [], [], []

# Function to safely call predict_qe_score
def safe_predict_qe_score(src, hyp):
    """Call ``predict_qe_score`` and always hand back a three-slot result.

    Args:
        src: Source sentence.
        hyp: Candidate translation.

    Returns:
        The predictor's result unchanged when it is a non-empty list or tuple
        of length 3; otherwise ``(None, None, None)``, after printing a
        warning (unexpected shape) or the exception that was raised.
    """
    try:
        outcome = predict_qe_score(src, hyp)
        # Guard clause: anything that is not a 3-item list/tuple is rejected
        if not (outcome and isinstance(outcome, (list, tuple)) and len(outcome) == 3):
            print(f"Warning: Unexpected result format for input:\nSRC: {src}\nHYP: {hyp}\nResult: {outcome}")
            return None, None, None
        return outcome
    except Exception as e:
        print(f"Error at input:\nSRC: {src}\nHYP: {hyp}\nException: {e}")
        return None, None, None

# Score every row of the dev set, one prompt per row (no row limit is applied)
for idx, row in dev_df.iterrows():
    src, hyp = row['original'], row['translation']
    score, category, justification = safe_predict_qe_score(src, hyp)

    # The three lists stay aligned with the DataFrame's row order
    scores.append(score)
    categories.append(category)
    justifications.append(justification)

# Attach the collected outputs as new columns
dev_df['predicted_score'] = scores
dev_df['predicted_category'] = categories
dev_df['predicted_justification'] = justifications

# Persist the predictions as a tab-separated file without the index column
dev_df.to_csv("_FewShot_Gemma_[en-hi]_predictions.tsv", sep="\t", index=False)

# Show the inputs next to the predictions for a quick review
print(dev_df.loc[:, ['original', 'translation', 'predicted_score', 'predicted_category', 'predicted_justification']])


In [None]:
from scipy.stats import spearmanr, pearsonr
from sklearn.metrics import mean_absolute_error

# Load the saved predictions
eval_df = pd.read_csv("_FewShot_Gemma_[en-hi]_predictions.tsv", sep="\t")

# Coerce both columns to numbers; values that cannot be parsed become NaN
numeric_df = eval_df[['mean', 'predicted_score']].apply(pd.to_numeric, errors='coerce')

# Drop rows with missing scores: failed predictions are stored as empty cells,
# and a single NaN would turn the correlations into NaN and make
# mean_absolute_error raise.
scored_df = numeric_df.dropna().astype(float)
n_dropped = len(eval_df) - len(scored_df)
if n_dropped:
    print(f"Dropped {n_dropped} of {len(eval_df)} rows with missing or non-numeric scores")
if len(scored_df) < 2:
    raise ValueError("Need at least two rows with both a gold and a predicted score to evaluate.")

y_true = scored_df['mean'].tolist()
y_pred = scored_df['predicted_score'].tolist()

# Spearman correlation
spearman_corr, _ = spearmanr(y_true, y_pred)

# Pearson correlation
pearson_corr, _ = pearsonr(y_true, y_pred)

# Mean Absolute Error
mae = mean_absolute_error(y_true, y_pred)

# Print results
print(f"Spearman Correlation: {spearman_corr:.4f}")
print(f"Pearson Correlation:  {pearson_corr:.4f}")
print(f"Mean Absolute Error:  {mae:.4f}")

Spearman Correlation: -0.0166
Pearson Correlation:  0.0005
Mean Absolute Error:  12.0147
