In [None]:
!pip install pandas sentence-transformers scikit-learn

In [None]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load dev.tsv (tab-separated)
df = pd.read_csv("dev.enhi.df.short.tsv", sep="\t")

# Check if required columns exist
assert "original" in df.columns and "translation" in df.columns, "Missing required columns."

# Load the MiniLM model
# NOTE(review): confirm this checkpoint is suitable for the Hindi text in the
# "translation" column; its model card presumably describes English training
# data, in which case a multilingual checkpoint may be more appropriate.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Encode both columns. normalize_embeddings=True yields unit-length vectors, so
# the cosine similarity of a sentence pair reduces to a plain dot product.
encode_options = dict(
    convert_to_numpy=True,
    normalize_embeddings=True,
    batch_size=32,
    show_progress_bar=True,
)
original_embeddings = model.encode(df['original'].tolist(), **encode_options)
translation_embeddings = model.encode(df['translation'].tolist(), **encode_options)
assert original_embeddings.shape == translation_embeddings.shape, "Embedding shapes differ."

# Compute the cosine similarity of each (original, translation) pair only.
# Building the full N x N similarity matrix just to read its diagonal costs
# O(N^2) time and memory; the row-wise dot product gives the same values in O(N).
predicted_scores = (original_embeddings * translation_embeddings).sum(axis=1)

# Add predicted score column
df["all-MiniLM_predicted_score"] = predicted_scores

# Save to a new TSV
df.to_csv("all-MiniLM_qe_predicted.tsv", sep="\t", index=False)

print("Done! QE scores saved to 'all-MiniLM_qe_predicted.tsv'")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Done! QE scores saved to 'all-MiniLM_qe_predicted.tsv'


In [None]:
# Preview the first rows of df, which now carries the "all-MiniLM_predicted_score" column.
df.head()

Unnamed: 0,index,original,translation,scores,mean,z_scores,z_mean,all-MiniLM_predicted_score
0,0,In the flood-prone districts of the Netherland...,"नीदरलैंड के बाढ़ संभावित जिलों में, विशेष रूप ...","[90, 90, 79, 81]",85.0,"[0.10844457902530406, 0.17855383580414114, 0.6...",0.399822,0.099739
1,1,Group A Group B The top five run scorers (tota...,ग्रुप ए ग्रुप बी शीर्ष पांच रन स्कोरर (कुल रन)...,"[95, 95, 87, 89]",91.5,"[0.5387802100780963, 0.6151157815355373, 1.520...",1.001232,0.158545
2,2,"The final finished as a draw, with Essex winni...","मैच की पहली पारी में बढ़त हासिल करने के बाद, ए...","[95, 95, 70, 60]",80.0,"[0.5387802100780963, 0.6151157815355373, -0.22...",-0.085456,0.023662
3,3,These traits—establishment of a working method...,ये विशेषताएं-कला का अभिन्न अंग कार्य प्रणाली क...,"[90, 90, 78, 78]",84.0,"[0.10844457902530406, 0.17855383580414114, 0.5...",0.306865,0.058475
4,4,"Its two most important members, Britain and Fr...","इसके दो सबसे महत्वपूर्ण सदस्य, ब्रिटेन और फ्रा...","[90, 90, 88, 86]",88.5,"[0.10844457902530406, 0.17855383580414114, 1.6...",0.74285,-0.048797


In [None]:
import pandas as pd
from scipy.stats import spearmanr, pearsonr
from sklearn.metrics import mean_absolute_error

from sklearn.preprocessing import MinMaxScaler

# Min-max rescale the reference "mean" column so it lies in the 0-1 range.
scaler = MinMaxScaler()
reference_2d = df["mean"].to_numpy().reshape(-1, 1)
df["mean_scaled"] = scaler.fit_transform(reference_2d)

# Series compared by the metrics: MiniLM predictions vs. the scaled references.
predicted_scores = df["all-MiniLM_predicted_score"].astype(float)
true_scores = df["mean_scaled"].astype(float)

# Rank correlation, linear correlation and mean absolute error
# (index 0 of each scipy result is the correlation coefficient).
spearman_corr = spearmanr(true_scores, predicted_scores)[0]
pearson_corr = pearsonr(true_scores, predicted_scores)[0]
mae = mean_absolute_error(true_scores, predicted_scores)

print(f" Spearman Correlation: {spearman_corr:.4f}")
print(f" Pearson Correlation:  {pearson_corr:.4f}")
print(f" Mean Absolute Error:  {mae:.4f}")



 Spearman Correlation: -0.0993
 Pearson Correlation:  -0.1216
 Mean Absolute Error:  0.5912


In [None]:
# google/flan-t5-large

In [None]:
import re

import pandas as pd
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load FLAN-T5 Large. The checkpoint must match the "large" label used in the
# output column, file name and message below (the "base" checkpoint was being
# loaded here, which made those labels incorrect).
model_name = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Enable GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Load your dev.tsv file (with 'original' and 'translation' columns)
df = pd.read_csv("dev.enhi.df.short.tsv", sep="\t")

# Fail early with a clear message instead of a KeyError inside the loop.
assert "original" in df.columns and "translation" in df.columns, "Missing required columns."

# Prompt is assembled line by line so the code's indentation does not end up
# inside the text sent to the model.
PROMPT_TEMPLATE = (
    "On a scale from 0 (very bad translation) to 100 (perfect translation), rate the following:\n"
    "Source: {src}\n"
    "Translation: {mt}\n"
    "Answer only with a number."
)

# First integer or decimal number anywhere in the generated text, so answers
# such as "85/100" or "Score: 85" are still parsed.
NUMBER_PATTERN = re.compile(r"-?\d+(?:\.\d+)?")

# Prepare prediction column
predicted_scores = []

# Iterate and predict, one (source, translation) pair at a time
for src, mt in tqdm(zip(df["original"], df["translation"]), total=len(df)):
    prompt = PROMPT_TEMPLATE.format(src=src, mt=mt)
    # NOTE(review): truncation=True can cut the end of a long prompt, including
    # the final "Answer only with a number." line -- confirm prompt lengths.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)

    outputs = model.generate(**inputs, max_new_tokens=10)
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract a numeric score from the output; NaN marks an unparseable answer
    # so the column stays numeric and the failure remains visible downstream.
    match = NUMBER_PATTERN.search(result)
    predicted_scores.append(float(match.group()) if match else float("nan"))

# Add predictions to DataFrame
df["qe_with_flan_t5_large_predicted_score"] = predicted_scores

# Surface parse failures instead of silently swallowing them.
n_unparsed = int(df["qe_with_flan_t5_large_predicted_score"].isna().sum())
if n_unparsed:
    print(f" Warning: no numeric score could be parsed for {n_unparsed} of {len(df)} rows")

# Save output
df.to_csv("qe_with_flan_t5_large.tsv", sep="\t", index=False)

print(" Translation quality estimation complete and saved to qe_with_flan_t5_large.tsv")


100%|██████████| 1000/1000 [43:24<00:00,  2.60s/it]


 Translation quality estimation complete and saved to qe_with_flan_t5_large.tsv


In [None]:
# Display the FLAN-T5 predicted-score column for a quick sanity check of its values.
df["qe_with_flan_t5_large_predicted_score"]

Unnamed: 0,qe_with_flan_t5_large_predicted_score
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0
...,...
995,0.0
996,0.0
997,0.0
998,0.0


In [None]:
from sklearn.metrics import mean_absolute_error
from scipy.stats import spearmanr, pearsonr

# Reference scores and FLAN-T5 predictions as floats. Rows whose model output
# could not be parsed may hold NaN in the prediction column.
true = df["mean"].astype(float)
pred = df["qe_with_flan_t5_large_predicted_score"].astype(float)

# mean_absolute_error raises on NaN input and the correlations turn into NaN,
# so evaluate only the rows where both values are present.
valid = true.notna() & pred.notna()
if not valid.all():
    print(f"Skipping {int((~valid).sum())} of {len(df)} rows with missing scores")
true, pred = true[valid], pred[valid]

print("Spearman:", spearmanr(true, pred).correlation)
print("Pearson:", pearsonr(true, pred)[0])
print("MAE:", mean_absolute_error(true, pred))


Spearman: -0.03753212703807498
Pearson: 0.015627607231023893
MAE: 92.60515
