# TM Project

### Importing the data

In [1]:
import pandas as pd
import numpy as np

In [2]:
#importing the data

path_corpus = "/Users/franz/Desktop/TM Project/corpus/"

ru_en = pd.read_csv(path_corpus + "ru-en/scores.csv")
de_en = pd.read_csv(path_corpus + "de-en/scores.csv")
cs_en = pd.read_csv(path_corpus + "cs-en/scores.csv")
zh_en = pd.read_csv(path_corpus + "zh-en/scores.csv")
en_zh = pd.read_csv(path_corpus + "en-zh/scores.csv")
en_fi = pd.read_csv(path_corpus + "en-fi/scores.csv")

In [3]:
de_en.head()

Unnamed: 0,source,reference,translation,z-score,avg-score,annotators
0,"Ihr Zeitlupentempo maßen sie, als sie vor Spit...",Her timeless pace measures them when they equi...,Their slow speed was measured by researchers o...,-0.345024,76.0,1
1,"Er sagte, dass die Bereiche ruhige Treffpunkte...",He said the areas offer quiet meeting points b...,He said the spaces provided calm meeting point...,0.9038,97.5,2
2,Für die Geschäftsleute an der B 27 ist es nur ...,"For businessmen at the B 27, it's only a small...",This is only a small consolation for businesse...,0.700503,94.0,1
3,Diese Fähigkeit sei möglicherweise angeboren o...,This ability may be born or developed with gen...,"This ability may be innate, or may develop as ...",-1.256572,51.5,2
4,Weil sie Wassertemperaturen um die sechs Grad ...,Because they prefer water temperatures around ...,They generally only come to the surface in win...,0.293909,87.0,2


### Data exploration

In [4]:
descriptions = ["Russian into English", "German into English", "Czech into English", "Chinese into English", "English into Chinese", "English into Finish"]

In [5]:
rows = []
zscores = []
avgscores = []
annots = []

i = 0

for element in [ru_en, de_en, cs_en, zh_en, en_zh, en_fi]:
    rows.append(element.shape[0])
    zscores.append(np.round(element["z-score"].mean(),2))
    avgscores.append(np.round(element["avg-score"].mean(), 2))
    annots.append(np.round(element["annotators"].mean(),2))
    i += 1                   
    
exploration_df = pd.DataFrame([rows, zscores, avgscores, annots]).T.rename(columns={0:"rows", 1:"avg z-score", 2:"avg avg-score", 3:"avg annotators"})
exploration_df["description"] = descriptions
exploration_df = exploration_df.set_index("description")
exploration_df

Unnamed: 0_level_0,rows,avg z-score,avg avg-score,avg annotators
description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Russian into English,17980.0,0.01,74.5,1.3
German into English,21704.0,0.0,71.85,1.5
Czech into English,11585.0,-0.03,69.24,1.89
Chinese into English,26419.0,-0.05,66.06,1.42
English into Chinese,10221.0,-0.06,65.98,1.58
English into Finish,6748.0,-0.14,45.12,1.23


In [6]:
exploration_df.corr()

Unnamed: 0,rows,avg z-score,avg avg-score,avg annotators
rows,1.0,0.597505,0.579839,-0.105454
avg z-score,0.597505,1.0,0.975645,0.310459
avg avg-score,0.579839,0.975645,1.0,0.41711
avg annotators,-0.105454,0.310459,0.41711,1.0


As there are only 6 different types of translations, these correlations might be not very meaningful!

# Lexical metrics

## BLEU Score

The BLEU Score might require multiple reference sentences!

### 1st Try

Inspiration taken from 
* https://www.journaldev.com/46659/bleu-score-in-python

In [7]:
from nltk.translate.bleu_score import sentence_bleu

correlations = []

for element in [ru_en, de_en, cs_en, zh_en, en_zh, en_fi]:
    
    bleu_scores = []

    #calculating the bleu scores for the translations in comparison to their respective reference
    for i in range(2000): #element.shape[0]
        reference = [element.loc[i,"reference"].split()]
        translation = element.loc[i,"translation"].split()
        bleu_scores.append(sentence_bleu(reference, translation,weights=(0.25, 0.25, 0.25, 0.25)))

    #add the bleu scores to the dataframe
    development_df = element.iloc[:2000,:].copy() #element.shape[0]
    development_df["BLEU"] = bleu_scores
    correlations.append(development_df.corr().iloc[-1:,0].values[0])


print("\033[1mCorrelation between z-score and BLEU score\n")
i = 0
for element in correlations:
    print("\033[1m", descriptions[i] + ":",  "\033[0m", np.round(element,4))
    i += 1

The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


[1mCorrelation between z-score and BLEU score

[1m Russian into English: [0m 0.2016
[1m German into English: [0m 0.2337
[1m Czech into English: [0m 0.2638
[1m Chinese into English: [0m 0.2362
[1m English into Chinese: [0m nan
[1m English into Finish: [0m 0.2153


### 2nd Try

Inspiration taken from:
* https://stackoverflow.com/questions/62337356/bleu-error-n-gram-overlaps-of-lower-order

In [8]:
from nltk.translate.bleu_score import corpus_bleu

correlations = []

for element in [ru_en, de_en, cs_en, zh_en, en_zh, en_fi]:
    
    bleu_scores = []

    #calculating the bleu scores for the translations in comparison to their respective reference
    for i in range(2000): #element.shape[0]
        reference = [element.loc[i,"reference"].split()]
        translation = element.loc[i,"translation"].split()
        while len(reference) < len(translation):
            reference.append(" ")
        while len(reference) > len(translation):
            translation.append(" ")
        bleu_scores.append(corpus_bleu(reference, translation))

    #add the bleu scores to the dataframe
    development_df = element.iloc[:2000,:].copy() #element.shape[0]
    development_df["BLEU"] = bleu_scores
    correlations.append(development_df.corr().iloc[-1:,0].values[0])


print("\033[1mCorrelation between z-score and BLEU score\n")
i = 0
for element in correlations:
    print("\033[1m", descriptions[i] + ":",  "\033[0m", np.round(element,4))
    i += 1

[1mCorrelation between z-score and BLEU score

[1m Russian into English: [0m 0.0365
[1m German into English: [0m 0.0776
[1m Czech into English: [0m 0.0996
[1m Chinese into English: [0m 0.0445
[1m English into Chinese: [0m 0.3935
[1m English into Finish: [0m 0.122


## ROUGE Score

Inspiration taken from:
* https://pypi.org/project/rouge-score/

In [9]:
#!pip install --target=/Users/franz/opt/anaconda3/envs/Data_visualization/lib/python3.8/site-packages/ rouge_score

In [10]:
from rouge_score import rouge_scorer

correlations = []

for element in [ru_en, de_en, cs_en, zh_en, en_zh, en_fi]:
    
    precisions = []
    recalls = []
    fmeasures = []

    #calculating the bleu scores for the translations in comparison to their respective reference
    for i in range(6748): #element.shape[0]
        scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
        scores = scorer.score(element.loc[i,"reference"], element.loc[i,"translation"])
        precisions.append(scores["rouge1"].precision)
        recalls.append(scores["rouge1"].recall)
        fmeasures.append(scores["rouge1"].fmeasure)

    #add the bleu scores to the dataframe
    development_df = element.iloc[:6748,:].copy() #element.shape[0]
    development_df["ROUGE precision"] = precisions
    development_df["ROUGE recall"] = recalls
    development_df["ROUGE fmeasure"] = fmeasures
    correlations.append(development_df.corr().iloc[-1:,0].values[0])


print("\033[1mCorrelation between z-score and ROUGE measures\n")

pd.DataFrame(development_df.corr().iloc[0,3:]).T

[1mCorrelation between z-score and ROUGE measures



Unnamed: 0,ROUGE precision,ROUGE recall,ROUGE fmeasure
z-score,0.549302,0.51322,0.54454


In [12]:
from rouge_score import rouge_scorer

correlations = []

for element in [ru_en, de_en, cs_en, zh_en, en_zh, en_fi]:
    
    precisions = []
    recalls = []
    fmeasures = []

    #calculating the bleu scores for the translations in comparison to their respective reference
    for i in range(6748): #element.shape[0]
        scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
        scores = scorer.score(element.loc[i,"reference"], element.loc[i,"translation"])
        precisions.append(scores["rougeL"].precision)
        recalls.append(scores["rougeL"].recall)
        fmeasures.append(scores["rougeL"].fmeasure)

    #add the bleu scores to the dataframe
    development_df = element.iloc[:6748,:].copy() #element.shape[0]
    development_df["ROUGE precision"] = precisions
    development_df["ROUGE recall"] = recalls
    development_df["ROUGE fmeasure"] = fmeasures
    correlations.append(development_df.corr().iloc[-1:,0].values[0])


print("\033[1mCorrelation between z-score and ROUGE measures\n")

pd.DataFrame(development_df.corr().iloc[0,3:]).T

[1mCorrelation between z-score and ROUGE measures



Unnamed: 0,ROUGE precision,ROUGE recall,ROUGE fmeasure
z-score,0.540008,0.505371,0.535137
