# Predictions of Gloss-Text

In [1]:
import re

def extract_data(log_file_path):
    """Collect reference/hypothesis text pairs from a log file.

    Every line containing 'Text Reference' is treated as a reference, and the
    line directly after it as the matching hypothesis. For both lines only the
    text after the first colon-plus-tab separator is kept, with surrounding
    whitespace stripped.

    Args:
        log_file_path: Path of the log file to read.

    Returns:
        A ``(references, hypotheses)`` tuple of two equally long lists of str.

    Raises:
        IndexError: If a reference line or the line after it has no
            colon-plus-tab separator.
    """
    with open(log_file_path, 'r') as log_file:
        log_lines = log_file.readlines()

    def payload(raw_line):
        # Keep only what follows the first ":\t" on the line.
        return raw_line.split(':\t', 1)[1].strip()

    references, hypotheses = [], []

    # Walk over (line, following line) pairs; the final line has no successor
    # and is therefore never considered as a reference.
    for current_line, following_line in zip(log_lines, log_lines[1:]):
        if 'Text Reference' not in current_line:
            continue
        reference_text = payload(current_line)
        hypothesis_text = payload(following_line)
        references.append(reference_text)
        hypotheses.append(hypothesis_text)

    return references, hypotheses

In [2]:
def clean_text(text):
    """Drop every character that is not an ASCII letter, a digit or whitespace,
    then trim leading and trailing whitespace. Inner whitespace is kept as is."""
    without_symbols = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    return without_symbols.strip()

In [3]:
def analyze_predictions(references, hypotheses):
    """Tally word-level agreement between paired references and hypotheses.

    Sentences are split on whitespace and compared pair by pair.

    Args:
        references: List of reference sentences.
        hypotheses: List of hypothesis sentences, paired by position.

    Returns:
        A 4-tuple ``(exact, loose, incorrect, sen_length)`` where
        * ``exact`` maps a word to how often it sits at the same position in
          the reference and in the hypothesis,
        * ``loose`` maps a reference word to how often it occurs anywhere in
          the paired hypothesis,
        * ``incorrect`` maps a reference word to how often it is missing from
          the paired hypothesis,
        * ``sen_length`` holds, for every pair with at least one match, the
          word count of the reference after ``clean_text``.
    """
    exact_hits = {}
    loose_hits = {}
    misses = {}
    matched_lengths = []

    def bump(tally, key):
        # Increment a plain-dict counter, creating the entry on first use.
        tally[key] = tally.get(key, 0) + 1

    for ref_sentence, hyp_sentence in zip(references, hypotheses):
        ref_tokens = ref_sentence.split()
        hyp_tokens = hyp_sentence.split()
        hyp_vocab = set(hyp_tokens)
        any_match = False

        # Loose pass: is each reference word present anywhere in the hypothesis?
        for token in ref_tokens:
            if token in hyp_vocab:
                bump(loose_hits, token)
                any_match = True
            else:
                bump(misses, token)

        # Exact pass: compare position by position; zip stops at the shorter
        # of the two token lists.
        for ref_token, hyp_token in zip(ref_tokens, hyp_tokens):
            if ref_token == hyp_token:
                bump(exact_hits, ref_token)
                any_match = True

        if any_match:
            matched_lengths.append(len(clean_text(ref_sentence).split()))

    return exact_hits, loose_hits, misses, matched_lengths

## Combined Student

In [4]:
# Combined student: read the reference/hypothesis pairs from its log and run
# the word-level analysis on them.
log_file_path = 'T=3, a=0.5/combined_kd_3,0.5.log'
references, hypotheses = extract_data(log_file_path)
(
    exact_correct_predictions,
    loose_correct_predictions,
    incorrect_predictions,
    length,
) = analyze_predictions(references, hypotheses)

In [5]:
print(len(references), len(hypotheses))

230 230


In [6]:
references

['*** *** saturday',
 '*** *** yellow',
 '*** *** mother',
 '*** *** october',
 'the show is  amazing.',
 'person in a   wheelchair',
 '** *** february',
 '* good noon!',
 'i  am  fine',
 '** *** married',
 '* ** ** blind',
 '* ** ** slow',
 '* how are you',
 '* ** ** son',
 '* ** ** white',
 '**** daughter',
 '**** beer',
 '**** yesterday',
 '**** father',
 '**** monday',
 '*** *** green',
 '*** *** meat',
 '*** *** dark',
 '*** *** white',
 '*** *** two',
 '* ** brown',
 '* ** tomorrow',
 '* no sugar',
 'i saw a  ghost.',
 '* good morning',
 'my dog died.',
 'brown',
 'fish',
 'i like you very much.',
 'red',
 '*** *** tuesday',
 '*** *** juice',
 '*** *** september',
 '*** *** understand',
 '*** *** mother',
 '* ** april',
 '* ** mother',
 '* ** pink',
 'do not know',
 '* ** spaghetti',
 '* ** ten',
 'i hate you!',
 'how are you?',
 'happy new year!',
 'our team won!',
 '* ** ** ***** brown',
 '* ** ** ***** chicken',
 'i do not like you.',
 'i am   scared.',
 '* ** ** ***** violet'

In [7]:
hypotheses

['you are you',
 'you are you',
 'you are you',
 'you are you',
 '*** you  are welcome',
 '****** do not know',
 'do not know',
 'i am   heartbroken.',
 'do not know',
 'do not know',
 'i am 12 old.',
 'i am 12 old.',
 'i am  12  old.',
 'i am 12 old.',
 'i am 12 old.',
 'good afternoon',
 'good afternoon',
 'good afternoon',
 'good afternoon',
 'good afternoon',
 'you are welcome',
 'you are welcome',
 'you are welcome',
 'you are welcome',
 'you are welcome',
 'i am fine',
 'i am fine',
 'i am fine',
 'i *** am scared.',
 'i am   fine',
 '** *** fast',
 'fast',
 'fast',
 'i **** *** am   shocked!',
 'fast',
 'how are you?',
 'how are you?',
 'how are you?',
 'how are you?',
 'how are you?',
 'i am fine',
 'i am fine',
 'i am fine',
 'i  am  fine',
 'i am fine',
 'i am fine',
 'i am   worried.',
 'i   am  worried.',
 'i     am  worried.',
 'i   am   worried.',
 'i am 12 years old.',
 'i am 12 years old.',
 'i ** *** am   sorry.',
 '* good afternoon',
 'i am 12 years old.',
 '* good mo

In [8]:
# Words found at the same position in reference and hypothesis, with counts.
print("Exact Correct Predictions:")
for word in exact_correct_predictions:
    print(f"{word}: {exact_correct_predictions[word]}")

Exact Correct Predictions:
i: 21
am: 15
proud: 1
of: 1
you!: 1
fine: 1
no: 1
good: 1


In [9]:
# Reference words that occur somewhere in the paired hypothesis, with counts.
print("Loose Correct Predictions (word found anywhere in the sentence):")
for word in loose_correct_predictions:
    print(f"{word}: {loose_correct_predictions[word]}")

Loose Correct Predictions (word found anywhere in the sentence):
i: 21
am: 15
proud: 1
of: 1
you!: 1
fine: 1
no: 1
good: 1


In [10]:
# Reference words that are absent from the paired hypothesis, with counts.
print("\nIncorrect Predictions:")
for word in incorrect_predictions:
    print(f"{word}: {incorrect_predictions[word]}")


Incorrect Predictions:
***: 45
saturday: 2
yellow: 3
mother: 3
october: 6
the: 5
show: 1
is: 11
amazing.: 1
person: 1
in: 1
a: 3
wheelchair: 1
**: 92
february: 3
*: 92
good: 6
noon!: 3
i: 7
am: 6
fine: 1
married: 3
blind: 6
slow: 2
how: 3
are: 7
you: 12
son: 2
white: 3
****: 32
daughter: 1
beer: 3
yesterday: 3
father: 1
monday: 1
green: 2
meat: 1
dark: 3
two: 2
brown: 4
tomorrow: 2
no: 5
sugar: 5
saw: 2
ghost.: 2
morning: 2
my: 10
dog: 4
died.: 4
fish: 2
like: 5
very: 6
much.: 2
red: 1
tuesday: 2
juice: 2
september: 2
understand: 3
april: 2
pink: 2
do: 5
not: 12
know: 2
spaghetti: 1
ten: 2
hate: 1
you!: 1
you?: 2
happy: 5
new: 5
year!: 5
our: 2
team: 2
won!: 2
*****: 9
chicken: 2
you.: 2
scared.: 1
violet: 1
nervous.: 2
auntie: 2
six: 2
woman: 2
coffee: 3
grandfather: 1
july: 2
head: 6
painful.: 6
fine.: 2
rice: 2
hot: 2
march: 1
worried.: 3
see: 1
so: 1
tired.: 5
12: 1
years: 1
old.: 1
this: 1
hard.: 1
failed: 1
exam.: 1
nine: 2
sunday: 3
december: 2
alone.: 1
thank: 4
parents: 3
dis

In [11]:
print("Exact Correct Predictions / (Correct + Incorrect) for Each Word:")

# Iterate the dict itself (insertion order) instead of a set built from its
# keys: the iteration order of a set of strings changes between kernel
# sessions because of hash randomisation, which made this listing
# non-reproducible.
# The denominator adds the exact-position matches to the occurrences that are
# missing from the hypothesis altogether; occurrences that appear in the
# hypothesis at a different position are in neither count.
for word, correct_count in exact_correct_predictions.items():
    incorrect_count = incorrect_predictions.get(word, 0)

    print(f"{word}: {correct_count} / {correct_count + incorrect_count}")

Exact Correct Predictions / (Correct + Incorrect) for Each Word:
no: 1 / 6
good: 1 / 7
i: 21 / 28
fine: 1 / 2
of: 1 / 1
proud: 1 / 1
you!: 1 / 2
am: 15 / 21


### Shorter / Longer

In [12]:
from collections import defaultdict

def count_sentences_by_word_count(references):
    """Build a histogram of word counts over the given texts.

    Each text is passed through ``clean_text`` and split on whitespace; the
    resulting number of words is the bucket the text is counted in. Texts with
    no words left after cleaning are ignored.

    A text is always counted as one unit. The earlier version split the
    cleaned text on '.', but ``clean_text`` has already removed every period
    by then, so that split could never yield more than one piece. The no-op
    has been dropped; results are unchanged.

    Args:
        references: Iterable of text strings.

    Returns:
        A plain dict mapping word count -> number of texts with that many
        words, in order of first appearance.
    """
    histogram = {}

    for text in references:
        n_words = len(clean_text(text).split())
        if n_words:  # Only count non-empty texts
            histogram[n_words] = histogram.get(n_words, 0) + 1

    return histogram

In [13]:
result = count_sentences_by_word_count(references)
result

{1: 145, 4: 17, 2: 18, 3: 39, 5: 11}

In [14]:
length

[4, 5, 3, 5, 3, 4, 5, 5, 4, 3, 3, 4, 3, 3, 3, 3, 3, 2, 3, 3, 2, 4, 4]

In [15]:
def compare_lengths(lengths, word_count_dict):
    """Print, per word count, how many entries of ``lengths`` fall in that bucket.

    Args:
        lengths: Iterable of word counts (one entry per counted sentence).
        word_count_dict: Mapping word count -> total number of sentences with
            that word count.

    One line is printed for every key of ``word_count_dict`` in ascending
    order; values in ``lengths`` that are not keys of the mapping are not
    reported. Nothing is returned.
    """
    tally = {}
    for n_words in lengths:
        tally[n_words] = tally.get(n_words, 0) + 1

    for word_count in sorted(word_count_dict):
        num_sentences = word_count_dict[word_count]
        count_in_lengths = tally.get(word_count, 0)
        print(f'There are {count_in_lengths} out of {num_sentences} that are correct for {word_count}-worded sentences.')

In [16]:
compare_lengths(length, result)

There are 0 out of 145 that are correct for 1-worded sentences.
There are 2 out of 18 that are correct for 2-worded sentences.
There are 11 out of 39 that are correct for 3-worded sentences.
There are 6 out of 17 that are correct for 4-worded sentences.
There are 4 out of 11 that are correct for 5-worded sentences.


## FSL-NMS Student

In [17]:
# FSL-NMS student: read the reference/hypothesis pairs from its log and run
# the word-level analysis on them.
log_file_path = 'T=3, a=0.5/2d_kd_3,0.5.log'
references, hypotheses = extract_data(log_file_path)
(
    exact_correct_predictions,
    loose_correct_predictions,
    incorrect_predictions,
    length,
) = analyze_predictions(references, hypotheses)

In [18]:
print(len(references), len(hypotheses))

170 170


In [19]:
references

['i   am  sorry.',
 '* good morning!',
 'you are so slow!',
 'i am not tired.',
 'i am heartbroken.',
 'i like you very much.',
 'my dog died.',
 'i am scared.',
 'is it new year?',
 'i am worried.',
 'this is not  hard.',
 'my head is   painful.',
 'i am ** ***** sorry.',
 'i am   tired.',
 '* how old are   you?',
 'i am ** ***** scared.',
 '* good morning!',
 'i am fine.',
 'my dog died.',
 'you are disgusting!',
 'i ** like you.',
 'this is very hard.',
 'i am *** tired.',
 'i do not like you.',
 '* john likes mary.',
 'i saw a  ghost.',
 'i am shocked!',
 'you are disgusting!',
 'how old are you?',
 'i am worried.',
 'i am so tired.',
 'i am alone.',
 'i am ** ***** sorry.',
 'i am shocked!',
 '* the trip  is exciting.',
 'you are so slow!',
 'i ** ** hate  you!',
 'i like you very much.',
 'this is hard.',
 '* does john like  mary?',
 'i am sorry.',
 'how are you?',
 'you are slow.',
 'you are sick.',
 'my dog died.',
 '* ** good morning!',
 'i saw a  ghost.',
 '* thank you.',
 'i

In [20]:
hypotheses

['you are sick.',
 'i am   fine.',
 'you are so slow!',
 'i am *** fine.',
 'i am fine.',
 'i **** *** am   worried.',
 'i  am  worried.',
 'i am worried.',
 '** i  am  worried.',
 'i am worried.',
 '**** i  like you.',
 '** i    like you.',
 'i am 12 years old.',
 'i like you.',
 'i am  12  years old.',
 'i am 12 years old.',
 'i am   tired.',
 'i am fine.',
 'i  am  alone.',
 'i   am  tired.',
 'i am so   tired.',
 'i    am not  tired.',
 'i am not tired.',
 'i am not **** tired.',
 'i am   so    tired.',
 'i *** am tired.',
 'i am fine.',
 'i   am  heartbroken.',
 '*** i   am  fine.',
 'i am tired.',
 'i am ** heartbroken.',
 'i am shocked!',
 'i am 12 years old.',
 'i am shocked!',
 'i am  proud of you!',
 '*** i   am alone.',
 'i am 12 years old.',
 'i **** *** am   alone.',
 'i    am alone.',
 'i am   12   years old.',
 'i am tired.',
 'i   am  nervous.',
 'i   am  alone.',
 'i   am  nervous.',
 'i  am  tired.',
 'i am not  tired.',
 'i *** am fine.',
 'i am    nervous.',
 'i ***

In [21]:
# Words found at the same position in reference and hypothesis, with counts.
print("Exact Correct Predictions:")
for word in exact_correct_predictions:
    print(f"{word}: {exact_correct_predictions[word]}")

Exact Correct Predictions:
you: 1
are: 1
so: 1
slow!: 1
i: 76
am: 50
worried.: 1
fine.: 1
tired.: 1
not: 1
shocked!: 1
proud: 1
of: 1
you!: 1
12: 1
years: 1
old.: 1
nervous.: 1


In [22]:
# Reference words that occur somewhere in the paired hypothesis, with counts.
print("Loose Correct Predictions (word found anywhere in the sentence):")
for word in loose_correct_predictions:
    print(f"{word}: {loose_correct_predictions[word]}")

Loose Correct Predictions (word found anywhere in the sentence):
you: 1
are: 1
so: 1
slow!: 1
i: 76
am: 50
worried.: 1
fine.: 1
tired.: 1
not: 1
shocked!: 1
proud: 1
of: 1
you!: 1
12: 1
years: 1
old.: 1
nervous.: 1


In [23]:
# Reference words that are absent from the paired hypothesis, with counts.
print("\nIncorrect Predictions:")
for word in incorrect_predictions:
    print(f"{word}: {incorrect_predictions[word]}")


Incorrect Predictions:
i: 1
am: 2
sorry.: 7
*: 39
good: 9
morning!: 5
not: 13
tired.: 10
heartbroken.: 1
like: 12
you: 32
very: 9
much.: 3
my: 12
dog: 7
died.: 7
scared.: 5
is: 22
it: 2
new: 7
year?: 2
this: 10
hard.: 10
head: 5
painful.: 5
**: 46
*****: 19
how: 9
old: 7
are: 38
you?: 9
disgusting!: 9
you.: 12
***: 1
do: 3
john: 5
likes: 3
mary.: 3
saw: 5
a: 5
ghost.: 5
shocked!: 4
worried.: 3
so: 8
alone.: 3
the: 7
trip: 3
exciting.: 3
slow!: 4
hate: 8
you!: 11
does: 2
mary?: 2
slow.: 8
sick.: 3
thank: 5
failed: 2
exam.: 2
our: 1
team: 1
won!: 1
noon!: 4
happy: 5
year!: 5
fine.: 2
nervous.: 5
proud: 3
of: 3
sick!: 5
12: 2
years: 2
old.: 2
show: 2
amazing.: 2


In [24]:
print("Exact Correct Predictions / (Correct + Incorrect) for Each Word:")

# Iterate the dict itself (insertion order) instead of a set built from its
# keys: the iteration order of a set of strings changes between kernel
# sessions because of hash randomisation, which made this listing
# non-reproducible.
# The denominator adds the exact-position matches to the occurrences that are
# missing from the hypothesis altogether; occurrences that appear in the
# hypothesis at a different position are in neither count.
for word, correct_count in exact_correct_predictions.items():
    incorrect_count = incorrect_predictions.get(word, 0)

    print(f"{word}: {correct_count} / {correct_count + incorrect_count}")

Exact Correct Predictions / (Correct + Incorrect) for Each Word:
shocked!: 1 / 5
worried.: 1 / 4
12: 1 / 3
fine.: 1 / 3
tired.: 1 / 11
old.: 1 / 3
i: 76 / 77
so: 1 / 9
years: 1 / 3
nervous.: 1 / 6
of: 1 / 4
slow!: 1 / 5
proud: 1 / 4
you!: 1 / 12
am: 50 / 52
you: 1 / 33
are: 1 / 39
not: 1 / 14


### Shorter / Longer

In [25]:
from collections import defaultdict

def count_sentences_by_word_count(references):
    """Build a histogram of word counts over the given texts.

    Each text is passed through ``clean_text`` and split on whitespace; the
    resulting number of words is the bucket the text is counted in. Texts with
    no words left after cleaning are ignored.

    A text is always counted as one unit. The earlier version split the
    cleaned text on '.', but ``clean_text`` has already removed every period
    by then, so that split could never yield more than one piece. The no-op
    has been dropped; results are unchanged.

    Note: a function with this name is also defined earlier in the notebook.

    Args:
        references: Iterable of text strings.

    Returns:
        A plain dict mapping word count -> number of texts with that many
        words, in order of first appearance.
    """
    histogram = {}

    for text in references:
        n_words = len(clean_text(text).split())
        if n_words:  # Only count non-empty texts
            histogram[n_words] = histogram.get(n_words, 0) + 1

    return histogram

In [26]:
result = count_sentences_by_word_count(references)
result

{3: 92, 2: 14, 4: 48, 5: 16}

In [27]:
length

[4,
 4,
 3,
 5,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 5,
 4,
 3,
 3,
 4,
 3,
 3,
 3,
 3,
 5,
 3,
 4,
 4,
 3,
 3,
 4,
 3,
 3,
 3,
 4,
 4,
 4,
 3,
 5,
 3,
 4,
 3,
 3,
 5,
 3,
 3,
 5,
 3,
 5,
 3,
 5,
 4,
 3,
 3,
 3,
 3,
 3,
 4,
 3,
 5,
 3,
 3,
 3,
 4,
 5,
 3,
 3,
 4,
 3,
 5,
 3,
 5,
 3,
 5,
 3,
 3,
 3,
 3,
 3,
 3]

In [28]:
def compare_lengths(lengths, word_count_dict):
    """Print, per word count, how many entries of ``lengths`` fall in that bucket.

    Args:
        lengths: Iterable of word counts (one entry per counted sentence).
        word_count_dict: Mapping word count -> total number of sentences with
            that word count.

    One line is printed for every key of ``word_count_dict`` in ascending
    order; values in ``lengths`` that are not keys of the mapping are not
    reported. Nothing is returned.
    """
    tally = {}
    for n_words in lengths:
        tally[n_words] = tally.get(n_words, 0) + 1

    for word_count in sorted(word_count_dict):
        num_sentences = word_count_dict[word_count]
        count_in_lengths = tally.get(word_count, 0)
        print(f'There are {count_in_lengths} out of {num_sentences} that are correct for {word_count}-worded sentences.')

In [29]:
compare_lengths(length, result)

There are 0 out of 14 that are correct for 2-worded sentences.
There are 49 out of 92 that are correct for 3-worded sentences.
There are 15 out of 48 that are correct for 4-worded sentences.
There are 13 out of 16 that are correct for 5-worded sentences.


## FSL-NMS Teacher

In [30]:
# FSL-NMS teacher: read the reference/hypothesis pairs from its log and run
# the word-level analysis on them.
log_file_path = 'tfl_augmented/2d_unf_logs.log'
references, hypotheses = extract_data(log_file_path)
(
    exact_correct_predictions,
    loose_correct_predictions,
    incorrect_predictions,
    length,
) = analyze_predictions(references, hypotheses)

In [31]:
print(len(references), len(hypotheses))

200 200


In [32]:
references

['i am *** sorry.',
 '* ** good morning!',
 'you are so slow!',
 'i am not tired.',
 'i am *** heartbroken.',
 'i like you very much.',
 'my dog died.',
 'i am scared.',
 'is it new year?',
 'i am worried.',
 'this is not  hard.',
 'my head is   painful.',
 'i am ** sorry.',
 'i am tired.',
 'how old are you?',
 'i am ** ***** scared.',
 '* good morning!',
 'i am fine.',
 'my dog died.',
 'you are disgusting!',
 'i ** like you.',
 'this is very hard.',
 'i am tired.',
 'i do not like you.',
 '* john likes mary.',
 'i saw a  ghost.',
 'i am shocked!',
 'you are disgusting!',
 '* how old   are you?',
 'i am worried.',
 'i am so tired.',
 'i am *** alone.',
 'i am ** sorry.',
 'i am shocked!',
 'the trip is  exciting.',
 'you are so slow!',
 'i hate you!',
 'i like you very much.',
 'this is hard.',
 'does john like mary?',
 'i am sorry.',
 '* how are you?',
 '* you are slow.',
 'you are sick.',
 'my dog died.',
 '* ** good morning!',
 'i saw a  ghost.',
 '* thank you.',
 'i failed the ex

In [33]:
hypotheses

['i am not tired.',
 'i am not  tired.',
 'you are so slow!',
 'i am not tired.',
 'i am not tired.',
 'i **** *** am   tired.',
 'i  am  worried.',
 'i am worried.',
 '** i  am  worried.',
 'i am tired.',
 '**** i  like you.',
 '** i    like you.',
 'i am so tired.',
 'i am worried.',
 'i   am  so  tired.',
 'i am 12 years old.',
 'i am   fine.',
 'i am fine.',
 'i  am  fine.',
 'i   am  fine.',
 'i am so   tired.',
 '**** i  am   shocked!',
 'i am alone.',
 'i ** *** am   alone.',
 'i am   so    tired.',
 'i *** am tired.',
 'i am fine.',
 'i   am  heartbroken.',
 'i am  proud of  you!',
 'i am fine.',
 'i am ** heartbroken.',
 'i am not tired.',
 'i am so tired.',
 'i am shocked!',
 'i   am   not tired.',
 '*** i   am alone.',
 'i am   alone.',
 'i **** *** am   alone.',
 'i    am alone.',
 '**** i    am   alone.',
 'i am fine.',
 'i saw a   ghost.',
 'i saw a   ghost.',
 'i   am  alone.',
 'i  am  fine.',
 'i am not  tired.',
 'i *** am fine.',
 'i am    heartbroken.',
 'i ****** a

In [34]:
# Words found at the same position in reference and hypothesis, with counts.
print("Exact Correct Predictions:")
for word in exact_correct_predictions:
    print(f"{word}: {exact_correct_predictions[word]}")

Exact Correct Predictions:
i: 87
am: 61
you: 1
are: 2
so: 2
slow!: 1
not: 3
tired.: 2
fine.: 2
shocked!: 1
my: 1
you!: 2
worried.: 1
nervous.: 1


In [35]:
# Reference words that occur somewhere in the paired hypothesis, with counts.
print("Loose Correct Predictions (word found anywhere in the sentence):")
for word in loose_correct_predictions:
    print(f"{word}: {loose_correct_predictions[word]}")

Loose Correct Predictions (word found anywhere in the sentence):
i: 87
am: 61
you: 1
are: 2
so: 2
slow!: 1
not: 3
tired.: 2
fine.: 2
shocked!: 1
my: 1
you!: 2
worried.: 1
nervous.: 1


In [36]:
# Reference words that are absent from the paired hypothesis, with counts.
print("\nIncorrect Predictions:")
for word in incorrect_predictions:
    print(f"{word}: {incorrect_predictions[word]}")


Incorrect Predictions:
***: 4
sorry.: 9
*: 37
**: 35
good: 10
morning!: 5
heartbroken.: 1
like: 14
you: 32
very: 9
much.: 3
my: 15
dog: 8
died.: 8
scared.: 7
is: 28
it: 3
new: 8
year?: 3
worried.: 5
this: 11
not: 16
hard.: 11
head: 8
painful.: 8
tired.: 12
how: 11
old: 9
are: 39
you?: 11
*****: 9
disgusting!: 9
you.: 13
do: 4
john: 8
likes: 5
mary.: 5
saw: 5
a: 5
ghost.: 5
shocked!: 6
so: 8
alone.: 5
the: 9
trip: 3
exciting.: 3
slow!: 4
hate: 8
you!: 10
does: 3
mary?: 3
slow.: 8
sick.: 3
thank: 5
failed: 3
exam.: 3
i: 5
am: 4
****: 3
our: 3
team: 3
won!: 3
noon!: 5
happy: 5
year!: 5
fine.: 1
proud: 4
of: 4
nervous.: 5
sick!: 5
12: 3
years: 3
old.: 3
show: 3
amazing.: 3


In [37]:
print("Exact Correct Predictions / (Correct + Incorrect) for Each Word:")

# Iterate the dict itself (insertion order) instead of a set built from its
# keys: the iteration order of a set of strings changes between kernel
# sessions because of hash randomisation, which made this listing
# non-reproducible.
# The denominator adds the exact-position matches to the occurrences that are
# missing from the hypothesis altogether; occurrences that appear in the
# hypothesis at a different position are in neither count.
for word, correct_count in exact_correct_predictions.items():
    incorrect_count = incorrect_predictions.get(word, 0)

    print(f"{word}: {correct_count} / {correct_count + incorrect_count}")

Exact Correct Predictions / (Correct + Incorrect) for Each Word:
my: 1 / 16
shocked!: 1 / 7
worried.: 1 / 6
tired.: 2 / 14
fine.: 2 / 3
i: 87 / 92
so: 2 / 10
nervous.: 1 / 6
slow!: 1 / 5
you!: 2 / 12
am: 61 / 65
you: 1 / 33
are: 2 / 41
not: 3 / 19


### Shorter / Longer

In [38]:
from collections import defaultdict

def count_sentences_by_word_count(references):
    """Build a histogram of word counts over the given texts.

    Each text is passed through ``clean_text`` and split on whitespace; the
    resulting number of words is the bucket the text is counted in. Texts with
    no words left after cleaning are ignored.

    A text is always counted as one unit. The earlier version split the
    cleaned text on '.', but ``clean_text`` has already removed every period
    by then, so that split could never yield more than one piece. The no-op
    has been dropped; results are unchanged.

    Note: a function with this name is also defined earlier in the notebook.

    Args:
        references: Iterable of text strings.

    Returns:
        A plain dict mapping word count -> number of texts with that many
        words, in order of first appearance.
    """
    histogram = {}

    for text in references:
        n_words = len(clean_text(text).split())
        if n_words:  # Only count non-empty texts
            histogram[n_words] = histogram.get(n_words, 0) + 1

    return histogram

In [39]:
result = count_sentences_by_word_count(references)
result

{3: 107, 2: 15, 4: 60, 5: 18}

In [40]:
length

[3,
 4,
 4,
 3,
 5,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 5,
 4,
 3,
 3,
 4,
 3,
 3,
 3,
 3,
 5,
 3,
 4,
 4,
 3,
 3,
 4,
 3,
 3,
 3,
 4,
 3,
 4,
 3,
 5,
 3,
 4,
 3,
 3,
 5,
 3,
 3,
 5,
 3,
 5,
 3,
 5,
 4,
 3,
 3,
 3,
 3,
 3,
 4,
 3,
 5,
 3,
 3,
 3,
 4,
 5,
 3,
 3,
 4,
 3,
 5,
 3,
 5,
 3,
 5,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 4,
 3,
 3,
 3,
 3,
 4,
 3,
 4,
 3,
 5,
 3,
 4]

In [41]:
def compare_lengths(lengths, word_count_dict):
    """Print, per word count, how many entries of ``lengths`` fall in that bucket.

    Args:
        lengths: Iterable of word counts (one entry per counted sentence).
        word_count_dict: Mapping word count -> total number of sentences with
            that word count.

    One line is printed for every key of ``word_count_dict`` in ascending
    order; values in ``lengths`` that are not keys of the mapping are not
    reported. Nothing is returned.
    """
    tally = {}
    for n_words in lengths:
        tally[n_words] = tally.get(n_words, 0) + 1

    for word_count in sorted(word_count_dict):
        num_sentences = word_count_dict[word_count]
        count_in_lengths = tally.get(word_count, 0)
        print(f'There are {count_in_lengths} out of {num_sentences} that are correct for {word_count}-worded sentences.')

In [42]:
compare_lengths(length, result)

There are 0 out of 15 that are correct for 2-worded sentences.
There are 60 out of 107 that are correct for 3-worded sentences.
There are 18 out of 60 that are correct for 4-worded sentences.
There are 14 out of 18 that are correct for 5-worded sentences.


## Combined Teacher

In [43]:
# Combined teacher: read the reference/hypothesis pairs from its log and run
# the word-level analysis on them.
log_file_path = 'tfl_augmented/combined_unf_logs.log'
references, hypotheses = extract_data(log_file_path)
(
    exact_correct_predictions,
    loose_correct_predictions,
    incorrect_predictions,
    length,
) = analyze_predictions(references, hypotheses)

In [44]:
print(len(references), len(hypotheses))

60 60


In [45]:
references

['**** saturday',
 '**** yellow',
 '**** mother',
 '**** october',
 '* the show  is amazing.',
 'person in a  wheelchair',
 '* ** february',
 '* good noon!',
 'i am fine',
 '* ** married',
 '* ** ** ***** blind',
 '* ** ** ***** slow',
 '* ** how are   you',
 '**** son',
 '**** white',
 '**** daughter',
 '**** beer',
 '**** yesterday',
 '**** father',
 '**** monday',
 '**** green',
 '**** meat',
 '**** dark',
 '**** white',
 '**** two',
 '* ** brown',
 '* ** tomorrow',
 '* no sugar',
 'i saw a  ghost.',
 '* good morning',
 'my dog died.',
 'brown',
 'fish',
 'i like you very much.',
 'red',
 '**** tuesday',
 '**** juice',
 '**** september',
 '**** understand',
 '**** mother',
 '**** april',
 '**** mother',
 '**** pink',
 'do not  know',
 '**** spaghetti',
 '* ** ten',
 'i ** hate you!',
 'how are you?',
 '* happy new year!',
 'our team won!',
 '**** brown',
 '**** chicken',
 'i do not like you.',
 'i am   scared.',
 '**** violet',
 'i am   nervous.',
 '**** auntie',
 '**** six',
 'i li

In [46]:
hypotheses

['good afternoon',
 'good afternoon',
 'good afternoon',
 'good afternoon',
 'i am  proud of you!',
 '****** i  am heartbroken.',
 'i am heartbroken.',
 'i am   heartbroken.',
 'i am heartbroken.',
 'i am heartbroken.',
 'i am 12 years old.',
 'i am 12 years old.',
 'i am 12  years old.',
 'good afternoon',
 'good afternoon',
 'good afternoon',
 'good afternoon',
 'good afternoon',
 'good afternoon',
 'good afternoon',
 'good afternoon',
 'good afternoon',
 'good afternoon',
 'good afternoon',
 'good afternoon',
 'i am fine',
 'i am fine',
 'i am fine',
 'i *** am scared.',
 'i am   fine',
 'i  am  fine',
 'fast',
 'fast',
 'i **** *** am   shocked!',
 'fast',
 'good afternoon',
 'good afternoon',
 'good afternoon',
 'good afternoon',
 'good afternoon',
 'good evening',
 'good evening',
 'good evening',
 '** good evening',
 'good evening',
 'i am fine',
 'i am so   tired.',
 'i   am  worried.',
 'i am    so  tired.',
 'i   am   worried.',
 'good afternoon',
 'good afternoon',
 'i ** **

In [47]:
# Words found at the same position in reference and hypothesis, with counts.
print("Exact Correct Predictions:")
for word in exact_correct_predictions:
    print(f"{word}: {exact_correct_predictions[word]}")

Exact Correct Predictions:
i: 5
am: 1


In [48]:
# Reference words that occur somewhere in the paired hypothesis, with counts.
print("Loose Correct Predictions (word found anywhere in the sentence):")
for word in loose_correct_predictions:
    print(f"{word}: {loose_correct_predictions[word]}")

Loose Correct Predictions (word found anywhere in the sentence):
i: 5
am: 1


In [49]:
# Reference words that are absent from the paired hypothesis, with counts.
print("\nIncorrect Predictions:")
for word in incorrect_predictions:
    print(f"{word}: {incorrect_predictions[word]}")


Incorrect Predictions:
****: 31
saturday: 1
yellow: 1
mother: 3
october: 1
*: 13
the: 1
show: 1
is: 1
amazing.: 1
person: 1
in: 1
a: 2
wheelchair: 1
**: 11
february: 1
good: 2
noon!: 1
fine: 1
married: 1
*****: 2
blind: 1
slow: 1
how: 2
are: 2
you: 3
son: 1
white: 2
daughter: 1
beer: 1
yesterday: 1
father: 1
monday: 1
green: 1
meat: 1
dark: 1
two: 1
brown: 3
tomorrow: 1
no: 1
sugar: 1
saw: 1
ghost.: 1
morning: 1
my: 1
dog: 1
died.: 1
fish: 1
like: 3
very: 2
much.: 2
red: 1
tuesday: 1
juice: 1
september: 1
understand: 1
april: 1
pink: 1
do: 2
not: 2
know: 1
spaghetti: 1
ten: 1
hate: 1
you!: 1
you?: 1
happy: 1
new: 1
year!: 1
our: 1
team: 1
won!: 1
chicken: 1
you.: 1
i: 3
am: 2
scared.: 1
violet: 1
nervous.: 1
auntie: 1
six: 1
woman: 1


In [50]:
print("Exact Correct Predictions / (Correct + Incorrect) for Each Word:")

# Iterate the dict itself (insertion order) instead of a set built from its
# keys: the iteration order of a set of strings changes between kernel
# sessions because of hash randomisation, which made this listing
# non-reproducible.
# The denominator adds the exact-position matches to the occurrences that are
# missing from the hypothesis altogether; occurrences that appear in the
# hypothesis at a different position are in neither count.
for word, correct_count in exact_correct_predictions.items():
    incorrect_count = incorrect_predictions.get(word, 0)

    print(f"{word}: {correct_count} / {correct_count + incorrect_count}")

Exact Correct Predictions / (Correct + Incorrect) for Each Word:
i: 5 / 8
am: 1 / 3


### Shorter / Longer

In [51]:
from collections import defaultdict

def count_sentences_by_word_count(references):
    """Build a histogram of word counts over the given texts.

    Each text is passed through ``clean_text`` and split on whitespace; the
    resulting number of words is the bucket the text is counted in. Texts with
    no words left after cleaning are ignored.

    A text is always counted as one unit. The earlier version split the
    cleaned text on '.', but ``clean_text`` has already removed every period
    by then, so that split could never yield more than one piece. The no-op
    has been dropped; results are unchanged.

    Note: a function with this name is also defined earlier in the notebook.

    Args:
        references: Iterable of text strings.

    Returns:
        A plain dict mapping word count -> number of texts with that many
        words, in order of first appearance.
    """
    histogram = {}

    for text in references:
        n_words = len(clean_text(text).split())
        if n_words:  # Only count non-empty texts
            histogram[n_words] = histogram.get(n_words, 0) + 1

    return histogram

In [52]:
result = count_sentences_by_word_count(references)
result

{1: 41, 4: 3, 2: 3, 3: 10, 5: 3}

In [53]:
length

[3, 4, 5, 3, 5]

In [54]:
def compare_lengths(lengths, word_count_dict):
    """Print, per word count, how many entries of ``lengths`` fall in that bucket.

    Args:
        lengths: Iterable of word counts (one entry per counted sentence).
        word_count_dict: Mapping word count -> total number of sentences with
            that word count.

    One line is printed for every key of ``word_count_dict`` in ascending
    order; values in ``lengths`` that are not keys of the mapping are not
    reported. Nothing is returned.
    """
    tally = {}
    for n_words in lengths:
        tally[n_words] = tally.get(n_words, 0) + 1

    for word_count in sorted(word_count_dict):
        num_sentences = word_count_dict[word_count]
        count_in_lengths = tally.get(word_count, 0)
        print(f'There are {count_in_lengths} out of {num_sentences} that are correct for {word_count}-worded sentences.')

In [55]:
compare_lengths(length, result)

There are 0 out of 41 that are correct for 1-worded sentences.
There are 0 out of 3 that are correct for 2-worded sentences.
There are 2 out of 10 that are correct for 3-worded sentences.
There are 1 out of 3 that are correct for 4-worded sentences.
There are 2 out of 3 that are correct for 5-worded sentences.


# Predictions per N-gram

In [56]:
import re
from typing import List, Tuple
import json

# Function to extract reference/hypothesis pairs from the log file
def extract_data(log_file_path: str) -> Tuple[List[str], List[str]]:
    """Collect reference/hypothesis text pairs from a log file.

    Every line containing 'Text Reference' is treated as a reference, and the
    line directly after it as the matching hypothesis. For both lines only the
    text after the first colon-plus-tab separator is kept, with surrounding
    whitespace stripped.

    Raises:
        IndexError: If a reference line or the line after it has no
            colon-plus-tab separator.
    """
    with open(log_file_path, 'r') as log_file:
        log_lines = log_file.readlines()

    def payload(raw_line):
        # Keep only what follows the first ":\t" on the line.
        return raw_line.split(':\t', 1)[1].strip()

    references, hypotheses = [], []

    # Walk over (line, following line) pairs; the final line has no successor
    # and is therefore never considered as a reference.
    for current_line, following_line in zip(log_lines, log_lines[1:]):
        if 'Text Reference' not in current_line:
            continue
        reference_text = payload(current_line)
        hypothesis_text = payload(following_line)
        references.append(reference_text)
        hypotheses.append(hypothesis_text)

    return references, hypotheses

# Keep only ASCII letters, digits and whitespace, then trim both ends
def clean_text(text: str) -> str:
    """Remove every character that is not an ASCII letter, digit or whitespace
    and strip leading/trailing whitespace. Inner whitespace is kept as is."""
    without_symbols = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    return without_symbols.strip()

In [57]:
from collections import Counter

# Function to generate n-grams from a given text
def generate_ngrams(text: str, n: int) -> List[str]:
    """Return the word n-grams of ``text`` in order of appearance.

    Words are whitespace-separated and each n-gram is joined with a single
    space. A text with fewer than ``n`` words yields an empty list.
    """
    words = text.split()
    return [' '.join(words[i:i + n]) for i in range(len(words) - n + 1)]


# Function to analyze and count correct predictions per n-gram
def analyze_predictions(references: List[str], hypotheses: List[str], n: int) -> Tuple[Counter, Counter]:
    """Count correctly predicted n-grams over paired references and hypotheses.

    NOTE: this re-uses the name of the word-level ``analyze_predictions``
    defined earlier in the notebook, which takes two arguments and returns
    four values; once this cell has run, the earlier cells that call the
    two-argument form need the earlier definition re-executed first.

    Args:
        references: Reference sentences.
        hypotheses: Hypothesis sentences, paired with ``references`` by position.
        n: Size of the n-grams to compare.

    Returns:
        ``(correct_ngrams, total_ngrams)``:
        * ``correct_ngrams`` counts, per n-gram, the hypothesis occurrences
          that are matched by the paired reference. Matches are clipped to the
          number of times the n-gram occurs in that reference, so a hypothesis
          that repeats an n-gram cannot score more hits than the reference
          contains (previously every repetition was counted).
        * ``total_ngrams`` counts every n-gram occurrence in the references.
    """
    correct_ngrams = Counter()
    total_ngrams = Counter()

    for reference, hypothesis in zip(references, hypotheses):
        ref_counts = Counter(generate_ngrams(reference, n))
        hyp_counts = Counter(generate_ngrams(hypothesis, n))

        total_ngrams.update(ref_counts)

        # Counter intersection keeps min(hyp count, ref count) per n-gram and
        # drops n-grams that do not occur on both sides.
        correct_ngrams.update(hyp_counts & ref_counts)

    return correct_ngrams, total_ngrams

## Combined Student

In [58]:
import pandas as pd

# Combined student log
log_file_path = 'T=3, a=0.5/combined_kd_3,0.5.log'

# Read the pairs and strip everything except letters, digits and whitespace
# from both sides before n-gram matching.
references, hypotheses = extract_data(log_file_path)
references = list(map(clean_text, references))
hypotheses = list(map(clean_text, hypotheses))

# One table of correctly predicted n-grams per order n = 1..5, keyed '<n>-gram'.
results = {}
for n in range(1, 6):
    correct_ngrams, total_ngrams = analyze_predictions(references, hypotheses, n)
    results[f'{n}-gram'] = pd.DataFrame.from_dict(correct_ngrams, orient='index', columns=['Count'])

# Show the tables in ascending n-gram order (dict insertion order).
for label, table in results.items():
    print(f"Correct Predictions Per {label}:")
    print(table)
    print("\n")

Correct Predictions Per 1-gram:
       Count
i         21
am        15
proud      1
of         1
you        3
fine       1
no         1
good       1


Correct Predictions Per 2-gram:
          Count
i am         15
am proud      1
proud of      1
of you        1
am fine       1


Correct Predictions Per 3-gram:
              Count
i am proud        1
am proud of       1
proud of you      1
i am fine         1


Correct Predictions Per 4-gram:
                 Count
i am proud of        1
am proud of you      1


Correct Predictions Per 5-gram:
                   Count
i am proud of you      1




## FSL-NMS Student

In [59]:
# FSL-NMS student log
log_file_path = 'T=3, a=0.5/2d_kd_3,0.5.log'

# Read the pairs and strip everything except letters, digits and whitespace
# from both sides before n-gram matching.
references, hypotheses = extract_data(log_file_path)
references = list(map(clean_text, references))
hypotheses = list(map(clean_text, hypotheses))

# One table of correctly predicted n-grams per order n = 1..5, keyed '<n>-gram'.
results = {}
for n in range(1, 6):
    correct_ngrams, total_ngrams = analyze_predictions(references, hypotheses, n)
    results[f'{n}-gram'] = pd.DataFrame.from_dict(correct_ngrams, orient='index', columns=['Count'])

# Show the tables in ascending n-gram order (dict insertion order).
for label, table in results.items():
    print(f"Correct Predictions Per {label}:")
    print(table)
    print("\n")

Correct Predictions Per 1-gram:
         Count
you          7
are          1
so           1
slow         1
i           76
am          50
worried      1
old          3
fine         1
tired        1
not          1
shocked      1
proud        1
of           1
12           1
years        1
nervous      1


Correct Predictions Per 2-gram:
            Count
you are         1
are so          1
so slow         1
i am           50
am worried      1
am fine         1
am shocked      1
am proud        1
proud of        1
of you          1
am 12           1
12 years        1
years old       1
am nervous      1


Correct Predictions Per 3-gram:
              Count
you are so        1
are so slow       1
i am worried      1
i am fine         1
i am shocked      1
i am proud        1
am proud of       1
proud of you      1
i am 12           1
am 12 years       1
12 years old      1
i am nervous      1


Correct Predictions Per 4-gram:
                 Count
you are so slow      1
i am proud of       

## Combined Teacher

In [60]:
# Combined teacher log
log_file_path = 'tfl_augmented/combined_unf_logs.log'

# Read the pairs and strip everything except letters, digits and whitespace
# from both sides before n-gram matching.
references, hypotheses = extract_data(log_file_path)
references = list(map(clean_text, references))
hypotheses = list(map(clean_text, hypotheses))

# One table of correctly predicted n-grams per order n = 1..5, keyed '<n>-gram'.
results = {}
for n in range(1, 6):
    correct_ngrams, total_ngrams = analyze_predictions(references, hypotheses, n)
    results[f'{n}-gram'] = pd.DataFrame.from_dict(correct_ngrams, orient='index', columns=['Count'])

# Show the tables in ascending n-gram order (dict insertion order).
for label, table in results.items():
    print(f"Correct Predictions Per {label}:")
    print(table)
    print("\n")

Correct Predictions Per 1-gram:
    Count
i       5
am      1


Correct Predictions Per 2-gram:
      Count
i am      1


Correct Predictions Per 3-gram:
Empty DataFrame
Columns: [Count]
Index: []


Correct Predictions Per 4-gram:
Empty DataFrame
Columns: [Count]
Index: []


Correct Predictions Per 5-gram:
Empty DataFrame
Columns: [Count]
Index: []




## FSL-NMS Teacher

In [61]:
# FSL-NMS teacher log
log_file_path = 'tfl_augmented/2d_unf_logs.log'

# Read the pairs and strip everything except letters, digits and whitespace
# from both sides before n-gram matching.
references, hypotheses = extract_data(log_file_path)
references = list(map(clean_text, references))
hypotheses = list(map(clean_text, hypotheses))

# One table of correctly predicted n-grams per order n = 1..5, keyed '<n>-gram'.
results = {}
for n in range(1, 6):
    correct_ngrams, total_ngrams = analyze_predictions(references, hypotheses, n)
    results[f'{n}-gram'] = pd.DataFrame.from_dict(correct_ngrams, orient='index', columns=['Count'])

# Show the tables in ascending n-gram order (dict insertion order).
for label, table in results.items():
    print(f"Correct Predictions Per {label}:")
    print(table)
    print("\n")

Correct Predictions Per 1-gram:
         Count
i           87
am          61
you          9
are          2
so           2
slow         1
not          3
tired        2
fine         2
shocked      1
my           1
worried      1
nervous      1


Correct Predictions Per 2-gram:
            Count
i am           61
you are         1
are so          1
so slow         1
am not          1
not tired       1
am fine         2
am shocked      1
am worried      1
am nervous      1


Correct Predictions Per 3-gram:
              Count
you are so        1
are so slow       1
i am not          1
am not tired      1
i am fine         2
i am shocked      1
i am worried      1
i am nervous      1


Correct Predictions Per 4-gram:
                 Count
you are so slow      1
i am not tired       1


Correct Predictions Per 5-gram:
Empty DataFrame
Columns: [Count]
Index: []


