In [1]:
import os
import re
import seaborn as sns
import matplotlib.pyplot as plt

In [4]:
import pandas as pd
import ast

def parse_scores_from_file(file_path):
    """Extract corpus BLEU and ROUGE scores from a results text file.

    The text is scanned for every ``corpus BLEU-<n> score: <number>`` entry and
    for the first ``ROUGE score: {...}`` entry, whose payload is parsed as a
    Python literal and is expected to map each ROUGE type to a dict of
    metric name -> value.

    Args:
        file_path: Path of the results file to read.

    Returns:
        dict: ``'BLEU-<n>'`` keys mapped to floats and
        ``'<rouge_type>-<metric>'`` keys mapped to the values found in the
        ROUGE literal. Empty when the file contains neither kind of entry.

    Raises:
        OSError: If the file cannot be opened.
        ValueError, SyntaxError: If the ROUGE payload is not a valid literal.
    """
    # Accept plain decimals, bare integers ("0") and scientific notation
    # ("2.5e-05"). Matching only "\d+\.\d+" would read "2.5e-05" as 2.5 and
    # would drop integer scores altogether.
    number = r'\d+(?:\.\d*)?(?:[eE][-+]?\d+)?'

    with open(file_path, 'r') as file:
        data = file.read()

    scores = {}

    # Corpus BLEU scores; if the same BLEU order appears twice the last one wins.
    for n, score in re.findall(rf'corpus BLEU-(\d) score: ({number})', data):
        scores[f'BLEU-{n}'] = float(score)

    # ROUGE scores: "." does not cross newlines, so the greedy match takes the
    # whole nested dict literal up to the last "}" on that line.
    rouge_scores_match = re.search(r'ROUGE score: ({.+})', data)
    if rouge_scores_match:
        rouge_scores = ast.literal_eval(rouge_scores_match.group(1))
        for rouge_type, rouge_values in rouge_scores.items():
            for metric, value in rouge_values.items():
                scores[f'{rouge_type}-{metric}'] = value

    return scores

In [5]:
# Folder holding the result files to analyse.
# NOTE(review): this points at the Contextual results only, while later cells
# also summarise Word_Level and Sentence_Level runs - presumably this cell was
# re-run with other folders; confirm before relying on Restart & Run All.
path = r"C:\Users\gxb18167\OneDrive - University of Strathclyde\Desktop\PhD\2nd year\Results\EEG-To-Text\best\Contextual"
# os.listdir returns entries in arbitrary order; sort them so the rows are
# always built in the same order.
dir_list = sorted(os.listdir(path))

In [6]:
# Read every result file and tag its scores with the categories encoded in the
# file name.

# Candidate labels per category, in matching priority order: the first
# candidate that occurs as a substring of the file name wins.
AUGMENTATION_SIZES = ("25", "100", "50", "75", "40", "60", "55")
MODELS = ("WGAN_v1_Text", "DCGAN_v1_Text", "WGAN_v2_Text", "DCGAN_v2_Text")
AUGMENTATION_TYPES = ("random", "TF-IDF-Low", "TF-IDF-High", "TF-IDF-Medium")
GENERATION_TYPES = ("Word_Level", "Sentence_Level", "Contextual")


def _first_label_in(file_name, candidates):
    """Return the first candidate contained in ``file_name``, or None if none is."""
    return next((label for label in candidates if label in file_name), None)


list_of_dfs = []
for file in dir_list:
    file_path = os.path.join(path, file)
    # Sub-directories are not result files; skip them instead of failing on open().
    if not os.path.isfile(file_path):
        continue

    # Every label is recomputed from scratch for each file. A file name that
    # matches no candidate gets None rather than silently inheriting the label
    # of the previously processed file.
    augmentation_size = _first_label_in(file, AUGMENTATION_SIZES)
    model = _first_label_in(file, MODELS)
    augmentation_type = _first_label_in(file, AUGMENTATION_TYPES)
    generation_type = _first_label_in(file, GENERATION_TYPES)

    scores = parse_scores_from_file(file_path)

    # One single-row frame per file: the parsed scores plus the category labels.
    file_df = pd.DataFrame([scores])
    file_df["augmentation_size"] = augmentation_size
    file_df["model"] = model
    file_df["augmentation_type"] = augmentation_type
    file_df["generation_type"] = generation_type

    list_of_dfs.append(file_df)

In [20]:
# Stack the per-file frames into one table. Each of them is a single row with
# index 0, so renumber the rows to give every file a unique row label.
df = pd.concat(list_of_dfs, ignore_index=True)

In [31]:
# Mean and standard deviation of BLEU-1 and rouge-1-f for each augmentation
# size, computed separately per generation type.
SUMMARY_METRICS = ["BLEU-1", "rouge-1-f"]
# only 25, 55, 75, and 100 are reported
SUMMARY_SIZES = ["25", "55", "75", "100"]


def _summarise_generation_type(scores_df, generation_type):
    """Aggregate the summary metrics per augmentation size for one generation type.

    Args:
        scores_df: Frame with the ``generation_type``, ``augmentation_size``
            and ``SUMMARY_METRICS`` columns.
        generation_type: Value of ``generation_type`` to keep.

    Returns:
        DataFrame indexed by augmentation size (restricted to
        ``SUMMARY_SIZES``, in that order) with a (metric, "mean"/"std")
        column MultiIndex. A size backed by a single row has NaN std,
        because pandas computes the sample standard deviation.

    Raises:
        KeyError: If any of ``SUMMARY_SIZES`` has no row for this generation type.
    """
    subset = scores_df[scores_df["generation_type"] == generation_type]
    per_size = (
        subset[SUMMARY_METRICS + ["augmentation_size"]]
        .groupby("augmentation_size")
        .agg(["mean", "std"])
    )
    return per_size.loc[SUMMARY_SIZES]


contextual = _summarise_generation_type(df, "Contextual")
word_level = _summarise_generation_type(df, "Word_Level")
sentence_level = _summarise_generation_type(df, "Sentence_Level")



In [32]:
# Display the per-augmentation-size mean/std table for the Contextual runs.
contextual

Unnamed: 0_level_0,BLEU-1,BLEU-1,rouge-1-f,rouge-1-f
Unnamed: 0_level_1,mean,std,mean,std
augmentation_size,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
25,0.12045,0.041639,0.118965,0.025753
55,0.140294,,0.136051,
75,0.121529,,0.124051,
100,0.119086,0.019711,0.118868,0.019129


In [33]:
# Display the per-augmentation-size mean/std table for the Sentence_Level runs.
sentence_level

Unnamed: 0_level_0,BLEU-1,BLEU-1,rouge-1-f,rouge-1-f
Unnamed: 0_level_1,mean,std,mean,std
augmentation_size,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
25,0.132441,0.022749,0.118836,0.022132
55,0.165029,,0.152001,
75,0.137012,,0.139051,
100,0.115428,0.016566,0.122503,0.015207


In [34]:
# Display the per-augmentation-size mean/std table for the Word_Level runs.
word_level

Unnamed: 0_level_0,BLEU-1,BLEU-1,rouge-1-f,rouge-1-f
Unnamed: 0_level_1,mean,std,mean,std
augmentation_size,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
25,0.129482,0.025163,0.124639,0.014464
55,0.146294,,0.135051,
75,0.110529,,0.120051,
100,0.115494,0.013927,0.113215,0.018192
