In [1]:
import pandas as pd
from utils import build_gen_df, build_source_df, scale_rows

# Model checkpoint name -> label of the "delta" dataset it is associated with.
# Only models listed here survive the filtering step further down; the
# base-scratch checkpoint is labelled "all".
model_delta_map = {
    "mimir-7b-books": "books",
    "mimir-7b-fiction": "fiction",
    "mimir-7b-nonfiction": "nonfiction",
    "mimir-mistral-7b-base-scratch": "all",
    "mimir-7b-translated": "translated",
    "mimir-7b-untranslated": "untranslated",
    "mimir-7b-newspapers": "newspapers",
    "mimir-7b-factual": "factual",
}

# Full list of model checkpoints (base-scratch model plus the delta models).
# It contains two names that have no entry in model_delta_map
# ('mimir-7b-untranslated-withnewspapers' and 'mimir-7b-rightholders').
# NOTE(review): not referenced by any cell visible here - confirm it is still needed.
base_scratch_and_deltas = [
    'mimir-mistral-7b-base-scratch',
    'mimir-7b-factual',
    'mimir-7b-nonfiction',
    'mimir-7b-untranslated-withnewspapers',
    'mimir-7b-translated',
    'mimir-7b-books',
    'mimir-7b-rightholders',
    'mimir-7b-fiction',
    'mimir-7b-untranslated',
    'mimir-7b-newspapers',
]

# Values of the `score` column that are kept for display; every other score is dropped.
scores_to_display = [
    "compression_ratio",
    "pos_compression_ratio",
    "stopword_density",
    "number_of_tokens",
    "self_bleu",
    "lix_score",
]

# Rebuild the score tables for the generated texts before loading them.
# presumably build_gen_df() (re)writes the two lex_lix_generated_*.csv files
# read below - verify in utils.
# build_source_df()
build_gen_df()

# --- Disabled: dataset-level ("source") scores --------------------------------
# Re-enable these lines, build_source_df() above, and the final concat to show
# the source datasets next to the generated texts.
# source_df = pd.read_csv("lex_lix_source.csv")
# source_df = source_df[source_df.score.isin(scores_to_display)]
# source_df = source_df.apply(scale_rows, axis=1)
# source_df["name"] = source_df.delta.apply(lambda x: "dataset_" + x)

# One score table per decoding strategy.
df_generated1 = pd.read_csv("lex_lix_generated_greedy.csv")
df_generated2 = pd.read_csv("lex_lix_generated_contrastive.csv")

# Tag every row with "<last part of the model name>_<strategy>"; the dashboard
# callback later selects rows by this strategy suffix.
for strategy_frame, strategy_suffix in (
    (df_generated1, "_greedy"),
    (df_generated2, "_contrastive"),
):
    strategy_frame["name"] = [
        model_name.split("-")[-1] + strategy_suffix
        for model_name in strategy_frame["model"]
    ]

# Stack both strategies, keep only the displayed scores, rescale row by row,
# keep only models with a known delta label and attach that label.
# Judging by the displayed output (e.g. `number_of_tokens/100`, `lix_score/10`),
# scale_rows rescales some values and renames the score accordingly - confirm in
# utils. The score filter therefore has to run before scale_rows.
# The `model` column is kept on purpose: the bar chart uses it for the x axis.
df_generated = (
    pd.concat([df_generated1, df_generated2])
    .loc[lambda scores: scores["score"].isin(scores_to_display)]
    .apply(scale_rows, axis=1)
    .loc[lambda scores: scores["model"].isin(list(model_delta_map))]
    .assign(delta=lambda scores: scores["model"].map(model_delta_map))
)

# Frame consumed by the dashboard. With the source scores disabled it is just
# the generated-text table:
# df = pd.concat([source_df, df_generated])
df = df_generated

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/tita/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Preview the combined greedy + contrastive score table after filtering;
# the rich display elides the middle rows of a long frame.
df_generated

Unnamed: 0,model,score,value,language,name,delta
35,mimir-7b-factual,number_of_tokens/100,1.884672,both,factual_greedy,factual
36,mimir-7b-factual,stopword_density,0.656218,both,factual_greedy,factual
37,mimir-7b-factual,self_bleu,0.497288,both,factual_greedy,factual
38,mimir-7b-factual,compression_ratio,3.637045,both,factual_greedy,factual
39,mimir-7b-factual,pos_compression_ratio,5.399492,both,factual_greedy,factual
...,...,...,...,...,...,...
487,mimir-7b-fiction,compression_ratio,1.679184,nob,fiction_contrastive,fiction
488,mimir-7b-fiction,pos_compression_ratio,3.444823,nob,fiction_contrastive,fiction
492,mimir-7b-fiction,lix_score/10,3.006628,both,fiction_contrastive,fiction
493,mimir-7b-fiction,lix_score/10,2.966599,nno,fiction_contrastive,fiction


In [3]:
from dash import Dash, html, dcc, callback, Output, Input
import plotly.express as px

app = Dash()

# Page layout: a title, three selectors (language, score, decoding strategy)
# and the bar chart that update_graph() redraws whenever a selector changes.
# The selectors are non-clearable: dcc.Dropdown is clearable by default, and a
# cleared dropdown would pass None to the callback, which cannot filter on it.
app.layout = [
    html.H1(children='Lexical scores per model trained on delta datasets', style={'textAlign':'center', "color": "White"}),
    dcc.Dropdown(df.language.unique(), 'both', id='dropdown-language', clearable=False),
    dcc.Dropdown(df.score.unique(), 'compression_ratio', id='dropdown-score', clearable=False),
    dcc.Dropdown(["greedy", "contrastive"], 'greedy', id='dropdown-generation', clearable=False),

    dcc.Graph(id='graph-content')
]

@callback(
    Output('graph-content', 'figure'),
    [Input('dropdown-language', 'value'), Input('dropdown-score', 'value'), Input('dropdown-generation', 'value')]
)
def update_graph(selected_language, selected_score, selected_generation):
    """Redraw the bar chart for the current dropdown selection.

    Args:
        selected_language: value of the language dropdown, compared against
            the `language` column of `df`.
        selected_score: value of the score dropdown, compared against the
            `score` column of `df`.
        selected_generation: decoding strategy ("greedy" or "contrastive");
            rows are kept when their `name` ends with this string.

    Returns:
        A plotly bar figure with one bar per model, ordered by ascending value.

    Raises:
        PreventUpdate: if any dropdown has been cleared (its value is None),
            so the previous figure stays on screen instead of the callback
            failing.
    """
    # Imported locally so this callback does not depend on an edit elsewhere.
    from dash.exceptions import PreventUpdate

    # A cleared dropdown delivers None; str.endswith(None) would raise a
    # TypeError, so skip the update and keep the current figure.
    if any(choice is None for choice in (selected_language, selected_score, selected_generation)):
        raise PreventUpdate

    # Build a single boolean mask instead of three successive filtered copies.
    selected_rows = (
        (df["language"] == selected_language)
        & (df["score"] == selected_score)
        & df["name"].str.endswith(selected_generation, na=False)
    )
    dff = df.loc[selected_rows].sort_values("value")

    return px.bar(
        dff,
        x='model',
        y='value',
        color='model',
        title=f"{selected_score} ({selected_language}, {selected_generation})",
    )

# Start the Dash server. Inside a notebook kernel __name__ is '__main__', so
# the app is launched as soon as this cell runs.
if __name__ == '__main__':
    # NOTE(review): debug=True turns on Dash's development tooling - confirm
    # this is only meant for local exploration.
    app.run(debug=True)
