In [92]:
import pandas as pd
import sys
import os
import inspect
import s5_postprocess_evaluation as pe
import configparser
import s6_display_results as dr
import s4_evaluation as evl
import warnings
# Ignore all warnings
warnings.filterwarnings("ignore")
# access parent directory from notebooks directory
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


#### For the reflection example, we use the following use case: Mistral Medium (as both the TST model and the evaluator), the non-parallel data prompt, 5 shots, and no context

In [93]:
df = dr.get_input_data()
# Keep only the rows of the use case studied here: mistral-medium as both
# evaluator and model, non-parallel data, 5 shots, NoContext prompting.
selection = (
    (df.evaluator == 'mistral-medium')
    & (df.isParallel == False)
    & (df.shots == 5)
    & (df.prompting == 'NoContext')
    & (df.model == 'mistral-medium')
)
df = df[selection]
df = evl.extract_explanation(df, 'explanation_accuracy')
df.head(3)

Unnamed: 0,fileID,user,promptID,model,shots,runID,neutral_sentence,tst_sentence_0,user_sentence,explanation_accuracy,explanation_content_preservation,explanation_fluency,score_accuracy,score_content_preservation,score_fluency,prompting,evaluator,isParallel
4,231,U1,2,mistral-medium,5,911714,I'm all about fast food. I'm having some fries...,"Hey, I'm all about that fast food, y'all! I'm ...","I just love fast food! I’ll have some fries, s...","Both paragraphs S1 and S2 have an informal, c...",Score: 10\nExplanation: Both S1 and S2 convey ...,Score: 9\n\nExplanation: The text is highly co...,4.0,10.0,9.0,NoContext,mistral-medium,False
24,231,U1,2,mistral-medium,5,911714,Just having this vegan hotdog from the school ...,"Ah, just grabbing this vegan hotdog from the s...",Im having this vegan hotdog at school today. I...,Both paragraphs convey a similar informal and...,Score: 10\n\nExplanation: Both paragraphs disc...,Score: 9\n\nExplanation: The text is coherent ...,3.0,10.0,9.0,NoContext,mistral-medium,False
118,202,U6,2,mistral-medium,5,209327,"I went to the ""Groene Burger"" fast food restau...","So, I just popped by the 'Groene Burger' fast ...",I went to the “Groene Burger” fast food joint....,"Paragraph S1 has a more casual, conversationa...",Score: 10\n\nExplanation: Both S1 and S2 conve...,Score: 10\nExplanation: The text is highly coh...,8.0,10.0,10.0,NoContext,mistral-medium,False


In [94]:
# Prompt parts for the accuracy (style-difference) evaluation.
# prompt_p1 and prompt_p2 each end in a `{}` placeholder; prompt_p3 asks for the score.
prompts_dict_new_accuracy = dict(
    # Generic prompt
    prompt_p1=(
        'You are an expert in text style transfer. '
        'Here is text T1, supposedly writen in a style of person X: {} '
    ),
    # Accuracy
    prompt_p2=(
        'and here is another text T2, containing are a set of unrelated sentences '
        '(separated with ;), that are actually written by person X: {} '
    ),
    prompt_p3=(
        'How different is the conversational style in T1 and T2 on a continuous scale '
        'from 1 (completely identical styles) to 10 (completely different styles)? '
        'For scoring focus on semantics and syntax. '
        'Difference in discussed topics or contexts is irrelevant for the score. '
        'Result = . Format result as "score" and "explanation".'
    ),
)

In [72]:
## GET UPDATED ACCURACIES - MISTRAL CALLS
# evl_df = pd.DataFrame()
# for index, row in df.iterrows():
#     evl_df = pd.concat([evl_df, evl.get_accuracy_score(prompts_dict_new_accuracy, row, 'NoContext_' + format(index), 0)], ignore_index=True)
# evl_df.to_csv('f8_llm_evaluation_data/Mistral/0_loop_updated_accuracy_evaluation_summary.csv')

In [96]:
# Load the re-computed accuracy evaluation and attach its score/explanation to df.
evl_df = pd.read_csv('f8_llm_evaluation_data/Mistral/0_updated_accuracy_evaluation_summary.csv')
evl_df = evl.extract_explanation(evl_df,'explanation_accuracy')

# df still carries the row labels of the unfiltered input (4, 24, 118, ...), whereas
# a frame read from CSV is labelled 0..n-1. Assigning a Series aligns on labels, which
# silently produces NaN (or the wrong row) for almost every record, so the values are
# copied positionally instead.
# NOTE(review): assumes evl_df rows are in the same order as df rows (the generation
# loop iterates df in order) - confirm for this CSV.
if len(evl_df) != len(df):
    raise ValueError(
        f"Row count mismatch: evaluation file has {len(evl_df)} rows, df has {len(df)}"
    )

# assign() returns a new frame, so nothing is written into a filtered slice.
# NOTE(review): later cells read 't0_score_accuracy' / 'new_score_accuracy'; the
# column names written here are kept as in the original cell - confirm which is intended.
df = df.assign(
    score_accuracy_0=evl_df['score_accuracy_0'].to_numpy(),
    explanation_accuracy_0=evl_df['explanation_accuracy_0'].to_numpy(),
)

In [97]:
# Mean accuracy score: original evaluation vs. the updated (t0) evaluation.
tuple(
    df[column].astype(float).mean()
    for column in ('score_accuracy', 't0_score_accuracy')
)

(4.895833333333333, 7.0)

In [98]:
# Sanity check: dimensions and column names of the working frame.
df.shape,df.columns

((96, 20),
 Index(['fileID', 'user', 'promptID', 'model', 'shots', 'runID',
        'neutral_sentence', 'tst_sentence_0', 'user_sentence',
        'explanation_accuracy', 'explanation_content_preservation',
        'explanation_fluency', 'score_accuracy', 'score_content_preservation',
        'score_fluency', 'prompting', 'evaluator', 'isParallel',
        't0_score_accuracy', 't0_explanation_accuracy'],
       dtype='object'))

### Refining t0_score_accuracy 

In [88]:
# Prompt parts used to ask the model to rewrite T1 based on style feedback.
# Each part contains one `{}` placeholder. The strings are triple-quoted, so the
# line breaks and the four-space indent of the continuation lines are part of the
# prompt text itself.
refine_prompt_dict = {
    # System prompt: role description plus the text T1 that is to be improved
    'prompt_p1': '''You are an AI model that improves a text style transfer based on provided feedback. 
    Here is text T1, supposedly writen in a style of person X: {} ''',
                 
    # Accuracy: reference text T2 (sentences separated with ;) written by person X
    'prompt_p2': '''and here is another text T2, containing are a set of unrelated sentences (separated with ;),
    that are actually written by person X: {} ''',

    # Inference: the feedback to apply and the requested output format
    'prompt_p3': '''Use the following feedback on text style difference between T1 and T2: {},
    to rewrite T1 so that it is more similar in style to T2. Keep the style conversational and informal.
    Result = . Format result as "sentence" and "explanation".''',
}

In [89]:
# Display the refinement prompt dictionary for inspection.
refine_prompt_dict

{'prompt_llm': 'You are an AI model that improves a text style transfer based on provided feedback. \n    Here is text T1, supposedly writen in a style of person X: {} ',
 'prompt_s2': 'and here is another text T2, containing are a set of unrelated sentences (separated with ;),\n    that are actually written by person X: {} ',
 'prompt_inference': 'Use the following feedback on text style difference between T1 and T2: {},\n    to rewrite T1 so that it is more similar in style to T2. Keep the style conversational and informal.\n    Result = . Format result as "sentence" and "explanation".'}

In [158]:
# Generate a refined sentence for every row of df (one call to
# evl.get_updated_evaluation_mistral per row) and persist the combined result.
# NOTE(review): `prompts_dict_feedback` is not defined in any visible cell of this
# notebook; presumably it is the refinement prompt dictionary - confirm before
# running on a fresh kernel.
feedback_frames = []
for index, row in df.iterrows():
    # A plain loop (not a comprehension) keeps already-collected frames available
    # in `feedback_frames` if a later call fails.
    feedback_frames.append(
        evl.get_updated_evaluation_mistral(prompts_dict_feedback, row, 'NoContext_' + format(index),'feedback_generation')
    )

# Concatenate once instead of re-copying the growing frame on every iteration.
feedback_df = pd.concat(feedback_frames, ignore_index=True) if feedback_frames else pd.DataFrame()
feedback_df.to_csv('f8_llm_evaluation_data/Mistral/0_loop_updated_feedback_summary.csv')

In [91]:
# Reload the first-loop feedback results from the CSV file and display them.
feedback_df  = pd.read_csv('f8_llm_evaluation_data/Mistral/0_loop_updated_feedback_summary.csv')
feedback_df

Unnamed: 0.1,Unnamed: 0,index,fileID,user,promptID,model,shots,runID,original,rewritten_sentence,...,score_accuracy,score_content_preservation,score_fluency,prompting,evaluator,isParallel,new_score_accuracy,new_explanation_accuracy,tst_feedback,explanation_feedback
0,0,4,231,U1,2,mistral-medium,5,911714,I'm all about fast food. I'm having some fries...,"Hey, I'm all about that fast food, y'all! I'm ...",...,4.0,10.0,9.0,NoContext,mistral-medium,False,7.0,The conversational styles in T1 and T2 are si...,"I reckon fast food suits my taste just fine, c...","Sentence: ""I reckon fast food suits my taste j..."
1,1,24,231,U1,2,mistral-medium,5,911714,Just having this vegan hotdog from the school ...,"Ah, just grabbing this vegan hotdog from the s...",...,3.0,10.0,9.0,NoContext,mistral-medium,False,7.0,The conversational styles in T1 and T2 exhibi...,Just tried this vegan hotdog from the canteen...,Sentence: Just tried this vegan hotdog from th...
2,2,118,202,U6,2,mistral-medium,5,209327,"I went to the ""Groene Burger"" fast food restau...","So, I just popped by the 'Groene Burger' fast ...",...,8.0,10.0,10.0,NoContext,mistral-medium,False,,Although both texts are written in an informa...,"So, I decided to pop by this ""Groene Burger"" ...","Sentence: So, I decided to pop by this ""Groene..."
3,3,147,202,U6,2,mistral-medium,5,209327,This vegan fried chicken from KFC is on the sp...,"So, you know that vegan fried chicken from KFC...",...,7.0,10.0,9.0,NoContext,mistral-medium,False,,\n\nAlthough both texts are written in an info...,"So, I've given that vegan fried chicken from ...","Sentence: So, I've given that vegan fried chic..."
4,4,155,201,U0,2,mistral-medium,5,379944,"I went to the ""Groene Burger"" fast food restau...","So you know what, I just went to this 'Groene ...",...,3.0,10.0,10.0,NoContext,mistral-medium,False,,Although both texts are written in an informa...,"You should try ""Groene Burger"" for fast food,...","Sentence: You should try ""Groene Burger"" for f..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,91,2266,82,U5,2,mistral-medium,5,556987,"Just had my first vegan cake at ""groene bakker...","Hey, just had my first vegan cake at 'groene b...",...,7.0,10.0,10.0,NoContext,mistral-medium,False,,The conversational style in T1 and T2 is sign...,He looks like he enjoyed his first vegan cake...,Sentence: He looks like he enjoyed his first v...
92,92,2285,220,U8,2,mistral-medium,5,386065,Just getting my vitamins in at the school cant...,"Sure thing, getting my daily dose of vitamins ...",...,4.0,10.0,10.0,NoContext,mistral-medium,False,,The syntax and semantics of the sentences in ...,I'm absolutely loving this Quinoa and chickpe...,Sentence: I'm absolutely loving this Quinoa an...
93,93,2296,201,U0,2,mistral-medium,5,379944,I'm all about fast food. I'm having some fries...,"So, you know what, I'm totally into fast food....",...,4.0,10.0,10.0,NoContext,mistral-medium,False,,\n\nAlthough the texts T1 and T2 are written b...,"You should give fast food a try, it's my go-t...","Sentence: You should give fast food a try, it'..."
94,94,2320,201,U0,2,mistral-medium,5,379944,Just having this vegan hotdog from the school ...,"You know what, I'm just vibing with this vegan...",...,4.0,9.0,10.0,NoContext,mistral-medium,False,,The conversational style in T1 and T2 differ ...,"Vegan hotdog for lunch, it's crispy and spicy;...","Sentence: ""Vegan hotdog for lunch, it's crispy..."


In [200]:
def remove_quotes(s):
    """Strip the quoting that wraps an extracted sentence.

    A leading space followed by a double quote (' "') is dropped, and,
    independently of that, a single trailing double quote is dropped.

    Args:
        s: The string to clean.

    Returns:
        The string without the leading ' "' prefix and/or trailing '"'.
    """
    start = 2 if s.startswith(' "') else 0
    trimmed = s[start:]
    return trimmed[:-1] if trimmed.endswith('"') else trimmed

In [215]:
# Re-extract the rewritten sentence from the raw model answer into `tst_feedback`.
# NOTE(review): assumes every answer contains a colon after a leading label such as
# 'Sentence' - an answer without a colon raises IndexError, as in the original code.
for i, r in feedback_df.iterrows():
    # Split on the FIRST colon only, so a colon inside the sentence does not truncate it.
    answer = r['explanation_feedback'].split(':', 1)[1]
    s = remove_quotes(answer.split('\n')[0])
    if not s.strip():
        # some sentences are messed up, as Sentence: is followed with new line, so they require different handling
        # (a whitespace-only first line counts as "empty" too)
        s = answer.strip().split('\n')[0]
    # Write through the frame itself by label; the chained form
    # `feedback_df['tst_feedback'].iloc[i] = ...` may update a temporary copy only.
    feedback_df.at[i, 'tst_feedback'] = s

### Evaluate the feedback - regenerate accuracy

In [221]:
# Prompt parts for re-scoring the style difference between T1 and T2
# (same keys and texts as used for the initial accuracy evaluation).
prompts_dict_new_accuracy = dict(
    # Generic prompt
    prompt_p1=(
        'You are an expert in text style transfer. '
        'Here is text T1, supposedly writen in a style of person X: {} '
    ),
    # Accuracy
    prompt_p2=(
        'and here is another text T2, containing are a set of unrelated sentences '
        '(separated with ;), that are actually written by person X: {} '
    ),
    prompt_p3=(
        'How different is the conversational style in T1 and T2 on a continuous scale '
        'from 1 (completely identical styles) to 10 (completely different styles)? '
        'For scoring focus on semantics and syntax. '
        'Difference in discussed topics or contexts is irrelevant for the score. '
        'Result = . Format result as "score" and "explanation".'
    ),
)

In [224]:
# Re-evaluate the accuracy of the refined sentences: one call to
# evl.get_updated_evaluation_mistral per row of feedback_df.
evaluation_frames = []
for index, row in feedback_df.iterrows():
    # A plain loop keeps already-collected frames available if a later call fails.
    evaluation_frames.append(
        evl.get_updated_evaluation_mistral(prompts_dict_new_accuracy, row, 'NoContext_' + format(index),'feedback_evaluation')
    )

# Concatenate once instead of re-copying the growing frame on every iteration.
feedback_tst_evl_df = pd.concat(evaluation_frames, ignore_index=True) if evaluation_frames else pd.DataFrame()


In [229]:
# Persist the first-loop evaluation of the refined sentences to CSV.
feedback_tst_evl_df.to_csv('f8_llm_evaluation_data/Mistral/0_loop_accuracy_feedback_summary.csv')

In [232]:
# List the columns available after the first-loop evaluation.
feedback_tst_evl_df.columns

Index(['index', 'fileID', 'user', 'promptID', 'model', 'shots', 'runID',
       'original', 'rewritten_sentence', 'your_text', 'explanation_accuracy',
       'explanation_content_preservation', 'explanation_fluency',
       'score_accuracy', 'score_content_preservation', 'score_fluency',
       'prompting', 'evaluator', 'isParallel', 'new_score_accuracy',
       'new_explanation_accuracy', 'tst_feedback', 'explanation_feedback',
       'accuracy_feedback', 'explanation_feedback_evaluation'],
      dtype='object')

In [240]:
# Preview: the first two refined sentences glued together into one string.
first_two_rewrites = feedback_tst_evl_df['tst_feedback'][0:2]
''.join(first_two_rewrites)

"I reckon fast food suits my taste just fine, currently munching on some fries, a soy burger, and sipping on a coke at the local burger joint near school. And looking forward to catching a football game this weekend. Just tried this vegan hotdog from the canteen, it's got a nice crunch to it and a bit of a kick! Gonna chill with the playstation this afternoon."

In [228]:
# Mean accuracy scores: original evaluation, updated evaluation, and the
# evaluation of the refined sentences after the first feedback loop.
original_mean = df['score_accuracy'].astype(float).mean()
updated_mean = df['new_score_accuracy'].astype(float).mean()
feedback_mean = feedback_tst_evl_df['accuracy_feedback'].astype(float).mean()
(original_mean, updated_mean, feedback_mean)

(4.895833333333333, 7.0, 5.6875)

# Second loop

In [242]:
# Second refinement pass: generate a new rewritten sentence for every row of the
# first-loop evaluation frame (one call to evl.get_updated_evaluation_mistral per row).
# NOTE(review): `prompts_dict_feedback` is not defined in any visible cell of this
# notebook - confirm where it comes from before running on a fresh kernel.
feedback_frames_2 = []
for index, row in feedback_tst_evl_df.iterrows():
    # A plain loop keeps already-collected frames available if a later call fails.
    feedback_frames_2.append(
        evl.get_updated_evaluation_mistral(prompts_dict_feedback, row, 'NoContext_' + format(index),'feedback_generation')
    )

# Concatenate once instead of re-copying the growing frame on every iteration.
feedback_df_2 = pd.concat(feedback_frames_2, ignore_index=True) if feedback_frames_2 else pd.DataFrame()


In [244]:
# Persist the second-loop feedback generation results to CSV.
feedback_df_2.to_csv('f8_llm_evaluation_data/Mistral/1_loop_updated_feedback_summary.csv')

In [245]:
# List the columns available after the second feedback generation.
feedback_df_2.columns

Index(['index', 'fileID', 'user', 'promptID', 'model', 'shots', 'runID',
       'original', 'rewritten_sentence', 'your_text', 'explanation_accuracy',
       'explanation_content_preservation', 'explanation_fluency',
       'score_accuracy', 'score_content_preservation', 'score_fluency',
       'prompting', 'evaluator', 'isParallel', 'new_score_accuracy',
       'new_explanation_accuracy', 'tst_feedback', 'explanation_feedback',
       'accuracy_feedback', 'explanation_feedback_evaluation',
       'tst_feedback_2', 'explanation_feedback_generation_2'],
      dtype='object')

In [254]:
# Re-extract the second-loop rewritten sentence from the raw model answer into `tst_feedback_2`.
# NOTE(review): assumes every answer contains a colon after a leading label such as
# 'Sentence' - an answer without a colon raises IndexError, as in the original code.
for i, r in feedback_df_2.iterrows():
    # Split on the FIRST colon only, so a colon inside the sentence does not truncate it.
    answer = r['explanation_feedback_generation_2'].split(':', 1)[1]
    s = remove_quotes(answer.split('\n')[0])
    if not s.strip():
        # some sentences are messed up, as Sentence: is followed with new line, so they require different handling
        # (a whitespace-only first line counts as "empty" too)
        s = answer.strip().split('\n')[0]
    # Write through the frame itself by label; the chained form
    # `feedback_df_2['tst_feedback_2'].iloc[i] = ...` may update a temporary copy only.
    feedback_df_2.at[i, 'tst_feedback_2'] = s

In [255]:
# Side-by-side view of the original text, the first rewrite and both refined versions.
feedback_df_2[['original','rewritten_sentence','tst_feedback','tst_feedback_2']]

Unnamed: 0,original,rewritten_sentence,tst_feedback,tst_feedback_2
0,I'm all about fast food. I'm having some fries...,"Hey, I'm all about that fast food, y'all! I'm ...","I reckon fast food suits my taste just fine, c...","Hey, I'm good with fast food, currently chompi..."
1,Just having this vegan hotdog from the school ...,"Ah, just grabbing this vegan hotdog from the s...",Just tried this vegan hotdog from the canteen...,Just gave that vegan hotdog a go from the cant...
2,"I went to the ""Groene Burger"" fast food restau...","So, I just popped by the 'Groene Burger' fast ...","So, I decided to pop by this ""Groene Burger"" ...","So, I decided to check out this ""Groene Burger..."
3,This vegan fried chicken from KFC is on the sp...,"So, you know that vegan fried chicken from KFC...","So, I've given that vegan fried chicken from ...","So, I finally gave that vegan fried chicken f..."
4,"I went to the ""Groene Burger"" fast food restau...","So you know what, I just went to this 'Groene ...","You should try ""Groene Burger"" for fast food,...","You should totally check out ""Groene Burger"" ..."
...,...,...,...,...
91,"Just had my first vegan cake at ""groene bakker...","Hey, just had my first vegan cake at 'groene b...",He looks like he enjoyed his first vegan cake...,"Dude, he totally loved that vegan cake at ""gr..."
92,Just getting my vitamins in at the school cant...,"Sure thing, getting my daily dose of vitamins ...",I'm absolutely loving this Quinoa and chickpe...,I've been really enjoying this Quinoa and chi...
93,I'm all about fast food. I'm having some fries...,"So, you know what, I'm totally into fast food....","You should give fast food a try, it's my go-t...",You should really consider giving fast food a...
94,Just having this vegan hotdog from the school ...,"You know what, I'm just vibing with this vegan...","Vegan hotdog for lunch, it's crispy and spicy;...",Thinking about grabbing a vegan hotdog for lun...


In [259]:
# Build one text blob per row: the four text versions separated by newlines,
# prefixed with the row's index label.
text_columns = ['original','rewritten_sentence','tst_feedback','tst_feedback_2']
joined_strings = [
    f"Row ID: {row_id}, " + '\n'.join(record[text_columns])
    for row_id, record in feedback_df_2.iterrows()
]

# Wrap the list in a Series for a compact printout
joined_strings_series = pd.Series(joined_strings)

print(joined_strings_series)

0     Row ID: 0, I'm all about fast food. I'm having...
1     Row ID: 1, Just having this vegan hotdog from ...
2     Row ID: 2, I went to the "Groene Burger" fast ...
3     Row ID: 3, This vegan fried chicken from KFC i...
4     Row ID: 4, I went to the "Groene Burger" fast ...
                            ...                        
91    Row ID: 91, Just had my first vegan cake at "g...
92    Row ID: 92, Just getting my vitamins in at the...
93    Row ID: 93, I'm all about fast food. I'm havin...
94    Row ID: 94, Just having this vegan hotdog from...
95    Row ID: 95, Just having this vegan hotdog from...
Length: 96, dtype: object


In [266]:
# Show the first two joined text blobs in full.
joined_strings[0:2]

["Row ID: 0, I'm all about fast food. I'm having some fries, a soy burger and coke at the burger place near school. And having a football game in the weekend.\nHey, I'm all about that fast food, y'all! I'm chowing down on some fries, a soy burger, and a coke over at the burger joint near school. And come the weekend, I'm all about the football game!\nI reckon fast food suits my taste just fine, currently munching on some fries, a soy burger, and sipping on a coke at the local burger joint near school. And looking forward to catching a football game this weekend.\nHey, I'm good with fast food, currently chomping on some fries and a soy burger here at the local burger joint near school. You know what, I'm thinking about catching a football game this weekend, wanna join?",
 "Row ID: 1, Just having this vegan hotdog from the school canteen. It’s crispy and a bit spicy! And I am just chilling in the afternoon with the playstation.\nAh, just grabbing this vegan hotdog from the school canteen

In [262]:
# Column check before the second-loop evaluation.
feedback_df_2.columns

Index(['index', 'fileID', 'user', 'promptID', 'model', 'shots', 'runID',
       'original', 'rewritten_sentence', 'your_text', 'explanation_accuracy',
       'explanation_content_preservation', 'explanation_fluency',
       'score_accuracy', 'score_content_preservation', 'score_fluency',
       'prompting', 'evaluator', 'isParallel', 'new_score_accuracy',
       'new_explanation_accuracy', 'tst_feedback', 'explanation_feedback',
       'accuracy_feedback', 'explanation_feedback_evaluation',
       'tst_feedback_2', 'explanation_feedback_generation_2'],
      dtype='object')

### Evaluate the second loop

In [267]:
# Re-evaluate the accuracy of the second-loop sentences: one call to
# evl.get_updated_evaluation_mistral per row of feedback_df_2.
evaluation_frames_2 = []
for index, row in feedback_df_2.iterrows():
    # A plain loop keeps already-collected frames available if a later call fails.
    evaluation_frames_2.append(
        evl.get_updated_evaluation_mistral(prompts_dict_new_accuracy, row, 'NoContext_' + format(index),'feedback_evaluation')
    )

# Concatenate once instead of re-copying the growing frame on every iteration.
feedback_tst_evl_df_2 = pd.concat(evaluation_frames_2, ignore_index=True) if evaluation_frames_2 else pd.DataFrame()


In [268]:
# Persist the second-loop evaluation results to CSV.
feedback_tst_evl_df_2.to_csv('f8_llm_evaluation_data/Mistral/1_loop_accuracy_feedback_summary.csv')

In [269]:
# List the columns available after the second-loop evaluation.
feedback_tst_evl_df_2.columns

Index(['index', 'fileID', 'user', 'promptID', 'model', 'shots', 'runID',
       'original', 'rewritten_sentence', 'your_text', 'explanation_accuracy',
       'explanation_content_preservation', 'explanation_fluency',
       'score_accuracy', 'score_content_preservation', 'score_fluency',
       'prompting', 'evaluator', 'isParallel', 'new_score_accuracy',
       'new_explanation_accuracy', 'tst_feedback', 'explanation_feedback',
       'accuracy_feedback', 'explanation_feedback_evaluation',
       'tst_feedback_2', 'explanation_feedback_generation_2',
       'accuracy_feedback_2', 'explanation_feedback_evaluation_2'],
      dtype='object')

In [270]:
# Mean accuracy score of the sentences produced by the second feedback loop.
feedback_tst_evl_df_2['accuracy_feedback_2'].astype(float).mean()

5.005208333333333

In [287]:
# Re-import of the display helpers (the module is already imported as `dr` in the first cell).
import s6_display_results as dr
# Columns passed to the display and print cells that follow.
col_display = ['original', 'rewritten_sentence','your_text','tst_feedback','explanation_feedback','tst_feedback_2']

In [288]:
# Show the second-loop evaluation frame through the project's interactive display
# helper, restricted to the columns listed in col_display.
dr.display_interactive_dataframe(feedback_tst_evl_df_2,col_display)

interactive(children=(Dropdown(description='User:', index=1, options=('All', 'U1', 'U6', 'U0', 'U8', 'U3', 'U2…

In [286]:
# Print the selected text columns for every record that belongs to user 'U0'.
u0_records = feedback_tst_evl_df_2[feedback_tst_evl_df_2.user == 'U0']
for _, record in u0_records.iterrows():
    print(record[col_display], '\n')

original              I went to the "Groene Burger" fast food restau...
rewritten_sentence    So you know what, I just went to this 'Groene ...
your_text             I went to the “groene burger”, really recommen...
tst_feedback           You should try "Groene Burger" for fast food,...
tst_feedback_2         You should totally check out "Groene Burger" ...
Name: 4, dtype: object 

original              This vegan chocolate is on point. Its with oat...
rewritten_sentence    You've gotta try this vegan chocolate, it's le...
your_text             This vegan cake is just amazinggg! It has oat ...
tst_feedback          You've gotta try this vegan chocolate, no kidd...
tst_feedback_2         You might want to check out this vegan chocol...
Name: 21, dtype: object 

original              Just getting my vitamins in at the school cant...
rewritten_sentence    Alright, so I'm popping into the school cantee...
your_text             I am going for a something healthy today, quin...
tst_feedback 