In [None]:
import utils
import pandas as pd
import spacy
from utils import *

import gspread
import gspread_formatting

In [64]:
import re

def parse_evaluation_file(content):
    """Parse the text of one evaluation response into a dictionary.

    The text is expected to contain one section per system, each introduced
    by a ``System <digit> sentences:`` header and followed by numbered lines
    of the form ``<n>. Difficulty: ...; Sense: ...; Reject: ...`` and a
    ``Sentence diversity: ...`` line. Values may optionally be wrapped in
    double quotes.

    Args:
        content: The raw text to parse.

    Returns:
        A dict with:
          * one ``'System <digit>'`` key per section found, mapping to
            ``{'sentences': [...], 'sentence_diversity': str, 'comment': ''}``
            where each sentence is ``{'difficulty', 'sense', 'reject'}``
            (``sense`` is capitalised; the per-system ``'comment'`` is only
            initialised here, never filled);
          * ``'system_ranking'`` if a ``System ranking: ...`` line exists;
          * ``'comment'`` if a ``Comment: ...`` part exists (everything after
            the first ``Comment: `` up to the end of the text, stripped).
    """
    # Split right before every "System N sentences:" header so that each
    # chunk holds exactly one system's section.
    system_blocks = re.split(r'\n(?=System \d sentences:)', content)

    # The last field of a sentence line has no delimiter after it, so a lazy
    # "(.*?)" there would always match the empty string. Capture it with an
    # explicit character class instead (stops at a quote, ';' or line end).
    sentence_pattern = (
        r'\d+\.\sDifficulty:\s"?(.*?)"?;'
        r'\sSense:\s"?(.*?)"?;'
        r'\sReject:\s"?([^";\n]*)"?'
    )
    # Same problem for the diversity value: read up to the closing quote or
    # the end of the line.
    diversity_pattern = r'Sentence diversity: "?([^"\n]*)"?'

    parsed_data = {}

    for block in system_blocks:
        system_name_match = re.search(r'(System \d) sentences:', block)
        if not system_name_match:
            # Text that precedes the first system header (if any).
            continue

        system_name = system_name_match.group(1)
        system_entry = {'sentences': [], 'sentence_diversity': '', 'comment': ''}
        parsed_data[system_name] = system_entry

        for difficulty, sense, reject in re.findall(sentence_pattern, block):
            system_entry['sentences'].append({
                'difficulty': difficulty,
                'sense': sense.capitalize(),
                # strip() removes trailing blanks of unquoted values
                'reject': reject.strip()
            })

        diversity_match = re.search(diversity_pattern, block)
        if diversity_match:
            system_entry['sentence_diversity'] = diversity_match.group(1).strip()

    # The ranking is a single line ('.' does not cross newlines here).
    ranking_match = re.search(r'System ranking: (.*)', content)
    if ranking_match:
        parsed_data['system_ranking'] = ranking_match.group(1)

    # Comments are assumed to come last and to start with "Comment:";
    # DOTALL lets the capture run over several lines up to the end.
    comment_match = re.search(r'Comment: (.*)', content, re.DOTALL)
    if comment_match:
        parsed_data['comment'] = comment_match.group(1).strip()

    return parsed_data

def update_cells_gpt(content, sheet):
    """Write the labels of three parsed responses into a worksheet.

    ``content`` is split on ``'Comment:'`` and the first three pieces are
    each parsed with ``parse_evaluation_file``. For response ``k`` (0, 1, 2)
    every target row is shifted down by ``20 * k``; for system ``s`` (1, 2, 3)
    every target column is shifted right by ``4 * (s - 1)``.

    Cells written per response (before shifting):
      * ``A34``           - the system ranking
      * ``B25:B29``       - difficulty labels
      * ``C25:C29``       - sense labels
      * ``D25:D29``       - reject labels
      * ``A31``           - sentence diversity

    Args:
        content: Text holding the responses, separated by ``'Comment:'``.
        sheet: Object exposing ``batch_update(list_of_range_dicts)``
            (a gspread worksheet in this notebook).

    Raises:
        IndexError: if ``content`` yields fewer than three pieces.
        KeyError: if a parsed piece lacks ``'system_ranking'`` or one of
            ``'System 1'`` .. ``'System 3'``.
    """
    responses = content.split(sep='Comment:')[:3]

    # (leftmost column letter for system 1, key inside each sentence dict)
    label_columns = (('B', 'difficulty'), ('C', 'sense'), ('D', 'reject'))

    for resp_number in (0, 1, 2):
        row_shift = 20 * resp_number
        parsed = parse_evaluation_file(responses[resp_number])

        updates = [{'range': f'A{34+row_shift}', 'values': [[parsed['system_ranking']]]}]

        for sys_id in (1, 2, 3):
            system = parsed[f'System {sys_id}']
            sentence_rows = system['sentences']
            col_shift = 4 * (sys_id - 1)

            for base_letter, key in label_columns:
                column = chr(ord(base_letter) + col_shift)
                updates.append({
                    'range': f'{column}{25+row_shift}:{column}{29+row_shift}',
                    # one inner list + COLUMNS => the labels run down the column
                    'values': [[row[key] for row in sentence_rows]],
                    'major_dimension': 'COLUMNS',
                })

            diversity_column = chr(ord('A') + col_shift)
            updates.append({
                'range': f'{diversity_column}{31+row_shift}',
                'values': [[system['sentence_diversity']]],
            })

        # One request per response.
        sheet.batch_update(updates)

In [None]:
# Create the gspread client with service-account authentication; no arguments
# are passed, so gspread's own defaults decide where the credentials come from.
gc = gspread.service_account()

In [None]:
# Open the spreadsheet by its title; `sh` is what the later cells use to
# look up individual worksheets.
sh = gc.open("Evaluation-sheet-jp-v3.0_gpt4")

In [None]:
# Select the worksheet titled "14" (the same number as `block_id = 14` in the
# single-file loading cell further down).
sheet = sh.worksheet("14")

In [58]:
# Copy the range A1:L13 further down the same worksheet, starting from row 21
# (destinations A21, A41 and A61), for the worksheets titled "16" to "25".
block_start_cells = ['A21', 'A41', 'A61']
for sheet_number in range(16, 26):
    sheet = sh.worksheet(str(sheet_number))
    for dest_cell in block_start_cells:
        sheet.copy_range(
            source="A1:L13",
            dest=dest_cell,
            paste_type='PASTE_NORMAL',
        )

In [None]:
# Load and parse the response file of a single block (handy for inspecting
# the parser output before running the batch update).
block_id = 14
file_name = f"/data/enrico_benedetti/nihongoexample/evaluation/annotation/gpt/short_responses/gpt-4-0125-preview_{block_id}.txt"

# Use an explicit encoding so the read does not depend on the machine's locale.
# NOTE(review): assumes the response files were saved as UTF-8 - confirm.
with open(file_name, 'r', encoding='utf-8') as file:
    content = file.read()

parse = parse_evaluation_file(content)


In [61]:
import time

In [None]:
### Note: the output still needs to be fixed manually.

In [65]:
# For every worksheet "16" .. "25": read the response file with the same
# number and write its parsed labels into that worksheet.
for sheet_number in range(16,26):
    sheet = sh.worksheet(f"{sheet_number}")
    block_id = sheet_number
    file_name = f"/data/enrico_benedetti/nihongoexample/evaluation/annotation/gpt/short_responses/gpt-4-0125-preview_{block_id}.txt"

    # Use an explicit encoding so the read does not depend on the machine's locale.
    # NOTE(review): assumes the response files were saved as UTF-8 - confirm.
    with open(file_name, 'r', encoding='utf-8') as file:
        content = file.read()

    update_cells_gpt(content, sheet)
    # Pause between worksheets - presumably to stay under the API rate
    # limit; confirm before shortening.
    time.sleep(10)