# Prompt chaining: summarizing research articles with ChatGPT
[]()

In [2]:
import pandas as pd
import sys
import os
sys.path.append(r"C:\Users\silvh\OneDrive\lighthouse\custom_python")
sys.path.append(r"C:\Users\silvh\OneDrive\lighthouse\Ginkgo coding\content-summarization\src")
from file_functions import *
import time
import re
from itertools import product
import openai

from response_processing import *
from article_processing import create_text_dict_from_folder
import traceback
from file_functions import *

In [3]:
# Notebook display settings: never truncate cell text, columns or frame
# width, and show at most 20 rows of a DataFrame.
for option_name, option_value in {
    'display.max_colwidth': None,
    'display.max_rows': 20,
    'display.max_columns': None,
    'display.width': None,
}.items():
    pd.set_option(option_name, option_value)

# Set up

In [None]:
# Empty containers that later cells fill in (chatbot_dict, qna_dict and
# simple_summaries_dict are keyed by iteration ID). Each name gets its own dict.
(chatbot_dict, simple_summaries_dict, relevance_dict,
 chain_results_dict, qna_dict) = ({} for _ in range(5))

In [None]:
# Read the articles in the batch folder into `text_dict`.
folder_path = '../text/2023-06-12 1' # ** UPDATE REQUIRED**

# Options forwarded to the reader: file encoding and an optional subset.
subset = None
encoding = 'ISO-8859-1'

text_dict = create_text_dict_from_folder(
    folder_path,
    encoding=encoding,
    subset=subset,
)


Keys for text_dict: dict_keys([1, 2])



# Load most recent response for processing

In [None]:
# File (inside `folder_path`) holding the attributes saved from a previous batch run.
filename = '/batch_Chaining_attributes_2023-06-12_2110.sav'

# Earlier pickle-based route, kept for reference:
# loaded_pickle = loadpickle(filename, folder_path)
# chatbot_dict[0] = revive_chatbot_dict(loaded_pickle)
# sample_Chaining_attr(iteration_id=0)

# `filename` starts with "/", so plain concatenation yields the full path.
saved_attributes_path = folder_path + filename
with open(saved_attributes_path) as file:
    jsonfile = json.load(file)

# Rebuild the chatbot objects from the saved attributes and store them as iteration 0.
chatbot_dict[0] = revive_chatbot_dict(jsonfile)

# Prompts

In [None]:
# Prompt fragments for the initial summarization request. Each list holds the
# alternatives to try for one fragment; Chaining.summarize joins one entry from
# each list as "<prep_step> <summarize_task> <edit_task>".
# NOTE: the trailing backslashes inside the strings are line continuations, so
# the indentation of the following source line becomes part of the prompt text.
prep_step = [
    "Think about why this might be relevant for the audience in the grand scheme of things.\
    \nIdentify 1 or 2 key concepts from this article that would make interesting or helpful health content. \
    Exclude details that do not add value to the audience.\
    \nBased on the key concepts from the previous steps, extract the key points and numerical descriptors to",
]

# Middle fragment: the kind of content to produce. Commented-out entries are
# alternatives that are currently disabled.
summarize_task = [
    "summarize for a LinkedIn post.",
    # "Describe the interesting points to your coworker at the water cooler",
    # "Create an Instagram post without hashtags.",
]
# Final fragment: editing instructions and the required output format.
edit_task = [
    "\nIf applicable, include a brief description of the research participants, such as age and sex.\
    Otherwise, you can skip this step.\
    \nEvaluate whether or not your writing may be confusing or redundant. \
    \nIf so, re-write it so it is clear and concise. Otherwise, keep it the same. \
    \nCreate a journalistic headline to hook the audience.\
    \nReturn your response in this format:\
    \n<headline>\n\n<summary>\
    \nwhere the summary is in paragraph form.\
    \nDo not label the headline and summary.",
]

# System message intended for the summarization requests.
system_role = "You are a journalist writing content based on science research articles."
# One row per combination of the three fragment lists (Cartesian product).
prompts_df = pd.DataFrame(product(prep_step, summarize_task, edit_task), 
    columns=['prep_step', 'summarize_task', 'edit_task'])

# Prompts for the "simplify" step. Each task string ends with
# "Your audience is" and is completed with one entry of `simplify_audience`.
# Every physical line inside the string must end with a bare backslash (no
# trailing space): a backslash followed by a space is an invalid escape that
# leaves a literal backslash and a raw newline in the prompt sent to the model.
user_simplify_task = [
    """If needed, rewrite the text using terms appropriate for the audience. If not keep it the same.\
    Follow these steps to accomplish this: \
    \n1. Check if the content and language are appropriate for the audience. \
    \n2. If it is suitable for the audience, keep it the same. If not, rewrite using terms appropriate for the audience. \
    \n3. Return the final version of the summary to be shown to the audience. \
    \n\nYour audience is""",
]

# Audiences to pair with each simplify task. Commented-out entries are
# alternatives that are currently disabled.
simplify_audience = [
    # "a lay audience",
    "people who are not science experts",
]

# Prompts for the "add relevance" step. Each task string ends with
# "Your audience consists of" and is completed with one entry of
# `relevance_audience`.
# Continuation lines must end with a bare backslash (no trailing space): a
# backslash followed by a space is an invalid escape that leaves a literal
# backslash and a raw newline in the prompt sent to the model.
# NOTE(review): the "6. Remove the backticks." line has no continuation
# backslash, so a raw newline plus the next line's indentation is part of the
# prompt; left unchanged here because it is valid inside a triple-quoted string.
user_relevance_task = [
    """Rewrite this summary to include a statement of how it is relevant for the audience. \
        Follow these steps to accomplish this: \
        \n1. Think about why this might be relevant for the audience in the grand scheme of things.\
        \n2. If it is not evident why the text is relevant for the audience in the grand scheme of things, \
        add a sentence to inform the audience. Otherwise, keep it the same. \
        \n3. Modify the summary if needed to reduce redundancy. \
        \n4. Check if the content and language are appropriate for the audience. \
        If it is suitable for the audience, keep it the same. If not, rewrite using terms appropriate for the audience. \
        \n5. Return the final version of the summary to be shown to the audience. \
        \n6. Remove the backticks.
        \n\nYour audience consists of""",
]

# Audiences to pair with each relevance task. Commented-out entries are
# alternatives that are currently disabled.
relevance_audience = [
    "seniors",
    "people who enjoy sports",
    # "people new to resistance training"
]

# Iteration 1: Carry over code from previous notebook to test

In [None]:
class Chaining:
    """
    Chain ChatGPT requests over one article: summarize it, then optionally
    simplify the summaries or add a relevance statement for an audience.

    Parameters:
    -----------
    text : str
        Text to feed to GPT for summarization.
    folder_path : str
        Path of the folder the text came from. Only the part after the last
        "/" is kept (as `self.folder`).

    Optional parameters:
    --------------------
    system_role : str
        The role of the ChatGPT system in the conversation.
        Default is "You are a helpful assistant."; None selects the same default.
    model : str
        ChatGPT model to use. Default is "gpt-3.5-turbo".
    temperature : float
        Controls the randomness of responses. Lower values result in more predictable responses. Default is 0.7.
    max_tokens : int
        Token limit for ChatGPT response. Default is 1000.

    Attributes created by `.summarize()`:
    -------------------------------------
    qna : dict
        Inputs of the summarization request plus, on success, a 'summary' list.
    summaries_dict : dict
        Generated summaries keyed 'response_01', 'response_02', ...
    simple_summary_dict, relevance_dict : dict
        Results of `.simplify()` / `.add_relevance()`, keyed by call number (1-based).
    n_previous_prompts : dict
        Number of earlier `.simplify()` / `.add_relevance()` calls.
    """

    DEFAULT_SYSTEM_ROLE = "You are a helpful assistant."

    def __init__(self, text, folder_path, system_role="You are a helpful assistant.", 
            model="gpt-3.5-turbo", temperature=0.7, max_tokens=1000, 
        ):
        self.text = text
        # Keep only the last path component of the folder.
        self.folder = re.sub(r'(?:.*\/)?(.*)$', r'\1', folder_path)
        # Callers may pass system_role=None; without this fallback the system
        # message would be the literal string "None".
        self.system_role = self.DEFAULT_SYSTEM_ROLE if system_role is None else system_role
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.model = model

    @staticmethod
    def _report_error(error):
        """
        Print the line and file where the exception currently being handled
        was raised in the calling method. Must be called from an `except` block.
        """
        tb = sys.exc_info()[2]
        print("An error occurred on line", tb.tb_lineno, "in", tb.tb_frame.f_code.co_filename, ":", error)

    def create_prompt(self, task, text):
        """
        Creates a prompt for ChatGPT with the given task and text.

        Parameters:
        -----------
        task : str
            The task to include in the ChatGPT prompt.
        text : str
            The text to include in the ChatGPT prompt.

        Returns:
        --------
        messages : list
            A list of dictionaries representing the system and user messages in the prompt.
        """
        system_role = f'{self.system_role}'
        user_input = f"""Given the following text delimited by triple backticks: ```{text}``` \n {task}"""
        messages = [
        {"role": "system", "content": system_role},
        {"role": "user", "content": user_input},]

        print('\tDone creating prompt')
        return messages

    def gpt(self, messages, n_choices, temperature):
        """
        Sends a request to the ChatGPT API with the given messages.

        The API key is read from the `api_openai` environment variable on every call.

        Parameters:
        -----------
        messages : list
            A list of dictionaries representing the system and user messages in the prompt.
        n_choices : int
            Number of ChatGPT responses to generate.
        temperature : float
            Controls the randomness of responses. Lower values result in more predictable responses.

        Returns:
        --------
        response : dict
            A dictionary representing the ChatGPT response.
        """
        print('\tSending request to GPT-3')
        print(f'\t\tRequesting {n_choices} choices using {self.model}')
        openai.api_key = os.getenv('api_openai')
        response = openai.ChatCompletion.create(
            model=self.model, messages=messages, 
            temperature=temperature, 
            max_tokens=self.max_tokens,
            n=n_choices
            )
        print('\tDone sending request to GPT-3')
        return response

    def summarize(self, task, prep_step=None, edit_task=None, n_choices=5):
        """
        Generates summaries from the text using ChatGPT.

        The request is sent through this instance, so the configured system
        role, model and token limit are the ones actually used (and recorded
        in `qna`). Errors from the API call or from parsing the response are
        printed, not raised.

        Parameters:
        -----------
        task : str
            The task to include in the ChatGPT prompt.
        prep_step : str, optional
            A preparatory step for the task, if applicable.
        edit_task : str, optional
            The final step for the task, if applicable.
        n_choices : int, optional
            Number of ChatGPT responses to generate. Default is 5.

        Returns:
        --------
        qna : dict
            A dictionary representing the summarization task and the generated summaries.
            The 'summary' key is absent when the API request failed.
        """
        # Imported locally so this method does not rely on a star-import
        # elsewhere in the notebook to supply `datetime`.
        from datetime import datetime

        full_task = f'{prep_step} {task} {edit_task}'
        prompt = self.create_prompt(full_task, self.text)
        firstline_pattern = r'\s?(\S*)(\n*)(.+)'
        # Fall back to an empty title when the text has nothing to match
        # (e.g. an empty string) instead of failing on `None[0]`.
        title_match = re.match(firstline_pattern, self.text)
        title = title_match[0] if title_match else ''
        self.qna = dict() 
        self.qna['date'] = datetime.now().strftime("%Y-%m-%d %H%M")
        self.qna['folder'] = self.folder
        self.qna['article_title'] = title
        self.qna['system_role'] = self.system_role
        self.qna['model'] = self.model
        self.qna[f'text'] = self.text
        self.qna['prep step'] = prep_step
        self.qna['summarization task'] = task
        self.qna['edit task'] = edit_task
        self.qna['full summarization task'] = full_task
        self.summaries_dict = dict()
        self.article_title = title
        self.response_regex = r'response_(.*)'
        self.simple_summary_dict = dict()
        self.relevance_dict = dict()
        self.n_previous_prompts = dict()

        try:
            response = self.gpt(prompt, n_choices=n_choices, temperature=self.temperature)
        except Exception as error:
            self._report_error(error)
            print('\t**API request failed for `.summarize()`**')
            return self.qna
        try:
            for index, choice in enumerate(response.choices):
                self.summaries_dict[f'response_{"{:02d}".format(index+1)}'] = choice["message"]["content"]
            self.qna.setdefault('summary', [])
            self.qna['summary'].extend([value for value in self.summaries_dict.values()])
        except Exception as error:
            self._report_error(error)
            print('\t**Error with response parsing**')
        return self.qna

    def simplify(self, simplify_task, audience, 
                    model="gpt-3.5-turbo", temperature=0.0, n_choices=1, 
                    pause_per_request=0
                    ):
        """
        Ask ChatGPT to rewrite every summary from `.summarize()` for an audience.

        Parameters:
        -----------
        simplify_task : str
            Instruction text; `audience` is appended after a space to form the prompt task.
        audience : str
            Description of the target audience.
        model : str, optional
            Accepted for interface compatibility but not used; requests use `self.model`.
        temperature : float, optional
            Sampling temperature for these requests. Default is 0.0.
        n_choices : int, optional
            Number of responses to request per summary. Default is 1.
        pause_per_request : int or float, optional
            Seconds to sleep after each request. Default is 0 (no pause).

        Returns:
        --------
        simple_summary_dict : dict
            {call number: {summary key: {choice index: result dict}}}.
            If an API request fails, `self.qna` is returned instead.
        """
        simplify_iteration = len(self.simple_summary_dict) + 1 
        self.n_previous_prompts['simply_summary'] = len(self.simple_summary_dict)
        self.simple_summary_dict[simplify_iteration] = dict()
        full_simplify_task = f'{simplify_task} {audience}'
        print('simplify_iteration: ', simplify_iteration)
        print('Task:', full_simplify_task)
        summaries_keys = [key for key in self.summaries_dict.keys() if re.match(self.response_regex, key)]
        print('summaries_keys: \n\t', summaries_keys)
        for key in summaries_keys:
            # Key under which the raw response is stored if parsing fails.
            new_key = re.sub(self.response_regex, rf'simple_summary\1', key)
            print(f'\t\t...Preparing to summarize {key}')
            simplify_prompt = self.create_prompt(full_simplify_task, self.summaries_dict[key])
            try:
                response = self.gpt(simplify_prompt, n_choices=n_choices, temperature=temperature)
            except Exception as error:
                self._report_error(error)
                print('\t**API request failed for `.simplify()`**')
                return self.qna
            try:
                self.simple_summary_dict[simplify_iteration][key] = dict()
                for index, choice in enumerate(response.choices):
                    self.simple_summary_dict[simplify_iteration][key][index] = {
                        'simple summary choice': index+1, 
                        'simplify task': simplify_task,
                        'audience': audience,
                        # Record the task exactly as it was sent in the prompt.
                        'full simplify task': full_simplify_task,
                        'simple summary': choice["message"]["content"],
                        'original summary': self.summaries_dict[key]
                    }
                    print(f'\t...Summary given')
            except Exception as error:
                self._report_error(error)
                self.simple_summary_dict[simplify_iteration][new_key] = response
                print(f'\t...Error parsing response for summary request')
            if pause_per_request > 0:
                print(f'[.simplify()] Sleeping {pause_per_request} sec to avoid exceeding API rate limit')
                time.sleep(pause_per_request)
        return self.simple_summary_dict
    
    def add_relevance(self, relevance_task, audience, 
                    model="gpt-3.5-turbo", temperature=0.0, n_choices=1, summary_type='original',
                    pause_per_request=0
                    ):
        """
        Ask ChatGPT to add a relevance statement for an audience to every summary.

        Parameters:
        -----------
        relevance_task : str
            Instruction text; `audience` is appended after a space to form the prompt task.
        audience : str
            Description of the target audience.
        model : str, optional
            Accepted for interface compatibility but not used; requests use `self.model`.
        temperature : float, optional
            Sampling temperature for these requests. Default is 0.0.
        n_choices : int, optional
            Number of responses to request per summary. Default is 1.
        summary_type : str, optional
            'original' (default) processes the summaries from `.summarize()`.
        pause_per_request : int or float, optional
            Seconds to sleep after each request. Default is 0 (no pause).

        Returns:
        --------
        relevance_dict : dict
            {call number: {summary key: {choice index: result dict}}}.
            If an API request fails, `self.qna` is returned instead.
        """
        relevance_iteration = len(self.relevance_dict) + 1 
        self.n_previous_prompts['relevance'] = len(self.relevance_dict)
        self.relevance_dict[relevance_iteration] = dict()
        full_relevance_task = f'{relevance_task} {audience}'
        print('relevance_iteration: ', relevance_iteration)
        print('Task:', full_relevance_task)
        if summary_type=='original':
            summaries_keys = [key for key in self.summaries_dict.keys() if re.match(self.response_regex, key)]
            summary_regex = self.response_regex
        else:
            # NOTE(review): this branch filters `summaries_dict` for keys that
            # start with 'simple_summary_', but `.summarize()` only stores
            # 'response_NN' keys, so no summaries appear to be selected here.
            # Confirm the intended source of simplified summaries.
            self.simple_summary_response_regex = r'simple_summary_(.*)'
            summaries_keys = [key for key in self.summaries_dict.keys() if re.match(self.simple_summary_response_regex, key)]
            summary_regex = self.simple_summary_response_regex
        print('summaries_keys: \n\t', summaries_keys)
        input_summary_dict = self.summaries_dict if summary_type=='original' else self.simple_summary_dict
        for key in summaries_keys:
            # Key under which the raw response is stored if parsing fails.
            new_key = re.sub(summary_regex, rf'relevance_\1', key)
            print(f'\t\t...Preparing to add relevance to {key}')
            relevance_prompt = self.create_prompt(full_relevance_task, input_summary_dict[key])
            try:
                response = self.gpt(relevance_prompt, n_choices=n_choices, temperature=temperature)
            except Exception as error:
                self._report_error(error)
                print('\t**API request failed for `.add_relevance()`**')
                return self.qna
            try:
                self.relevance_dict[relevance_iteration][key] = dict()
                for index, choice in enumerate(response.choices):
                    self.relevance_dict[relevance_iteration][key][index] = {
                        'relevance choice': index+1, 
                        'relevance task': relevance_task,
                        'audience': audience,
                        'full relevance task': full_relevance_task,
                        'relevance statement': choice["message"]["content"],
                        'preceding summary': input_summary_dict[key]
                    }
                    print(f'\t...Relevance statement given')
            except Exception as error:
                self._report_error(error)
                # Keep the unparsed response in relevance_dict (the attribute
                # that actually exists) so it can be inspected later.
                self.relevance_dict[relevance_iteration][new_key] = response
                print(f'\t...Error parsing response for relevance request')
            if pause_per_request > 0:
                print(f'[.add_relevance()] Sleeping {pause_per_request} sec to avoid exceeding API rate limit')
                time.sleep(pause_per_request)
        return self.relevance_dict
    
def batch_summarize_chain(text_dict, folder_path, prep_step, summarize_task, edit_task, chaining_bot_dict, iteration_id, 
    system_role=None, temperature=0.7, pause_per_request=0, n_choices=5,
    save_outputs=False, csv_path=folder_path, pickle_path=folder_path, json_path=folder_path
    ):
    """
    Summarize multiple texts using every combination of the given prompt fragments.

    Parameters:
        - text_dict (dict): The texts to summarize.
            The keys of the dictionary are the text IDs and the values are the full texts.
        - folder_path (str): Folder the texts came from; passed to each Chaining instance.
        - prep_step, summarize_task, edit_task (list): Alternatives for each prompt
            fragment. One request is made per text per element of their Cartesian product.
        - chaining_bot_dict (dict): Dictionary that receives the Chaining instances.
            The entry for `iteration_id` is replaced by a new dict.
        - iteration_id (int, float, or string): Unique ID serving as the key for this
            run's results in `chaining_bot_dict`.
        - system_role (str, optional): System role passed to each Chaining instance. Default is None.
        - temperature (float, optional): Passed to each Chaining instance. Default is 0.7.
        - pause_per_request (int or float, optional): Seconds to sleep after each
            successful request to stay under API rate limits. Default is 0 (no pause).
        - n_choices (int, optional): Number of responses requested per prompt. Default is 5.
        - save_outputs (bool, optional): Whether to save this run's Chaining instances
            via `save_instance_to_dict`. Default is False.
        - csv_path, pickle_path (str, optional): Accepted but not used by this function.
        - json_path (str, optional): Passed to `save_instance_to_dict` when saving.
        The three path defaults are the value the notebook-level `folder_path` had
        when this function was defined.

    Returns:
    --------
    chaining_bot_dict: dict
        The same dictionary that was passed in. `chaining_bot_dict[iteration_id]` maps
        'text<ID>_prompt<NN>' to the Chaining instance for that text/prompt pair.

    Errors while creating or running a chatbot are printed, and the remaining
    prompts for that text are skipped; errors while saving are printed only.
    """
    prompts_df = pd.DataFrame(
        product(prep_step, summarize_task, edit_task),
        columns=['prep_step', 'summarize_task', 'edit_task'],
    )

    bots = chaining_bot_dict[iteration_id] = dict()
    for text_id, text in text_dict.items():
        for index in prompts_df.index:
            print(f'**Text #{text_id} prompt #{index} of {prompts_df.index.max()}**')
            prompt_row = prompts_df.loc[index]
            try:
                print('Creating Chaining class instance')
                chatbot = Chaining(
                    text,
                    folder_path=folder_path,
                    temperature=temperature,
                    system_role=system_role,
                )
                print('Chaining class instance created')
                chatbot.summarize(
                    task=prompt_row['summarize_task'],
                    prep_step=prompt_row['prep_step'],
                    edit_task=prompt_row['edit_task'],
                    n_choices=n_choices,
                )
                bots[f'text{text_id}_prompt{"{:02d}".format(index)}'] = chatbot
                print('\t...Success!')
                if pause_per_request > 0:
                    print(f'[batch_summarize()] Sleeping {pause_per_request} sec to avoid exceeding API rate limit')
                    time.sleep(pause_per_request)
            except Exception as error:
                _, _, tb = sys.exc_info()
                print("An error occurred on line", tb.tb_lineno, "in", tb.tb_frame.f_code.co_filename, ":", error)
                print('\t...Error making chatbot request')
                # Leaves only the prompt loop; the next text is still processed.
                break

    if not save_outputs:
        return chaining_bot_dict

    try:
        save_instance_to_dict(
            bots,
            description=f'batch_Chaining_attributes_initial',
            ext=None,
            json_path=json_path,
        )
    except Exception as error:
        _, _, tb = sys.exc_info()
        print(f'An error occurred on line {tb.tb_lineno} in {tb.tb_frame.f_code.co_filename}: {error}')
        print('[batch_summarize_chain()] Unable to save API response')

    return chaining_bot_dict

def create_qna_df(
    qna_dict, chatbot_dict, iteration_id, chatbot_id=None, 
    ):
    """
    Create DataFrame from initial ChatGPT summaries.

    Each chatbot's `qna` dict becomes one row per generated summary, numbered
    by a 1-based 'choice' column placed fourth. Chatbots whose `qna` has no
    'summary' entry (their API request failed) are skipped with a message
    instead of raising KeyError.

    Parameters:
        - qna_dict (dict): Dictionary that receives the resulting DataFrame.
        - chatbot_dict (dict): {chatbot_id: {key: object with a `.qna` dict}}.
        - iteration_id (int, float, or string): Key under which the DataFrame is
            stored in `qna_dict`.
        - chatbot_id (optional): Key of `chatbot_dict` to process. Defaults to `iteration_id`.

    Returns:
        qna_dict (dict): The same dictionary, with `qna_dict[iteration_id]` set.

    Raises:
        ValueError: If none of the chatbots has any summaries.
    """
    chatbot_id = iteration_id if chatbot_id is None else chatbot_id
    dfs_list = []
    for chatbot_key, chatbot in chatbot_dict[chatbot_id].items():
        print(f'Processing {chatbot_key}...')
        summaries = chatbot.qna.get('summary')
        if summaries is None:
            # `.summarize()` leaves 'summary' unset when the API request failed.
            print(f'\tNo summaries found for {chatbot_key}; skipping.')
            continue
        dfs_list.append(pd.DataFrame(chatbot.qna, index=range(1, len(summaries) + 1)))

    if not dfs_list:
        raise ValueError(f'No summaries to process for chatbot_id {chatbot_id!r}')

    qna_df = pd.concat(dfs_list).reset_index(names=['choice'])
    columns = qna_df.columns.tolist()
    columns.remove('choice')
    columns.insert(3, 'choice') # Move 'choice' column

    qna_dict[iteration_id] = qna_df[columns]
    print(f'Original summaries DataFrame shape: {qna_df.shape}')
    print(f'\tOriginal summaries Dataframe columns: {qna_df.columns}')
    return qna_dict

def spreadsheet_columns(qna_dict, chatbot_dict, iteration_id, chatbot_id=None,
    save=False, filename=None, path=folder_path
    ):
    """
    Build the original-summaries DataFrame and prefix each column name with the
    letter of the matching spreadsheet column (e.g. 'A: date', 'B: folder').

    The DataFrame is created with `create_qna_df`, any '_<digits>' suffix is
    removed from the 'date' column, and every object-dtype column is
    whitespace-stripped. Letters A-Z followed by AA-AZ are available.
    When `save` is true the frame is written with `save_csv` under `filename`
    (default 'batch_Chaining_summaries_initial') in `path`; a failure to save
    is printed, not raised.

    Returns the updated `qna_dict`.
    """
    qna_dict = create_qna_df(qna_dict, chatbot_dict, iteration_id, chatbot_id=chatbot_id)
    summaries_df = qna_dict[iteration_id]

    summaries_df['date'] = summaries_df['date'].str.replace(r'_\d*', r'', regex=True)

    # A..Z, then AA..AZ
    single_letters = list(string.ascii_uppercase)
    column_letters = single_letters + ['A' + letter for letter in single_letters]
    summaries_df.columns = [
        f'{column_letters[position]}: {name}'
        for position, name in enumerate(summaries_df.columns)
    ]

    is_object_column = summaries_df.dtypes == 'O'
    for column in summaries_df.dtypes[is_object_column].index.tolist():
        summaries_df[column] = summaries_df[column].str.strip()

    if not save:
        return qna_dict

    description = filename or 'batch_Chaining_summaries_initial'
    try:
        save_csv(
            summaries_df,
            filename=description,
            append_version=True,
            path=path,
            index=False,
        )
    except Exception as error:
        _, _, tb = sys.exc_info()
        print(f'An error occurred on line {tb.tb_lineno} in {tb.tb_frame.f_code.co_filename}: {error}')
        print('[spreadsheet_columns()] Unable to save original summaries DataFrame')
    return qna_dict

def prompt_chaining_dict(simplify_prompts, audience, simple_summaries_dict, chaining_bot_dict, iteration_id,
    summary_iteration_id=None, n_choices=None, pause_per_request=0,
    prompt_column='simplify', 
    ):
    """
    Simplify or add context to a summary.

    Runs every (prompt, audience) combination on every chatbot in
    `chaining_bot_dict`: `.simplify()` when `prompt_column` is 'simplify',
    `.add_relevance()` otherwise. When `n_choices` is None it defaults to 1
    for 'simplify' and 5 for anything else. `summary_iteration_id` is only
    printed (falling back to `iteration_id` when falsy).

    The value returned by each call is appended to a list, which is stored in
    `simple_summaries_dict[iteration_id]`; that dictionary is returned.
    """
    resolved_summary_id = summary_iteration_id or iteration_id
    print('summary_iteration_id:', resolved_summary_id)

    prompts_df = pd.DataFrame(product(simplify_prompts, audience), columns=[prompt_column, 'audience'])

    use_simplify = prompt_column == 'simplify'
    if n_choices is None:
        n_choices = 1 if use_simplify else 5
    print('n_choices:', n_choices)

    method_name = 'simplify' if use_simplify else 'add_relevance'
    collected_results = []
    for text_prompt_key, chatbot in chaining_bot_dict.items():
        print(f'**{text_prompt_key}')
        for row_label in prompts_df.index:
            prompt_text = prompts_df.at[row_label, prompt_column]
            target_audience = prompts_df.at[row_label, 'audience']
            run_step = getattr(chatbot, method_name)
            collected_results.append(
                run_step(
                    prompt_text, target_audience,
                    n_choices=n_choices, pause_per_request=pause_per_request,
                )
            )

    simple_summaries_dict[iteration_id] = collected_results
    return simple_summaries_dict

def _count_previous_prompts(chatbot, prompt_kind, chatbot_key):
    """Return `chatbot.previous_n_prompts[prompt_kind]`, or 0 when it cannot be read."""
    try:
        n_previous = chatbot.previous_n_prompts[prompt_kind]
    except (AttributeError, LookupError, TypeError):
        # Best effort: a chatbot without this record is treated as having no earlier prompts.
        print(f'\tNo previous {prompt_kind} prompts for {chatbot_key}.')
        return 0
    print(f'\tNumber of previous {prompt_kind} prompts: {n_previous}.')
    return n_previous


def _new_prompt_frames(results_by_prompt, n_previous):
    """Build one transposed DataFrame per response for prompts numbered after `n_previous`."""
    frames = []
    # Prompt numbers are 1-based keys; only those added after `n_previous` are collected.
    for prompt_number in range(n_previous + 1, len(results_by_prompt) + 1):
        responses = results_by_prompt[prompt_number]
        for response_key in sorted(responses):
            frames.append(pd.DataFrame(responses[response_key]).transpose())
    return frames


def _print_error_location(error):
    """Print the line number and file recorded at the head of `error`'s traceback."""
    tb = error.__traceback__
    print("An error occurred on line", tb.tb_lineno, "in", tb.tb_frame.f_code.co_filename, ":", error)


def merge_all_chaining_results2(
    chatbot_dict, qna_dict, iteration_id, 
    empty_columns=None, pivot=True, validate=None,
    chatbot_id=None, save_df=False, save_chatbot=False, 
    csv_path=folder_path,
    pickle_path=None,
    json_path=None
    ):
    """
    Create a dataframe of original, 'simple', 'relevance' summaries from a Chaining object.
    Merge it with the original summaries DataFrame.

    Parameters:
        - chatbot_dict (dict): dictionary of dictionaries of Chaining objects, keyed by chatbot_id.
        - qna_dict (dict): dictionary of DataFrames; the merged DataFrame is stored in it
            under `iteration_id`.
        - iteration_id (int, float, or string): key under which the merged DataFrame is stored.
        - empty_columns (Bool, int, or dict): dictionary mapping new (empty) column names to
            spreadsheet column letters.
            If truthy but not a dict, a default dictionary is used.
            If False, 0 or None, no empty columns are added.
        - pivot (Bool): whether to pivot the relevance summaries DataFrame. Default is True.
        - validate (str): Argument to pass to pd.merge() to validate the merge with the
            simple summaries. For the relevance merge, the pivoted layout uses 'm:1' when
            `validate` is truthy and the unpivoted layout never validates.
        - chatbot_id (int, float, or string): chatbot_id (dict key) of the chatbot_dict to
            process. Defaults to `iteration_id`.
        - save_df, save_chatbot (Bool): whether to save the DataFrame and chatbot_dict.
        - csv_path, pickle_path, and json_path (raw string or string): Location to save the
            outputs. json_path defaults to csv_path when not provided; pickle_path is
            forwarded to the save helpers unchanged.

    Returns:
        dict: `qna_dict`, with the merged DataFrame stored under `iteration_id`.
    """
    # Imported locally so the column-letter logic does not rely on a star import providing `string`.
    import string

    df_list_simple = []
    df_list_relevance = []
    if chatbot_id is None:
        chatbot_id = iteration_id
    chatbots = chatbot_dict[chatbot_id]
    for chatbot_key, chatbot in chatbots.items():
        print(f'Processing {chatbot_key}...')
        n_previous_prompts_simple = _count_previous_prompts(chatbot, 'simple', chatbot_key)
        n_previous_prompts_relevance = _count_previous_prompts(chatbot, 'relevance', chatbot_key)

        # Only grab the results of the new prompts instead of all of them.
        df_list_simple.extend(
            _new_prompt_frames(chatbot.simple_summary_dict, n_previous_prompts_simple)
            )
        df_list_relevance.extend(
            _new_prompt_frames(chatbot.relevance_dict, n_previous_prompts_relevance)
            )

    simple_summary_df = pd.concat(df_list_simple)
    relevance_df = pd.concat(df_list_relevance)
    qna_df = create_qna_df(qna_dict, chatbot_dict, iteration_id, chatbot_id)[iteration_id]
    print('Simple summaries DataFrame shape:', simple_summary_df.shape)
    print(f'\tSimple summaries DataFrame columns: {list(simple_summary_df.columns)}')
    print('Relevance summaries DataFrame shape:', relevance_df.shape)
    print(f'\tRelevance summaries DataFrame columns: {list(relevance_df.columns)}')

    relevance_audience_list = sorted(relevance_df.audience.unique().tolist())
    print(f'Unique relevance audience values: {relevance_audience_list}')
    print(f'\noriginal summaries df columns: {qna_df.columns}\n')

    new_results = qna_df.merge(
        simple_summary_df, how='right',
        right_on='original summary',
        left_on='summary',
        validate=validate
        ).drop(columns='original summary')
    # Kept as an equality test so that pivot=None still selects the pivoted layout.
    if pivot == False:
        spreadsheet_column_names = [
            "article_title",
            "choice",
            "system_role",
            "model",
            "text",
            "prep step",
            "summarization task",
            "full summarization task",
            "summary",
            "simple summary choice",
            "audience simplify",
            "simplify task",
            "full simplify task",
            "simple summary",
            "audience relevance",
            "relevance task",
            "full relevance task",
            "relevance statement"
        ]

        print(f'DataFrame shape after merging with simple summaries: {new_results.shape}')
        print(f'\tColumns after merging with simple summaries: {list(new_results.columns)}')
        # The caller's `validate` is deliberately not applied to this merge.
        new_results = new_results.merge(
            relevance_df, how='outer', suffixes=(' simplify', ' relevance'),
            left_on='summary', right_on='preceding summary', validate=None
            ).drop(columns='preceding summary')
    else:
        spreadsheet_column_names = [
            "article_title",
            "choice",
            "system_role",
            "model",
            "text",
            "prep step",
            "summarization task",
            "full summarization task",
            "summary",
            "simple summary choice",
            "audience",
            "simplify task",
            "full simplify task",
            "simple summary",
            "relevance task",
            "full relevance task"
        ]
        # One column of relevance statements per audience value.
        relevance_pivot_df = relevance_df.pivot(
            columns=['audience'],
            values='relevance statement',
            index=['preceding summary', 'relevance task',]
        ).sort_index().reset_index()
        new_results = new_results.merge(
            relevance_pivot_df, how='outer', suffixes=(' simplify', ' relevance'),
            left_on='summary', right_on='preceding summary',
            validate='m:1' if validate else None
        ).drop(columns='preceding summary')
        # NOTE(review): this layout needs at least two audience values and labels the second
        # one's task columns "(seniors)" regardless of its actual value; further audiences
        # are dropped by the column selection below -- confirm this is intended.
        first_audience = relevance_audience_list[0]
        second_audience = relevance_audience_list[1]
        new_results['full relevance task'] = new_results['relevance task'].apply(
            lambda task: f'{task} {first_audience}'
            )
        new_results['add relevance task (seniors)'] = new_results["relevance task"]
        new_results['full add relevance task (seniors)'] = new_results['relevance task'].apply(
            lambda task: f'{task} {second_audience}'
            )
        spreadsheet_column_names.extend([
            first_audience,
            'add relevance task (seniors)',
            'full add relevance task (seniors)',
            second_audience,
        ])

    new_results = new_results[spreadsheet_column_names]
    if empty_columns:
        if not isinstance(empty_columns, dict):
            # Default layout; insertion order matters because each insert shifts later columns.
            empty_columns = {
                # "choice numnber": "C",
                "original summary content rating": "K",
                "original summary language rating": "L",
                "top summary": "M",
                "simple summary content rating": "S",
                "simple summary language rating": "T",
                "top simple summary": "U",
            }
            if not (pivot == False):
                empty_columns.update({
                    # 'full add relevance task': 'w',
                    'added relevance content rating': 'y',
                    'added relevance language rating': 'z',
                    'top added relevance': 'aa',
                })
        print(f'Merged DataFrame shape: {new_results.shape}')
        print('\nColumns before adding empty columns:', list(new_results.columns))
        print('Inserting empty columns...', end='\n\t')
        # Spreadsheet letters A..Z followed by AA..AZ.
        spreadsheet_letters = list(string.ascii_uppercase) + ['A' + letter for letter in string.ascii_uppercase]
        letter_positions = {letter: position for position, letter in enumerate(spreadsheet_letters)}
        for column_name, column_letter in empty_columns.items():
            column_letter = column_letter.upper()
            # The DataFrame's first column is labelled "B" below, hence the offset of one.
            empty_column_loc = letter_positions[column_letter] - 1
            new_results.insert(loc=empty_column_loc, column=column_name, value='')
            print(f'{column_letter} ({empty_column_loc}): {column_name}', end=', ')
        new_results.columns = [
            f'{spreadsheet_letters[index+1]}: {column}' for index, column in enumerate(new_results.columns)
            ]

    print('\n** Merged dataframe shape:', new_results.shape)
    print(list(new_results.columns))
    qna_dict[iteration_id] = new_results
    try:
        original_summary_time = next(iter(chatbots.values())).date_created
        description_tag = f'_{original_summary_time}_updated'
        print(f'Original summary time: {original_summary_time}')
    except (AttributeError, StopIteration):
        # No creation time available: save without a timestamp tag.
        description_tag = ''
    if save_df:
        try:
            save_output(
                qna_dict[iteration_id], 
                description=f'batch_Chaining_summaries{description_tag}',
                csv_path=csv_path, pickle_path=pickle_path)
            print('')
        except Exception as error:
            # Saving is best effort; report and carry on so the merged result is still returned.
            _print_error_location(error)
            print('Unable to save DataFrame')
    if save_chatbot:
        json_path = csv_path if json_path is None else json_path
        try:
            print('Saving Chaining object (chatbot)...')
            save_instance_to_dict(
                chatbots, 
                description=f'batch_Chaining_attributes{description_tag}',
                pickle_path=pickle_path, json_path=json_path
                )
        except Exception as error:
            _print_error_location(error)
            print('Unable to save chatbot')
            
    return qna_dict

# Set parameters
# These module-level names configure the merge call at the bottom of this cell.
# NOTE(review): 2.10 is the float 2.1, so as a dict key it is indistinguishable from an
# iteration labelled 2.1 -- confirm that is intended.
iteration_id = 2.10
n_choices = 1
pause_per_request=0
# chatbot_id = iteration_id
# NOTE(review): chatbot_id is commented out above, but the commented-out steps below still
# reference it; it must be restored before re-enabling them.
# summary_iteration_id is passed as `chatbot_id` to merge_all_chaining_results2 below.
summary_iteration_id = 1.43
# Within this cell, save_outputs is only referenced by the commented-out batch_summarize_chain call.
save_outputs = False
# save = True
# `save` is forwarded below as both save_df and save_chatbot.
save = False
empty_columns = False

# # Create initial summaries
# chaining_dict = batch_summarize_chain(
#     text_dict, folder_path, prep_step, summarize_task, edit_task, chatbot_dict,
#     system_role=system_role, 
#     n_choices=n_choices, pause_per_request=pause_per_request,
#     iteration_id=iteration_id, save_outputs=save_outputs
#     )
# qna_dict = spreadsheet_columns(
#     qna_dict, chatbot_dict, iteration_id, chatbot_id=chatbot_id, save=save
#     )

# # Create simple summaries
# audience = simplify_audience
# simple_summaries = prompt_chaining_dict(user_simplify_task, simplify_audience, simple_summaries_dict, 
#     chatbot_dict[chatbot_id], iteration_id,
#     n_choices=1, pause_per_request=pause_per_request, summary_iteration_id=summary_iteration_id
#     )

# # Add relevance
# relevance = prompt_chaining_dict(user_relevance_task, relevance_audience, relevance_dict, 
#     chatbot_dict[summary_iteration_id], iteration_id, prompt_column='relevance', 
#     n_choices=1, pause_per_request=pause_per_request, summary_iteration_id=summary_iteration_id
#     )

# Merge the results
# (Earlier variant, kept for reference: the same merge wrapped in try/except, with a JSON
# save of the Chaining instances as a fallback.)
# try:
#     df_dict = merge_all_chaining_results2(
#         chatbot_dict, qna_dict, iteration_id=iteration_id, pivot=True,
#         empty_columns=empty_columns, chatbot_id=summary_iteration_id,
#         save_df=save, save_chatbot=save, 
#             csv_path=folder_path,
#     )
#     print(f'\nCompleted merge_all_chaining_results!:)')
# except Exception as error:
#     exc_type, exc_obj, tb = sys.exc_info()
#     f = tb.tb_frame
#     lineno = tb.tb_lineno
#     file = f.f_code.co_filename
#     print(f'An error occurred on line {lineno} in {file}: {error}')
#     print('Unable to merge results')
#     if save:
#         save_instance_to_dict(chatbot_dict[chatbot_id], ext=None, json_path=folder_path)
#         print(f'\nCould not merge; saved Chaining instances as JSON.')
# Active call: rebinds qna_dict to the dictionary returned by the merge. There is no
# try/except here, so any exception from the merge propagates out of the cell.
qna_dict = merge_all_chaining_results2(
    chatbot_dict, qna_dict, iteration_id=iteration_id, pivot=True,
    empty_columns=empty_columns, chatbot_id=summary_iteration_id,
    save_df=save, save_chatbot=save, 
        csv_path=folder_path,
)
print(f'\nCompleted merge_all_chaining_results!:)')

# df_dict[iteration_id]

Processing text1_prompt00...
	No previous simple prompts for text1_prompt00.
	No previous relevance prompts for text1_prompt00.
Processing text2_prompt00...
	No previous simple prompts for text2_prompt00.
	No previous relevance prompts for text2_prompt00.
Processing text1_prompt00...
Processing text2_prompt00...
Original summaries DataFrame shape: (4, 12)
	Original summaries Dataframe columns: Index(['choice', 'date', 'folder', 'article_title', 'system_role', 'model',
       'text', 'prep step', 'summarization task', 'edit task',
       'full summarization task', 'summary'],
      dtype='object')
Simple summaries DataFrame shape: (4, 6)
	Simple summaries DataFrame columns: ['audience', 'full simplify task', 'original summary', 'simple summary', 'simple summary choice', 'simplify task']
Relevance summaries DataFrame shape: (8, 6)
	Relevance summaries DataFrame columns: ['audience', 'full relevance task', 'preceding summary', 'relevance choice', 'relevance statement', 'relevance task']
U

# *End of Page*