# LLM Response Formatter Notebook

This Jupyter notebook reformats saved LLM responses: it loads them from a CSV file and writes them to a human-readable text file.

In [13]:
import pandas as pd
import ast
from typing import List
from langchain.chains import LLMChain
from pydantic import BaseModel, Field
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser

In [2]:
# Load the saved LLM results; the path is relative to the current working directory.
llm_res_df = pd.read_csv('./llm_output/llm_res_df.csv')

In [3]:
# Sanity check of the loaded frame: count / unique / top / freq for each column.
llm_res_df.describe()

Unnamed: 0,user_variable,user_description,llm_response
count,104,104,104
unique,104,102,104
top,submitter_id,Measurement of the mass concentration (mcnc) o...,"('lines', ['Based on the description provided ..."
freq,1,2,1


In [5]:
# Schema of the parsed output: the LLM result split into a list of lines
# (one string per line of the response text).
class LineList(BaseModel):
    # "lines" is the key (attribute name) of the parsed output
    lines: List[str] = Field(description="Lines of text")


# Parser that converts raw LLM text into a LineList, one entry per line.
class LineListOutputParser(PydanticOutputParser):
    def __init__(self) -> None:
        # Register LineList as the pydantic model this parser produces.
        super().__init__(pydantic_object=LineList)

    def parse(self, text: str) -> LineList:
        """Trim surrounding whitespace from ``text`` and split it on newlines.

        Returns a ``LineList`` whose ``lines`` attribute holds the pieces.
        """
        trimmed = text.strip()
        pieces = trimmed.split("\n")
        return LineList(lines=pieces)

# Module-level parser instance.
# NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed.
output_parser = LineListOutputParser()


In [11]:
def join_lines(lines: List[str]) -> str:
    """Concatenate ``lines`` into one string, separated by newline characters.

    No trailing newline is appended; an empty list yields an empty string.
    """
    separator = "\n"
    return separator.join(lines)

In [14]:
def _response_lines(raw):
    """Return the list of lines stored in one ``llm_response`` cell.

    Each raw cell is the string repr of a ``('lines', [...])`` tuple, so it is
    parsed with ``ast.literal_eval`` and element 1 (the list) is kept.

    A value that is already a list is returned unchanged. This makes the cell
    safe to re-run: the column is overwritten in place, so without the guard a
    second run would hand a list to ``ast.literal_eval`` and raise
    ``ValueError``. Any other non-string value still raises, as before.
    """
    if isinstance(raw, list):
        return raw
    return ast.literal_eval(raw)[1]


llm_res_df['llm_response'] = llm_res_df['llm_response'].apply(_response_lines)

In [15]:
# Preview the first rows to confirm llm_response now holds lists of lines.
llm_res_df.head()

Unnamed: 0,user_variable,user_description,llm_response
0,submitter_id,A project-specific identifier for a node. This...,[Based on the description provided for the new...
1,participant_id,Unique identifier that can be used to retrieve...,"[Based on the provided context, the best fit f..."
2,consent_codes,Data Use Restrictions that are used to indicat...,"[Based on the provided context, the new variab..."
3,amputation_type,"If amputated, the amputation type for leg, abo...","[Based on the provided context, the new variab..."
4,cohort_id,The study subgroup that the participant belong...,"[Based on the context provided, the new variab..."


In [23]:
# Write one human-readable record per DataFrame row to a plain-text report.
# The encoding is pinned to UTF-8 so non-ASCII characters in the LLM text are
# written identically on every platform instead of depending on the locale's
# default encoding (which can raise UnicodeEncodeError on some systems).
with open('formatted_llmres.txt', 'w', encoding='utf-8') as file:
    # iterrows() yields (index, row) pairs; the index is not needed here.
    for _, row in llm_res_df.iterrows():
        user_variable = row['user_variable']
        prompt = row['user_description']
        llm_response = row['llm_response']

        print(f"Submitted Variable: {user_variable}", file=file)
        print(f"Prompt: {prompt}", file=file)
        # llm_response is expected to be a list of strings; join_lines()
        # re-joins it with newlines into the full response text.
        print(f"Response: {join_lines(llm_response)}", file=file)
        print(file=file)
        print("-" * 50, file=file)  # Dashed separator between records
        print(file=file)  # Trailing blank line after the separator
