In [2]:
import pandas as pd

from langchain_dartmouth.llms import ChatDartmouth
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser


from itertools import batched

In [None]:
# Load the raw survey export. The "outcometxt" cell of row 0 is read out as the
# question text that is later inserted into the prompt.
raw_csv_path = "data/raw/2024_COFE_SS_sample_text.csv"
docs = pd.read_csv(raw_csv_path)
question = docs.at[0, "outcometxt"]
question

In [None]:
# The first row (index label 0) is a comment on the column contents, not an answer.
# It is removed by label rather than by position so that re-running this cell is
# harmless: once label 0 is gone, `errors="ignore"` turns the drop into a no-op
# instead of silently discarding one more real response on every execution.
docs = docs.drop(index=0, errors="ignore")
# Rows without any answer text carry nothing to analyze.
docs = docs.dropna(subset="outcometxt")
survey_responses = docs["outcometxt"].to_list()
survey_responses[:5]

In [5]:
# Chat model configuration: temperature 0 and a fixed seed are passed so that
# repeated runs request the same sampling settings.
llm_settings = {
    "model_name": "llama-3-1-8b-instruct",
    "temperature": 0,
    "seed": 42,
    "max_tokens": 60_000,
}
llm = ChatDartmouth(**llm_settings)

# Few-shot example input that is embedded verbatim in every prompt template.
# It mirrors the layout of the real batch built later in the notebook: each
# response starts with a "Response ID <n>:" header line and responses are
# separated by the same six-dash separator line that the prompt text announces.
# NOTE: this text is concatenated into a format-style template, so it must not
# contain curly braces.
example_response = "\n------\n".join(
    (
        (
            "Response ID 1:\n"
            "Dartmouth has been a transformative experience that's shaped my academic, "
            "personal, and professional growth. I've gained invaluable knowledge and "
            "skills through rigorous courses in econ and international relations, which "
            "have helped me develop a nuanced perspective on global issues. However, I've "
            "also struggled with the intense academic pressure and lack of diversity in "
            "certain departments."
        ),
        (
            "Response ID 2:\n"
            "My time at Dartmouth has been marked by incredible opportunities for growth "
            "and learning, with notable high points including studying abroad in Asia and "
            "leading a successful student organization. However, I've also struggled with "
            "the intense competition for research funding and the sometimes isolating "
            "Upper Valley setting, which can make it difficult to connect with peers from "
            "other parts of the country."
        ),
    )
)

# Few-shot example of the expected model output for the two example responses:
# a JSON list with one object per response, each holding a "response_id" and a
# list of "topics" entries with a "name" and a "sentiment".
# The braces are doubled because this string is concatenated into the text handed
# to PromptTemplate.from_template, where single braces mark template variables
# (e.g. {question}); the doubled form is presumably rendered as literal braces
# by the template's format-string syntax.
example_output = """[
    {{
        "response_id" : 1,
        "topics": [
            {{"name": "academic and personal growth", "sentiment": "positive"}},
            {{"name": "academic rigor", "sentiment": "positive"}},
            {{"name": "academic pressure", "sentiment": "negative"}},
            {{"name": "lack of diversity", "sentiment": "negative"}}
        ]
    }},
    {{
        "response_id" : 2,
        "topics": [
            {{"name": "Dartmouth student organization experience", "sentiment": "positive"}},
            {{"name": "study abroad experience", "sentiment": "positive"}},
            {{"name": "research funding", "sentiment": "negative"}},
            {{"name": "Upper Valley setting", "sentiment": "negative"}}
        ]
    }}
]"""

def _build_prompt(extra_instructions=""):
    """Build the topic/sentiment extraction prompt shared by all modes.

    The three prompt variants differ only in the sentences inserted between the
    task description and the output-format instructions, so the common text
    lives here once instead of being copied per variant.

    Args:
        extra_instructions: Sentences inserted verbatim after the task
            description. Must end with a trailing space (or be empty) and must
            not contain curly braces, because the result is a template string.

    Returns:
        A PromptTemplate with the input variables ``question`` and ``responses``.
    """
    template = (
        "The following are survey responses from students in their senior year at Dartmouth College.\n"
        "The prompt was: '{question}'\n"
        "Identify the topics mentioned in each response, "
        "as well as the sentiments expressed towards those topics. "
        + extra_instructions
        + "Format your response in valid JSON. Respond only with the JSON itself. "
        "Here is an example: \n\n"
        "Response: \n'" + example_response + "'\n\n"
        "Output: \n" + example_output + "\n\n"
        # The doubled backslashes put a literal "\n" into the prompt text so the
        # model sees the separator spelled out rather than an actual line break.
        "Here are the responses to process, separated by '\\n------\\n':\n\n"
        "{responses}"
    )
    return PromptTemplate.from_template(template)


# No restriction on how many distinct topics may be used.
unlimited_prompt = _build_prompt()

# At most 15 distinct topics across all responses.
limited_prompt = _build_prompt(
    "The goal is to have a minimal set of topics that represent the common ideas expressed across the responses. "
    "Do not use more than a total of 15 different topics. "
)

# At most 15 distinct topics, plus a 'no-topics' fallback label.
limited_with_default_prompt = _build_prompt(
    "The goal is to have a small number of topics that represent the common ideas expressed across the responses. "
    "Do not use more than a total of 15 different topics. "
    "If a response does not fall into any of the 15 topics, label it as 'no-topics'. "
)

In [6]:
# Which prompt variant to run; the value is also used in the output file name.
mode = "limited_with_default"

# Look the prompt up by mode and fail loudly on an unknown value. With a bare
# if/elif chain a typo would leave `prompt` unset (or pointing at whatever a
# previous run of this cell selected).
prompts_by_mode = {
    "limited_categories": limited_prompt,
    "unlimited_categories": unlimited_prompt,
    "limited_with_default": limited_with_default_prompt,
}
if mode not in prompts_by_mode:
    raise ValueError(
        f"Unknown mode {mode!r}; expected one of {sorted(prompts_by_mode)}"
    )
prompt = prompts_by_mode[mode]

In [None]:
# Pipeline: fill in the prompt, call the model, parse the reply as JSON.
parser = JsonOutputParser()
chain = prompt | llm | parser

# NOTE(review): only the first two survey responses are sent to the model here.
# Each response is labelled with its position in `survey_responses` and followed
# by the separator line that the prompt announces.
batch_with_ids = "".join(
    f"Response ID {response_id}:\n{survey_response}\n------\n"
    for response_id, survey_response in enumerate(survey_responses[:2])
)

# Bind the result name up front so later cells see an empty result instead of a
# NameError (or a stale value from an earlier run) if the stream yields nothing.
llm_response = []
# After the loop, `llm_response` holds the last item yielded by the stream, which
# later cells treat as the complete parsed output.
for llm_response in chain.stream({"responses": batch_with_ids, "question": question}):
    try:
        # Progress indicator: ID of the most recent entry parsed so far.
        print(llm_response[-1]["response_id"], end="\r")
    except (IndexError, KeyError, TypeError):
        # Presumably an intermediate chunk that has no entry or no
        # "response_id" yet; there is nothing to report for it.
        pass

In [7]:
# Collect the "topics" entry of every item in the final parsed model output,
# keeping the order in which the items were returned.
topics = []
for parsed_item in llm_response:
    topics.append(parsed_item["topics"])

In [8]:
# Pair each processed survey response with the topics extracted for it, then
# expand to one row per (response, topic) pair.
#
# `topics` only has entries for the responses that were actually sent to the
# model, which can be fewer than `survey_responses`; building the frame from the
# full list would fail with a length mismatch. The pairing is positional.
# NOTE(review): this assumes the model returned exactly one entry per submitted
# response, in submission order — confirm against the "response_id" values.
n_processed = len(topics)
if n_processed > len(survey_responses):
    raise ValueError(
        f"Got topics for {n_processed} responses but only "
        f"{len(survey_responses)} survey responses exist"
    )

df = pd.DataFrame(
    data={
        "id": range(n_processed),
        "response": survey_responses[:n_processed],
        "topics": topics,
    }
).explode("topics")

In [9]:
# After explode(), rows that came from the same response share an index label.
# Renumber the rows 0..n-1 so the index-aligned join in the following cell can
# match each row with its own normalized topic record.
df = df.reset_index(drop=True)

In [10]:
# Turn each entry of the "topics" column into its own set of columns, attach
# them to the matching rows by index, discard the now-redundant "topics"
# column and remove rows that are exact duplicates.
topic_columns = pd.json_normalize(df["topics"])
df = df.join(topic_columns)
df = df.drop(columns="topics")
df = df.drop_duplicates()

In [11]:
# Write the result without the index column; the selected mode is part of the
# file name so runs with different prompts do not overwrite each other.
output_path = f"data/derived/2024-10-18_naive_llm_{mode}.csv"
df.to_csv(output_path, index=None)

# TODO:

- [ ] Rett reviews output of this notebook for all three prompt modes (`limited_categories`, `unlimited_categories`, `limited_with_default`)
- Idea 1:
  - [ ] Do a pass over all responses prompting the model to identify the 15 most relevant topics expressed across all responses
  - [ ] Do a second pass categorizing the responses using only the set of topics identified in the first pass
- Idea 2:
  - [ ] Determine "inter-rater reliability" by running the responses multiple times for different temperatures/seeds
  - [ ] Find the overlapping topics identified across the runs
  - [ ] Create spreadsheet with results from multiple runs (add a column with run index)