# Passing PRAW output to an LLM

In [19]:
from openai import AzureOpenAI
from pydantic_settings import BaseSettings, SettingsConfigDict
from loguru import logger
import json
from typing import List
from concurrent.futures import ThreadPoolExecutor, as_completed

In [None]:
class Settings(BaseSettings):
    """Azure OpenAI connection settings loaded through pydantic-settings.

    Every field is looked up under the ``azure_openai_`` prefix configured in
    ``model_config``. The three fields without a default have to be supplied;
    ``model`` and ``timeout_seconds`` fall back to the values given here.
    """

    # Prefix applied to each field name when resolving its value.
    model_config = SettingsConfigDict(env_prefix="azure_openai_")

    # Required connection details (no defaults).
    api_key: str
    endpoint: str
    version: str

    # Optional tuning knobs.
    model: str = "gpt-4o"
    timeout_seconds: int = 120

In [4]:
# Module-wide settings instance. It is built once here and later read by
# Summarizer.__init__ to supply the default values of its arguments.
cfg = Settings()

In [7]:
# User-prompt template for summarizing one reddit post.
# It is rendered with str.format and has exactly two placeholders:
#   {query}          - the search query that led to the post
#   {example_output} - one or more worked examples of the expected output
# Because str.format is used, any literal curly brace added to this text
# must be doubled ("{{" / "}}").
# NOTE(review): the text refers to the post appearing "under
# <<<<REDDIT POST>>>>", but the template itself has no placeholder for the
# post body.
prompt_tmpl = \
"""
You have been provided with the entire text of a reddit post under <<<<REDDIT POST>>>>.
This reddit post was retrieved when a user made the following google query: {query}
In the query, the user is seeking solutions to a problem they have or wants to get advice to make the best decision.
Given the context of the query, summarize the advice and solutions that people are saying in the thread.
OUTPUT THE SUMMARY BETWEEN <<<<SUMMARY>>>> and <<<</SUMMARY>>>>

After outputting the summary, output a python list of key words that can be verbs or nouns, which are solutions to the problem or advice provided in the thread.
For example, if the user has queried "Migraine relief", the list would be something like ["exedrin", "Magnesium Glycinate", "ibuprofen", "earplugs", "stretching", ...].
As another example, if the user has queried "Europe trip planning", the list would be something like ["Italy", "Budapest", "Barcelona", "Netherlands", ...]
YOU MUST ONLY USE WORDS THAT EXIST IN THE TEXT AS THEY ARE. OUTPUT THE LIST OF WORDS BETWEEN <<<<KEYWORDS>>>> and <<<</KEYWORDS>>>>

Follow the following chain of thought:
1. Identify the solutions or suggestions that are central to the discussion 
2. Determine the consensus about these solutions or suggestions. Is it good, or is it bad?
3. Identify unique solutions or suggestions that are not an integral part of the discussion, but could be worth trying out or exploring.
4. Provide a medium-length summary of the discussion, including the pros and cons of some of the discussed solutions or suggestions.
5. Output the key word list including the discussed solutions and suggestions.

Below are examples of the output that is expected:

{example_output}
"""

In [13]:
# Worked example of the expected model output for the query
# "Migraine relief": a summary wrapped in <<<<SUMMARY>>>> tags followed by a
# JSON-style list of keywords wrapped in <<<<KEYWORDS>>>> tags. It is
# substituted into the {example_output} placeholder of the prompt template.
migraine_example = \
"""
OUTPUT FOR THE QUERY "Migraine relief":

<<<<SUMMARY>>>>
1. Central Suggestions
The central strategies revolve around combinations of over-the-counter (OTC) medications, prescription drugs, and physical remedies:

- Medications: Common drugs include triptans (e.g., Sumatriptan), NSAIDs (e.g., Ibuprofen, Naproxen), and acetaminophen (e.g., Tylenol). Other popular medications are Nurtec, Fioricet, and Benadryl for sedation or nausea relief.
- Topical Treatments: Products like Aspercreme with 4% lidocaine and CBD ointments are frequently recommended for numbing facial pain.
- Physical Remedies: Ice packs, heating pads, cold showers, and migraine caps are widely used for physical relief. Compression (tight headbands or scarves) and darkness are also key.
- Dietary and Hydration Support: Electrolytes, water, and caffeine (in moderation) are emphasized as essential aids.

2. Consensus
The community consensus is largely positive about these methods:

- Aspercreme is hailed as a "game-changer," with numerous users praising its immediate effectiveness and ease of application.
- Triptans and combination therapies (e.g., pairing NSAIDs with caffeine) receive widespread support for their efficacy.
- OTC remedies like Excedrin and strategies like using cold compresses or dark rooms are universally well-regarded.
- Unique remedies such as green light therapy and meditation also receive cautious optimism, often noted as subjective but potentially helpful.

3. Unique and Emerging Suggestions
Several lesser-discussed but intriguing approaches surfaced:

- Shockwave Therapy: A user reported significant relief after this treatment, suggesting its potential for muscle-tension-related migraines.
- Green Light Therapy: Some users experimented with green light filters or lamps, noting reduced migraine intensity, although effects were not universally confirmed.
- Dietary Additions: Specific items like fresh lemonade or magnesium supplements were mentioned as beneficial, with anecdotal evidence of effectiveness.
- Singing Bowls and ASMR: While polarizing, some users found these methods helpful for relaxation during migraines.
<<<</SUMMARY>>>>

<<<<KEYWORDS>>>>
["Sumatriptan", "Ibuprofen", "Naproxen", "Acetaminophen", "Tylenol",
"Excedrin", "Benadryl", "Nurtec", "Fioricet", "Ubrelvy", "Zofran",
"Ondansetron", "Promethazine", "Diclofenac", "Amitriptyline", 
"Rizatriptan", "Naratriptan", "Tramadol", "Aspirin", "Cambia",
"Propranolol", "Anarex", "Dexamethasone", "Meclizine", "Elatriptan",
"Flexeril", "Hydroxyzine", "Xanax", "Omeprazole", "BC Powder",
"Aspercreme", "CBD ointment", "Tiger Balm", "Salonpas", "Lidocaine patches",
"Migraine cap", "Ice packs", "Heating pads", "Compression headbands",
"Dark room", "Green light therapy", "Electrolytes", "Caffeine",
"Ginger chews", "Gua Sha", "Meditation", "ASMR", "Singing bowls",
"Fresh lemonade", "Magnesium supplements", "Vitamin D", "Pedialyte",
"Gatorade", "Cold showers", "Shockwave therapy", "Cefaly device",
"Avulux glasses", "Allay lamp", "McDonald's fries", "Rice sock",
"Motion sickness pills", "Energy drinks", "Sleep"]
<<<</KEYWORDS>>>>
"""

In [14]:
# Few-shot examples that get concatenated into the prompt's example section.
# NOTE(review): the same migraine example is listed twice, so the rendered
# prompt repeats it verbatim - presumably a placeholder for a second,
# different example; confirm.
examples = [migraine_example] * 2

In [15]:
# Preview the example section exactly as it will be spliced into the prompt
# (examples back to back, no separator).
print(*examples, sep="")


OUTPUT FOR THE QUERY "Migraine relief":

<<<<SUMMARY>>>>
1. Central Suggestions
The central strategies revolve around combinations of over-the-counter (OTC) medications, prescription drugs, and physical remedies:

- Medications: Common drugs include triptans (e.g., Sumatriptan), NSAIDs (e.g., Ibuprofen, Naproxen), and acetaminophen (e.g., Tylenol). Other popular medications are Nurtec, Fioricet, and Benadryl for sedation or nausea relief.
- Topical Treatments: Products like Aspercreme with 4% lidocaine and CBD ointments are frequently recommended for numbing facial pain.
- Physical Remedies: Ice packs, heating pads, cold showers, and migraine caps are widely used for physical relief. Compression (tight headbands or scarves) and darkness are also key.
- Dietary and Hydration Support: Electrolytes, water, and caffeine (in moderation) are emphasized as essential aids.

2. Consensus
The community consensus is largely positive about these methods:

- Aspercreme is hailed as a "game-changer

In [None]:
# System message sent ahead of the user prompt on every chat-completion call.
# The leading and trailing newlines are part of the value.
system_prompt = (
    "\n"
    "You are a reddit post summarization agent, with the objective of "
    "providing a well-rounded yet informative summary to the user"
    "\n"
)

In [16]:
def find_text_in_between_tags(text: str, start_tag: str, end_tag: str) -> str:
    """Return the part of ``text`` enclosed by ``start_tag`` and ``end_tag``.

    The first occurrence of ``start_tag`` is used, and ``end_tag`` is searched
    for only *after* it, so an end tag that happens to appear earlier in the
    text is ignored. The enclosed text is returned unmodified (surrounding
    whitespace is kept).

    Args:
        text: The string to search.
        start_tag: Marker that opens the section.
        end_tag: Marker that closes the section.

    Returns:
        The characters strictly between the two tags (possibly empty).

    Raises:
        ValueError: If ``start_tag`` is absent, or ``end_tag`` does not occur
            after it. Without this check ``str.find`` returning -1 would be
            used as a slice index and silently yield an unrelated substring.
    """
    start_pos = text.find(start_tag)
    if start_pos == -1:
        raise ValueError(f"Start tag {start_tag!r} not found in text")

    content_start = start_pos + len(start_tag)
    end_pos = text.find(end_tag, content_start)
    if end_pos == -1:
        raise ValueError(f"End tag {end_tag!r} not found after {start_tag!r}")

    return text[content_start:end_pos]

In [None]:
class Summarizer():
    """Summarize reddit posts with an Azure OpenAI chat model.

    Each post is sent to the model together with the user's search query; the
    reply is expected to contain a ``<<<<SUMMARY>>>>`` section and a
    ``<<<<KEYWORDS>>>>`` section holding a JSON list of strings.
    """

    def __init__(self,
                 openai_api_key: str = cfg.api_key,
                 openai_endpoint: str = cfg.endpoint,
                 openai_model: str = cfg.model,
                 openai_version: str = cfg.version,
                 timeout_seconds: int = cfg.timeout_seconds,
                 temperature: float = 0.0,
                 retries: int = 1
                 ):
        """Create the Azure OpenAI client and store the call parameters.

        The defaults are read from the module-level ``cfg`` once, when the
        class is defined.

        Args:
            openai_api_key: API key passed to the client.
            openai_endpoint: Azure endpoint passed to the client.
            openai_model: Model name used for chat completions.
            openai_version: API version passed to the client.
            timeout_seconds: Per-request timeout handed to the completion call.
            temperature: Sampling temperature handed to the completion call.
            retries: Number of additional attempts after the first failure.
        """
        self.openai_client = AzureOpenAI(
            api_key=openai_api_key,
            azure_endpoint=openai_endpoint,
            # Use the constructor argument; the settings object has no
            # ``openai_version`` attribute (its field is named ``version``).
            api_version=openai_version
        )
        self.openai_model = openai_model
        self.temperature = temperature
        self.retries = retries
        self.timeout_seconds = timeout_seconds

    def _summarize_post(self,
                        query: str,
                        post_str: str) -> dict:
        """Ask the model to summarize one post and parse its reply.

        Args:
            query: The search query that led to the post.
            post_str: Full text of the reddit post.

        Returns:
            ``{"summary": str, "keywords": list}`` on success, otherwise
            ``{"error": "LLM call failed", "details": <last error message>}``
            once every attempt has failed.
        """
        instructions = prompt_tmpl.format(query=query,
                                          example_output="".join(examples)
                                          )
        # The instructions refer to the post "under <<<<REDDIT POST>>>>", so
        # attach it under that marker. It is appended after formatting so that
        # curly braces inside the post are never interpreted by str.format.
        llm_prompt = f"{instructions}\n<<<<REDDIT POST>>>>\n{post_str}\n"

        # Always make at least one attempt, plus ``retries`` further ones.
        total_attempts = max(self.retries, 0) + 1
        # Kept outside the handler: the ``except ... as e`` name is unbound
        # as soon as the except clause ends.
        last_error = None

        for attempt in range(1, total_attempts + 1):
            try:
                response = self.openai_client.chat.completions.create(
                                model=self.openai_model,
                                messages=[
                                    {"role": "system", "content": system_prompt},
                                    {"role": "user", "content": llm_prompt}
                                ],
                                timeout=self.timeout_seconds,
                                temperature=self.temperature
                )

                llm_output = response.choices[0].message.content
                summary = find_text_in_between_tags(llm_output, "<<<<SUMMARY>>>>", "<<<</SUMMARY>>>>")
                keywords = json.loads(
                                find_text_in_between_tags(
                                                        llm_output,
                                                        "<<<<KEYWORDS>>>>",
                                                        "<<<</KEYWORDS>>>>"
                                                        )
                                    )
                # Valid JSON is not enough: the caller merges the keywords
                # into a set of strings, so reject any other shape here and
                # let the retry logic ask the model again.
                if not isinstance(keywords, list) or not all(isinstance(k, str) for k in keywords):
                    raise ValueError("KEYWORDS section is not a JSON list of strings")

                return {"summary" : summary, "keywords": keywords}

            except Exception as e:
                # Broad on purpose: network/API errors and malformed replies
                # are all handled the same way - log and try again.
                last_error = e
                if attempt < total_attempts:
                    logger.error(f"Failed to call LLM: {e}. Retrying {attempt}/{self.retries}")
                else:
                    logger.error(f"Failed to call LLM: {e}. No retries left")

        return {"error": "LLM call failed", "details": str(last_error)}

    def summarize_posts(self, query: str,
                        post_strings: List[str],
                        max_workers: int = 5):
        """Summarize several posts concurrently and merge the results.

        Args:
            query: The search query shared by all posts.
            post_strings: Full text of each post.
            max_workers: Size of the thread pool used for the model calls.

        Returns:
            A ``(summaries, keywords)`` tuple: a list with one summary per
            successfully processed post, in completion order (not input
            order), and a set with the union of all returned keywords. Posts
            that fail are logged and left out.
        """
        all_summaries, all_keywords = [], set()

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            pending = [
                executor.submit(self._summarize_post, query, post)
                for post in post_strings
            ]

            # Collect results as they finish rather than in submission order.
            for future in as_completed(pending):
                try:
                    llm_output = future.result()
                    if ("summary" in llm_output) and ("keywords" in llm_output):
                        all_summaries.append(llm_output["summary"])
                        all_keywords.update(llm_output["keywords"])
                    else:
                        # Error dict from _summarize_post: report it instead
                        # of dropping the post silently.
                        logger.error(
                            f"Skipping post: {llm_output.get('error')}: {llm_output.get('details')}"
                        )
                except Exception as e:
                    logger.error(f"Error processing post: {e}")

        return all_summaries, all_keywords
            