## Imports

In [None]:
from openai import OpenAI


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests


import os 
from dotenv import load_dotenv
parent_dir = os.path.dirname(os.getcwd())
load_dotenv(os.path.join(parent_dir, '.env'))


PPLX_API_KEY = os.getenv("PPLX_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=PPLX_API_KEY, base_url="https://api.perplexity.ai")

from pydantic import BaseModel, Field
import json



In [None]:
response = client.responses.create(
    model="gpt-4o-mini",
    tools=[{ "type": "web_search_preview" }],
    input="What was a positive news story from today?",
)

print(response)


In [None]:
print(response.output_text)
print(response.output[1].content[0].annotations)

## Example

In [None]:
messages = [
    {
        "role": "system",
        "content": (
            "You are an artificial intelligence assistant and you need to "
            "engage in a helpful, detailed, polite conversation with a user."
        ),
    },
    {   
        "role": "user",
        "content": (
            "How many stars are in the universe?"
        ),
    },
]

response = client.responses.create(
    model="gpt-4o-mini",
    tools=[{ "type": "web_search_preview" }],
    input=messages,
)
print(response.output_text)
print(response.output[1].content[0].annotations)


## Structured output example

In [3]:
class Timeline(BaseModel):
    year: list[str] = Field(..., description="List of years (YYYY format) corresponding to each event.")
    event_description: list[str] = Field(..., description="List of concise descriptions for each corresponding event.")


system_prompt = """
You are an expert analyst specializing in tracking and summarizing the **public perception and sentiment** surrounding public figures. Your role is to process information and present it according to strict guidelines.

Rules for Response Generation:

Format Adherence: Your final output MUST be a JSON object that strictly conforms to the provided Timeline JSON schema. Do not include any text before or after the JSON object.

Chronological Order: Ensure all events listed in the timeline are presented in strict chronological order, from the earliest year to the most recent.

Year Formatting: All entries in the year list MUST be strings in 'YYYY' format.

List Correspondence: The year list and the event_description list MUST have the exact same number of elements, with each year directly corresponding to the event description at the same index.

Handling Multiple Events per Year: If multiple distinct, significant events occurred in the same year, create separate entries for each. Repeat the year in the year list for each corresponding event description. Order these same-year events chronologically if possible.

Tone and Neutrality: Descriptions in event_description must be factual summaries. When mentioning public reaction or sentiment, report on it neutrally (e.g., "led to public criticism," "was praised by advocacy groups," 
"generated polarized opinions") without adding your own judgment or opinion. Maintain a neutral, analytical tone throughout."""


celebrity_name = "Elon Musk"

user_prompt = f"""
Generate a timeline for the public figure: {celebrity_name}.

Focus: The timeline MUST concentrate on events that significantly **shaped or reflected public sentiment** towards the individual. Gather key data points showing shifts and trends in **public perception and how the public felt** about them.

Content Requirements (Prioritize these):

Events Impacting Public Opinion: Include:

Major public controversies, scandals, or legal issues that generated significant **public reaction or media attention regarding the individual's public image**.

Significant achievements, actions, or statements that garnered **widespread public praise, admiration, or positive sentiment**.

Notable public stances or statements on sensitive social or political issues that **strongly influenced how the public viewed them**.

Business decisions, product launches, or professional conduct that became focal points for **public discussion, debate, or changes in sentiment** (positive or negative).

Public Sentiment Focus: For each event description, clearly summarize the **nature of the public reaction or the prevailing sentiment** (e.g., "...sparking widespread criticism," "...met with broad public approval," "...leading to polarized opinions," "...resulting in increased public support," "...drawing concern from advocacy groups"). This sentiment is **key** to the event's inclusion.

Source Reliability: Base the timeline only on events widely reported in reliable, publicly accessible sources. Avoid unverified rumors or obscure information.

Relevance Filter: De-emphasize routine career milestones (like company updates, minor product tweaks) unless they directly **triggered a significant shift in public sentiment, widespread media commentary about the individual's image, or major public debate**.

Scope: Focus on the most impactful events related to **public perception and reputation**, prioritizing those that generated significant public discussion or measurable shifts in sentiment. Aim for a comprehensive overview of these key moments.

Event Description: Ensure each event_description factually summarizes the core event and **crucially describes the associated public sentiment or reaction**, keeping descriptions concise (approx. 1-3 sentences).

Provide the timeline strictly adhering to the JSON schema format specified in the system instructions.
"""


messages = [
    {
        "role": "system",
        "content": (
           f""" {system_prompt}
"""
        ),
    },
    {   
        "role": "user",
        "content": (
            f"{user_prompt}"
        ),
    },
]


response = client.chat.completions.create(
    model="sonar",
    messages=messages,
    response_format =  {
		    "type": "json_schema",
        "json_schema": {"schema": Timeline.model_json_schema()},
    },
    max_tokens=2000,
)




NotFoundError: Error code: 404 - {'error': {'message': 'The model `sonar` does not exist or you do not have access to it.', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}

In [99]:
timeline_json_string = response.choices[0].message.content
# print(timeline_json_string) # Optional: print raw JSON string

# Validate with Pydantic
timeline_data = Timeline.parse_raw(timeline_json_string)
print("Timeline generated successfully:")
for year, event in zip(timeline_data.year, timeline_data.event_description):
    print(f"- {year}: {event}")


ValidationError: 1 validation error for Timeline
__root__
  Unterminated string starting at: line 1 column 401 (char 400) [type=value_error.jsondecode, input_value='{"year":["2018",">2018",...018",">2018",">2018",">', input_type=str]

In [88]:
def extract_year(unclean_year):
    """
    Extracts the year from the unclean year string.
    """
    if isinstance(unclean_year, str):
        # Remove any non-digit characters
        clean_year = ''.join(filter(str.isdigit, unclean_year))
        return int(clean_year)
    else:
        return unclean_year
    

def markdown_to_txt(markdown):
    """
    Converts markdown to plain text.
    """
    # Remove markdown formatting
    clean_text = markdown.replace('#', '').replace('*', '').replace('**', '')
    return clean_text.strip()


years = [extract_year(i) for i in eval(response.choices[0].message.content)['year']]
events = [markdown_to_txt(i) for i in eval(response.choices[0].message.content)['event_description']]

min_length = min(len(years), len(events))
years = years[:min_length]
events = events[:min_length]

timeline = dict(zip(years, events))
timeline

SyntaxError: unterminated string literal (detected at line 1) (<string>, line 1)