## Imports

In [5]:
from openai import OpenAI


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests


import os 
from dotenv import load_dotenv
parent_dir = os.path.dirname(os.getcwd())
load_dotenv(os.path.join(parent_dir, '.env'))


PPLX_API_KEY = os.getenv("PPLX_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=OPENAI_API_KEY)

from pydantic import BaseModel, Field
import json



In [None]:
response = client.responses.create(
    model="gpt-4o-mini",
    tools=[{ "type": "web_search_preview" }],
    input="What was a positive news story from today?",
)

print(response)


In [None]:
print(response.output_text)
print(response.output[1].content[0].annotations)

## Example

In [None]:
messages = [
    {
        "role": "system",
        "content": (
            "You are an artificial intelligence assistant and you need to "
            "engage in a helpful, detailed, polite conversation with a user."
        ),
    },
    {   
        "role": "user",
        "content": (
            "How many stars are in the universe?"
        ),
    },
]

response = client.responses.create(
    model="gpt-4o-mini",
    tools=[{ "type": "web_search_preview" }],
    input=messages,
)
print(response.output_text)
print(response.output[1].content[0].annotations)


## Structured output example

In [43]:
class Timeline(BaseModel):
    year: list[str] = Field(..., description="List of years (YYYY format) corresponding to each event.")
    event_description: list[str] = Field(..., description="List of concise descriptions for each corresponding event.")


system_prompt = """
You are an expert analyst specializing in the public perception and ethical conduct of public figures. Your role is to process information and present it according to strict guidelines.

**Rules for Response Generation:**
1.  **Format Adherence:** Your final output MUST be a JSON object that strictly conforms to the provided `Timeline` JSON schema. Do not include any text before or after the JSON object.
2.  **Chronological Order:** Ensure all events listed in the timeline are presented in strict chronological order, from the earliest year to the most recent.
3.  **Year Formatting:** All entries in the `year` list MUST be strings in 'YYYY' format.
4.  **List Correspondence:** The `year` list and the `event_description` list MUST have the exact same number of elements, with each year directly corresponding to the event description at the same index.
5.  **Handling Multiple Events per Year:** If multiple distinct, significant events occurred in the same year, create separate entries for each. Repeat the year in the `year` list for each corresponding event description. Order these same-year events chronologically if possible.
6.  **Tone and Neutrality:** Descriptions in `event_description` must be factual summaries. When mentioning public reaction, report *on* the reaction neutrally (e.g., "led to public criticism," "was praised by advocacy groups") without adding your own judgment or opinion. Maintain a neutral, analytical tone throughout."""


celebrity_name = "Elon Musk"

user_prompt = f"""
Generate a timeline for the public figure: **{celebrity_name}**.

**Focus:** The timeline MUST concentrate on events crucial for assessing the individual's ethical conduct and public perception. Gather key data points reflecting their ethical track record and public image.

**Content Requirements (Prioritize these):**
1.  **Ethically Significant Events:** Include:
    *   Major public controversies, scandals, accusations of unethical behavior, policy violations, or legal issues with clear ethical dimensions (e.g., fraud, discrimination, harassment, misleading conduct).
    *   Notable positive contributions, acts of philanthropy, demonstrated ethical leadership, activism, or significant charitable work that garnered public attention.
    *   Significant public stances or statements on sensitive social, political, or ethical issues that notably shaped public opinion.
    *   Business practices or professional conduct highlighted publicly as particularly ethical or unethical.
2.  **Public Reaction:** For each event description, **briefly summarize the notable public reaction or shift in perception** (e.g., "...sparking widespread outrage," "...earning accolades from X group," "...leading to debate," "...resulting in calls for accountability," "...generally well-received").
3.  **Source Reliability:** Base the timeline *only* on events widely reported in reliable, publicly accessible sources. Avoid unverified rumors or obscure information.
4.  **Relevance Filter:** De-emphasize routine career milestones (like album releases, movie roles) *unless* they directly involved a major ethical controversy, ethical stand, or significantly altered public perception in an ethical context.
5.  **Scope:** Focus on the *most impactful* events related to ethics and reputation, prioritizing those that generated significant public discussion or media coverage. Aim for a comprehensive overview of these key moments.
6.  **Event Description:** Ensure each `event_description` factually summarizes the core event, its ethical dimension/significance, and the associated public reaction (as noted in point 2), keeping descriptions concise (approx. 1-3 sentences).

Provide the timeline strictly adhering to the JSON schema format specified in the system instructions.
"""


messages = [
    {
        "role": "system",
        "content": (
           f""" {system_prompt}
"""
        ),
    },
    {   
        "role": "user",
        "content": (
            f"{user_prompt}"
        ),
    },
]

text={
        "format": {
            "type": "json_schema",
            "name": "Timeline",
            "schema": {
                "type": "object",
                "properties": {
                    "Year": {
                        "type": "list"
                    },
                    "date": {
                        "type": "string"
                    },
                    "participants": {
                        "type": "array", 
                        "items": {
                            "type": "string"
                        }
                    },
                },
                "required": ["name", "date", "participants"],
                "additionalProperties": False
            },
            "strict": True
        }
    }


response = client.responses.create(
    model="gpt-4o-mini",
    tools=[{ "type": "web_search_preview" }],
    input=messages,
    response_format={ "type": "json_schema", "json_schema": {Timeline} }
)

print(response.output_text)
print(response.output[1].content[0].annotations)


ValueError: Currently only `function` tool types support auto-parsing; Received `web_search_preview`

In [99]:
timeline_json_string = response.choices[0].message.content
# print(timeline_json_string) # Optional: print raw JSON string

# Validate with Pydantic
timeline_data = Timeline.parse_raw(timeline_json_string)
print("Timeline generated successfully:")
for year, event in zip(timeline_data.year, timeline_data.event_description):
    print(f"- {year}: {event}")


ValidationError: 1 validation error for Timeline
__root__
  Unterminated string starting at: line 1 column 401 (char 400) [type=value_error.jsondecode, input_value='{"year":["2018",">2018",...018",">2018",">2018",">', input_type=str]

In [88]:
def extract_year(unclean_year):
    """
    Extracts the year from the unclean year string.
    """
    if isinstance(unclean_year, str):
        # Remove any non-digit characters
        clean_year = ''.join(filter(str.isdigit, unclean_year))
        return int(clean_year)
    else:
        return unclean_year
    

def markdown_to_txt(markdown):
    """
    Converts markdown to plain text.
    """
    # Remove markdown formatting
    clean_text = markdown.replace('#', '').replace('*', '').replace('**', '')
    return clean_text.strip()


years = [extract_year(i) for i in eval(response.choices[0].message.content)['year']]
events = [markdown_to_txt(i) for i in eval(response.choices[0].message.content)['event_description']]

min_length = min(len(years), len(events))
years = years[:min_length]
events = events[:min_length]

timeline = dict(zip(years, events))
timeline

SyntaxError: unterminated string literal (detected at line 1) (<string>, line 1)