In [None]:
import os, json, requests, yaml
from openai import OpenAI
from datetime import datetime

In [None]:
# Read the OpenAI credentials from the local YAML config file.
# The file is expected to contain a top-level `api_key` entry.
with open('config.yaml', 'r') as f:
    config = yaml.safe_load(f)

# Shared API client used by extract_belief for every request.
api_key = config['api_key']
client = OpenAI(api_key=api_key)

In [None]:
# Wall-clock time at which this run started (naive local time).
NOW = datetime.now()
# Filesystem-friendly rendering of NOW, e.g. "2024-01-31_09-15-42";
# used in output file names and in the start notification.
FORMATTED_TIME = f"{NOW:%Y-%m-%d_%H-%M-%S}"

In [None]:
from pydantic import BaseModel, Field
from typing import Literal, List, Optional

# Structured-output schema for one channel assessment; extract_belief passes
# this class to the API as `text_format`.
# Documented with `#` comments rather than a class docstring so that the
# schema generated from the model is left unchanged.
class ChannelBelief(BaseModel):
    # Which of the three economic channels the assessment refers to.
    channel: Literal["Revenue", "Cost", "Labor"] = Field(
        ..., description="Economic channel affected by data or AI use"
    )
    # Expected direction of the effect on that channel.
    belief: Literal["increase", "decrease", "uncertain"] = Field(
        ..., description="Direction of expected effect on the channel"
    )
    # Both numeric scores are validated to lie in the closed interval [0, 1].
    confidence: float = Field(
        ..., description="Subjective confidence level (0–1) in the stated direction", ge=0, le=1
    )
    magnitude: float = Field(
        ..., description="Expected strength or intensity (0–1) of the effect", ge=0, le=1
    )
    # NOTE(review): get_prompt asks for an explanation of "≤ 100 words" while
    # this description says "≤25 words" — confirm which limit is intended.
    explanation: str = Field(
        ..., description="Short reasoning (≤25 words) summarizing textual evidence"
    )

In [None]:
# ntfy.sh topic that receives the start/finish notifications.
NTFY_TOPIC_URL = "https://ntfy.sh/earningstranscriptextractor27337"


def _send_webhook(message, url=NTFY_TOPIC_URL, timeout=10):
    """POST ``message`` to the notification topic on a best-effort basis.

    Any failure (connection error, timeout, or an HTTP error status) is
    printed and otherwise ignored, so a notification problem never
    interrupts the extraction run.

    Args:
        message: Text sent as the request body.
        url: Endpoint to post to; defaults to the shared ntfy topic.
        timeout: Seconds to wait for the server before giving up.
    """
    try:
        response = requests.post(url, data=message, timeout=timeout)
        # Without this check a 4xx/5xx reply would pass unnoticed.
        response.raise_for_status()
    except Exception as e:
        # Deliberately broad: notifications must never abort the run.
        print(e)


def send_webhook_start(time):
    """Notify that the extractor has started.

    Args:
        time: Timestamp (already formatted) to embed in the message.
    """
    _send_webhook(f"Started extractor at {time}.")


def send_webhook_finish(time):
    """Notify that the extractor has finished.

    Args:
        time: Timestamp (already formatted) to embed in the message.
    """
    _send_webhook(f"Finished extracting data at {time}.")

In [None]:
def get_instruction(channel):
    """Build the analyst instruction text for a single economic channel.

    Args:
        channel: One of "Revenue", "Cost" or "Labor".

    Returns:
        The instruction string, naming both the channel and its descriptive
        focus (e.g. "Revenue growth").

    Raises:
        KeyError: If ``channel`` is not one of the three known channels.
    """
    focus_by_channel = {
        "Revenue": "Revenue growth",
        "Cost": "Cost efficiency",
        "Labor": "Labor demand",
    }
    focus = focus_by_channel[channel]

    template = """You are a **financial analyst** evaluating a firm's disclosure or communication.
Based **only on this passage**, assess how the **firm's use of data or AI** is **expected to influence its *future performance*** — not its current nor past states— specifically through the **{focus}** channel.

Focus on *expectations* or *forward-looking implications* derived from the text
(e.g., management plans, announced initiatives, strategic direction, or investor sentiment).
Ignore purely descriptive mentions of existing data holdings or past performance unless they imply future impact.

**IMPORTANT:** You must provide an analysis specifically for the **{channel}** channel.
"""
    return template.format(focus=focus, channel=channel)

def get_prompt(channel):
    """Build the user prompt that fixes the answer format for one channel.

    Args:
        channel: Channel name; inserted verbatim, and once in lower case
            in the closing "Goal" sentence.

    Returns:
        The prompt string describing the required fields and guidelines.
    """
    template = """For the **{channel}** channel, provide an analysis **strictly in the format below**:

Channel: {channel}
Belief: [increase / decrease / uncertain]
Confidence: [0–1]
Magnitude: [0–1]
Explanation (≤ 100 words): [concise reasoning based on how data or AI use is expected to affect future outcomes]

**Guidelines:**
- **Belief** → Directional expectation (e.g., "increase" = expected positive impact on the {channel} channel).
- **Confidence** → How strongly the text supports that direction.
- **Magnitude** → Expected size or intensity of the effect.
- **Explanation** → Key textual evidence linking data/AI use to the {channel} channel's expected change.

**Goal:**
Quantify **investor-style expectations** about how **data and AI as production factors** are expected to affect the firm's *future {channel_lower} outcomes*.
"""
    return template.format(channel=channel, channel_lower=channel.lower())


In [None]:
def extract_belief(transcript, channel):
    """Ask the model for a structured belief about one channel of a transcript.

    Builds the channel-specific instruction and prompt, sends them together
    with the transcript text, and requests output parsed into ``ChannelBelief``.

    Args:
        transcript: Full transcript text to analyse.
        channel: Channel name accepted by ``get_instruction`` / ``get_prompt``.

    Returns:
        The parsed ``ChannelBelief`` instance.

    Raises:
        ValueError: If the response carries no parsed output (for example
            when the model refuses or the output is incomplete). Failing
            here gives a clear error instead of a later ``AttributeError``
            on ``None`` when the result is serialized.
    """
    instruction = get_instruction(channel)
    prompt = get_prompt(channel)

    response = client.responses.parse(
        model="gpt-5-mini",
        instructions=instruction,
        reasoning={"effort": "medium"},
        text={"verbosity": "high"},
        input=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "input_text",
                        "text": f"{prompt}\n"
                    },
                    {
                        "type": "input_text",
                        "text": f"\nHere is the transcript:\n{transcript}"
                    }
                ]
            },
        ],
        text_format=ChannelBelief,
    )

    parsed = response.output_parsed
    if parsed is None:
        raise ValueError(
            f"No structured output was returned for the {channel} channel"
        )
    return parsed

In [None]:
def save_output_json(filename, channels_list, output_dir="output"):
    """Write the channel analyses for one transcript to a single JSON file.

    The file is named ``{filename}_{FORMATTED_TIME}_parsed.json`` and holds
    one object with a ``"channels"`` list, one entry per analysis.

    Args:
        filename: Source transcript file name, used as the output name prefix.
        channels_list: Iterable of objects exposing ``model_dump()``.
        output_dir: Directory to write into; created if it does not exist.
    """
    # A fresh checkout may not have the directory yet; without this the
    # open() below raises FileNotFoundError after the API calls were paid for.
    os.makedirs(output_dir, exist_ok=True)

    output_data = {
        "channels": [channel.model_dump() for channel in channels_list]
    }
    out_file = os.path.join(output_dir, f"{filename}_{FORMATTED_TIME}_parsed.json")
    with open(out_file, "w", encoding="utf-8") as f:
        json.dump(output_data, f, indent=4)

In [None]:
folder_path = "transcripts"
output_path = "output"
channels = ["Revenue", "Cost", "Labor"]

# Make sure the results directory exists before any API call is made.
os.makedirs(output_path, exist_ok=True)

send_webhook_start(FORMATTED_TIME)

# Sorted so that the processing order is the same on every run;
# os.listdir returns entries in arbitrary order.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".md"):
        continue

    file_path = os.path.join(folder_path, filename)
    with open(file_path, "r", encoding="utf-8") as file:
        content = file.read()

    # Call API three times, once for each channel
    channel_results = []
    for channel in channels:
        print(f"Processing {filename} for {channel} channel...")
        result = extract_belief(content, channel)
        channel_results.append(result)

    # Save all three channels in one file
    save_output_json(filename, channel_results)
    print(f"Completed {filename}")

# Report the actual completion time (previously the start time NOW was
# formatted here, so the finish notification repeated the start timestamp).
finish = datetime.now()
finish_time = finish.strftime("%Y-%m-%d_%H-%M-%S")
send_webhook_finish(finish_time)