In [35]:
# %load_ext autoreload
# %autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
import sys
import os
from googleapiclient.discovery import build
from typing import List, Literal, Optional
from pydantic import BaseModel, Field
from IPython.display import JSON

import nest_asyncio
nest_asyncio.apply()

sys.path.append(os.path.abspath(".."))
from src import YouTubeHelper, llm_client, text_to_speech, io_manager, schemas

In [2]:
# import importlib
# importlib.reload(src.io_manager)

In [3]:
# One labelled data point attached to a stock entry; both parts are kept as text.
# Documented with `#` comments on purpose: pydantic copies a model's docstring into
# the "description" of its JSON schema, and this model is nested inside MarketReport,
# which is handed to the LLM client as `response_schema`.
class StockMetric(BaseModel):
    label: str  # label text for the metric
    value: str  # metric value, stored as a string (no numeric parsing here)

# One stock covered by a report: identification, a sentiment call, the supporting
# thesis with its sources and metrics, and an optional voiceover script.
# (Kept as `#` comments rather than a docstring so the generated JSON schema,
# which pydantic would otherwise extend with a "description", stays unchanged.)
class StockEntry(BaseModel):
    ticker: str
    company_name: str
    # Only these three exact spellings validate; any other value is rejected.
    sentiment: Literal["Bullish", "Bearish", "Neutral"]
    thesis: str
    sources: List[str]
    metrics: List[StockMetric]
    # Defaults to None; generate_all_scripts() later stores the generated script here.
    voiceover_script: Optional[str] = None
    # Disabled alternative: declaring the field with exclude=True would presumably keep
    # the script out of serialized output (model_dump / model_dump_json) — TODO confirm
    # and decide which declaration to keep.
    # voiceover_script: Optional[str] = Field(default=None, exclude=True)

# Top-level report: a title plus the list of per-stock entries.
# This class is what the notebook passes to the LLM client as `response_schema`,
# so its fields define the structure requested from the model. Documented with `#`
# comments because a docstring would be added to the JSON schema as "description".
class MarketReport(BaseModel):
    report_title: str
    stocks: List[StockEntry]


In [19]:
# Transcript text; sent to the LLM as the user message when the report is generated.
with open("../reports/2026-02-06_transcripts.txt", encoding="utf-8") as transcript_file:
    transcript = transcript_file.read()

# System prompt used for the report-generation call.
with open("../prompts/combined_transcript_summary.md", encoding="utf-8") as system_prompt_file:
    system_prompt = system_prompt_file.read()

# Per-ticker prompt template; loaded here but not referenced again in this notebook.
with open("../prompts/generate_transcript_for_ticker.md", encoding="utf-8") as ticker_prompt_file:
    ticker_prompt_template = ticker_prompt_file.read()

In [20]:
# MarketReport.model_json_schema()
# system_prompt

In [21]:
# Disabled experiment: build the JSON schema by hand and drop `voiceover_script`
# from StockEntry before handing it to the model.
# ai_schema = MarketReport.model_json_schema()
# ai_schema['$defs']['StockEntry']['properties'].pop('voiceover_script', None)
# ai_schema

# Request a structured report for the transcript. MarketReport is passed as the
# response schema; the result is used below as a model instance (model_dump()).
llm = llm_client.LLMClient(provider="google")
json_report = llm.chat(
    system_prompt=system_prompt,
    user_message=transcript,
    response_schema=MarketReport,
)


In [22]:
# Show the freshly generated report with IPython's rich JSON display.
display(
    JSON(data=json_report.model_dump())
)

<IPython.core.display.JSON object>

In [23]:
def generate_all_scripts(
    report: MarketReport,
    provider: str = "google",
    system_prompt: str = "You are a financial scriptwriter.",
) -> MarketReport:
    """Generate a voiceover script for every stock in ``report``.

    For each entry in ``report.stocks`` a prompt is built with
    ``io_manager.load_formated_stock_prompt`` and sent to the LLM; the reply,
    with surrounding whitespace stripped, is stored on that entry's
    ``voiceover_script`` field.

    The report is modified in place and the very same object is returned, so
    the return value is an alias of the argument, not a copy.

    Args:
        report: Report whose stock entries should receive scripts.
        provider: Provider name passed to ``llm_client.LLMClient``. Defaults to
            ``"google"``, the value that used to be hard-coded.
        system_prompt: System prompt sent with every script request. Defaults
            to the previously hard-coded scriptwriter prompt.

    Returns:
        The same ``report`` object, with ``voiceover_script`` filled in for
        every stock.
    """
    print(f"Generating scripts for {len(report.stocks)} stocks...")

    for stock in report.stocks:
        script_prompt = io_manager.load_formated_stock_prompt(stock)

        # NOTE(review): a new client is created for every stock, as before.
        # Hoisting it above the loop is only safe if LLMClient keeps no
        # per-conversation state between chat() calls — confirm before changing.
        llm = llm_client.LLMClient(provider=provider)
        script_text = llm.chat(
            system_prompt=system_prompt,
            user_message=script_prompt,
        )

        # Write the result back onto the entry (this mutates the caller's report).
        stock.voiceover_script = script_text.strip()
        # TODO: text-to-speech for the generated script is not wired up yet.

    return report

# generate_all_scripts() fills in the scripts on the object it is given and returns
# that same object, so json_report_full and json_report are aliases afterwards.
json_report_full = generate_all_scripts(report=json_report)

Generating scripts for 3 stocks...
-> Processing HIMS...
-> Processing NVO...
-> Processing LLY...


In [25]:
# Show the report again, now including the generated voiceover scripts.
display(
    JSON(data=json_report_full.model_dump())
)

<IPython.core.display.JSON object>

In [29]:
# Template for one combined, segmented video script. It has two placeholders,
# {report_title} and {full_json_dump}, which are filled from the completed report.
prompt = """
Role: Financial Video Scriptwriter
Task: Write a segmented YouTube script. Each segment must be roughly 30 seconds (70-75 words).

REPORT: {report_title}
DATA: {full_json_dump}

CONSTRAINTS FOR EVERY SEGMENT:
- Word Count: 70-75 words.
- Structure: 4-5 punchy sentences.
- Tone: Professional and fast-paced.
- Content: Mention the Ticker, Sentiment, and at least one key Metric.

OUTPUT FORMAT:
Return a list of segments.
1. [INTRO]: Hook the viewer on the overall theme.
2. [STOCK TICKER]: Focus on the thesis and data.
... (repeat for each stock)
3. [OUTRO]: Summary and call to action.
"""

# The full report (including per-stock voiceover scripts) is embedded as indented JSON.
final_prompt = prompt.format_map(
    {
        "report_title": json_report_full.report_title,
        "full_json_dump": json_report_full.model_dump_json(indent=2),
    }
)

# Uses the notebook-level `llm` client created in the report-generation cell.
ft = llm.chat(
    system_prompt="You are a financial scriptwriter.",
    user_message=final_prompt,
)
ft

"1. [INTRO]\nThe weight loss market is undergoing a massive shift as GLP-1 compounding pharmacies challenge pharmaceutical giants. New legal battles are redefining how semaglutide is distributed, creating massive volatility for key players in the space. We are analyzing the disruptive power of compounding laws and the potential patent risks facing industry leaders. Stay tuned as we break down the high-conviction moves and legal precedents shaping the future of this multibillion-dollar healthcare sector.\n\n2. [HIMS]\nHIMS is a disruptive force with a strong Bullish sentiment as it scales compounded oral semaglutide. By utilizing liposomal technology, they successfully navigate patent hurdles while offering a compelling ninety-nine dollar ongoing monthly price point. The company is weaponized with a one billion dollar cash reserve and a board stacked with former FDA officials. This strategic leverage of 503A and 503b compounding laws positions Hims and Hers as a high-growth leader in we

In [28]:
# Inspect the title stored on the completed report.
json_report_full.report_title

'State of the Market: Disruptive GLP-1 Compounding and Pharmaceutical Legal Battles'

In [17]:
from src import utils
def save_json(report, filename):
    """Write ``report`` as indented JSON under the project's ``reports`` directory.

    Args:
        report: Object exposing ``model_dump_json(indent=...)`` (the notebook
            passes a pydantic report model).
        filename: File name, relative to ``<PROJECT_ROOT>/reports``.

    Returns:
        The path that was written (``utils.PROJECT_ROOT / "reports" / filename``).
        Previously the function returned nothing.
    """
    target = utils.PROJECT_ROOT / "reports" / filename

    # Serialize before touching the filesystem so a serialization error cannot
    # leave an empty or truncated file behind.
    json_data = report.model_dump_json(indent=2)

    # open(..., "w") does not create missing directories; without this a fresh
    # checkout that has no reports/ folder fails with FileNotFoundError.
    os.makedirs(os.path.dirname(target), exist_ok=True)

    with open(target, "w", encoding="utf-8") as f:
        f.write(json_data)

    return target
# Persist the completed report with the notebook-local helper.
# The io_manager variant of the same call is left disabled:
# io_manager.save_json(json_report_full, "test.json")
save_json(report=json_report_full, filename="test.json")