# Playground

## Import libraries

In [1]:
import getpass
import os
from langchain.chat_models import init_chat_model
from langchain_core.messages import HumanMessage, SystemMessage
from dotenv import load_dotenv

from agent import clean_html_for_llm, parse_repertoires_from_page, extract_links_from_page, identify_page_and_get_repertoire_links, get_repertoire_links


# Load environment variables (python-dotenv reads them from a local .env file).
# The trailing semicolon keeps the notebook from echoing the call's return value.
load_dotenv();

## Test API connection

In [24]:
# Ask for the Together AI key interactively only when it is not already set in
# the environment, so the key never has to be written into the notebook itself.
if not os.environ.get("TOGETHER_API_KEY"):
    os.environ["TOGETHER_API_KEY"] = getpass.getpass("Enter API key for Together AI: ")

# Notes on candidate models (the limits appear to be quoted from provider errors):
# meta-llama/Llama-3.3-70B-Instruct-Turbo-Free   `inputs` tokens + `max_new_tokens` must be <= 8193
# Qwen/Qwen2.5-Coder-32B-Instruct                `inputs` tokens + `max_new_tokens` must be <= 32769
# meta-llama/Llama-3.2-3B-Instruct-Turbo         gives extra text
model = init_chat_model("Qwen/Qwen2.5-Coder-32B-Instruct", model_provider="together")

In [25]:
# Smoke test: send a minimal system + human message pair to check that the
# configured model answers at all.
messages = [
    SystemMessage("Translate the following from English into Italian"),
    HumanMessage("hi!")
]

# Left as the cell's final expression so the notebook displays the returned message.
model.invoke(messages)

AIMessage(content='Ciao!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 4, 'prompt_tokens': 22, 'total_tokens': 26, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'Qwen/Qwen2.5-Coder-32B-Instruct', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-898e903f-638a-43aa-b9cc-54c7da28a621-0', usage_metadata={'input_tokens': 22, 'output_tokens': 4, 'total_tokens': 26, 'input_token_details': {}, 'output_token_details': {}})

In [14]:
# Single-site experiment: run the helpers imported from `agent` on one theatre homepage.
url = "https://teatrdramatyczny.pl/"
content = clean_html_for_llm(url)
links = extract_links_from_page(content, url)
# NOTE(review): the variable name suggests a boolean, but the helper's name
# suggests it may also return repertoire links; confirm its return type in `agent`.
is_repertoire_page = identify_page_and_get_repertoire_links(content, links, model)
# repertoires = parse_repertoires_from_page(content, model)

In [None]:
# Batch run: for every configured theatre, look for a repertoire link on its
# homepage, parse the performances from the chosen page, and save the result
# to temp/<name>.json.
theatres = [
    {
        "url": "https://teatrdramatyczny.pl/",
        "name": "teatrdramatyczny",
    },
    {
        "url": "https://www.teatr2strefa.pl",
        "name": "teatr2strefa",
    },
    {
        "url": "https://teatrstudio.pl",
        "name": "teatrstudio",
    },
]

# open() does not create missing directories, so make sure the output
# directory exists before the loop starts writing into it.
os.makedirs("temp", exist_ok=True)

for theater in theatres:
    theatre_url = theater["url"]
    print(f"Parsing theatre {theater['name']}, url {theatre_url}")
    content = clean_html_for_llm(theatre_url)
    links = extract_links_from_page(content, theatre_url)
    repertoire_links = get_repertoire_links(links, model)

    # Theatres for which no candidate link came back are skipped.
    if not repertoire_links:
        continue

    # Only the first candidate is considered: follow it when its confidence is
    # at least 0.7, otherwise fall back to parsing the homepage itself.
    first_url = repertoire_links[0]
    url = first_url["url"] if first_url["confidence"] >= 0.7 else theatre_url
    print(f"Repertoire link {url}")

    content = clean_html_for_llm(url)
    performances = parse_repertoires_from_page(content, model)

    # The subscript inside the f-string uses single quotes: reusing the
    # f-string's own quote character is a SyntaxError before Python 3.12.
    # UTF-8 is requested explicitly so non-ASCII (Polish) text is written the
    # same way regardless of the platform's default encoding.
    with open(f"temp/{theater['name']}.json", "w", encoding="utf-8") as file:
        file.write(performances)


In [52]:
# Parse one known repertoire URL directly, without the link-discovery step.
content = clean_html_for_llm("https://teatrstudio.pl/pl/repertuar")
performances = parse_repertoires_from_page(content, model)

[
    {
        "title": "Magazyn „Pismo” w Studio #25",
        "date": "2025-04-02",
        "time": "18:00",
        "status": "DEBATA",
        "place": "DUŻA SCENA"
    },
    {
        "title": "Ekspedycja: Burza",
        "date": "2025-04-03",
        "time": "19:00",
        "status": "SPEKTAKL",
        "place": "DUŻA SCENA"
    },
    {
        "title": "Metafizyka dwugłowego cielęcia",
        "date": "2025-04-05",
        "time": "19:00",
        "status": "SPEKTAKL",
        "place": "DUŻA SCENA"
    },
    {
        "title": "Metafizyka dwugłowego cielęcia",
        "date": "2025-04-06",
        "time": "16:00",
        "status": "SPEKTAKL",
        "place": "DUŻA SCENA"
    },
    {
        "title": "Metafizyka dwugłowego cielęcia",
        "date": "2025-04-06",
        "time": "19:30",
        "status": "SPEKTAKL",
        "place": "DUŻA SCENA"
    },
    {
        "title": "Ekspedycja: Burza",
        "date": "2025-04-08",
        "time": "19:00",
        "status": "SP