In [11]:
import os
from dotenv import load_dotenv

# Load settings from the project's .env file via python-dotenv.
load_dotenv()

# Credentials for the external services used in this notebook.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
FINNHUB_API_KEY = os.getenv("FINNHUB_API_KEY")

# Data locations: input fundamentals CSV and the Chroma persistence directory.
FUNDAMENTALS_CSV = os.getenv("FUNDAMENTALS_CSV")
CHROMA_PERSIST_DIR = os.getenv("CHROMA_PERSIST_DIR")

# Model names and the label of the fundamentals snapshot.
OPENAI_EMBED_MODEL = "text-embedding-3-large"
OPENAI_CHAT_MODEL = "gpt-4o-mini"
SNAPSHOT_DATE = "2024"

# Fail fast and name every missing/empty setting at once, instead of a bare
# AssertionError that does not say which variable is the problem.
_REQUIRED_SETTINGS = {
    "OPENAI_API_KEY": OPENAI_API_KEY,
    "TAVILY_API_KEY": TAVILY_API_KEY,
    "FINNHUB_API_KEY": FINNHUB_API_KEY,
    "FUNDAMENTALS_CSV": FUNDAMENTALS_CSV,
    "CHROMA_PERSIST_DIR": CHROMA_PERSIST_DIR,
}
_missing_settings = [name for name, value in _REQUIRED_SETTINGS.items() if not value]
assert not _missing_settings, (
    "Missing or empty environment variables: " + ", ".join(_missing_settings)
)

# Only the non-secret paths are echoed; API keys are never printed.
print("CSV:", FUNDAMENTALS_CSV)
print("Chroma:", CHROMA_PERSIST_DIR)


CSV: ../data/raw/fundamentals_2024.csv
Chroma: ../data/indices/chroma


In [12]:
import pandas as pd

# Read the fundamentals snapshot from the path configured in the environment.
df = pd.read_csv(filepath_or_buffer=FUNDAMENTALS_CSV)
# Show the first five rows as a quick sanity check of the columns.
df.head(n=5)


Unnamed: 0.1,Unnamed: 0,Name,Country,Sales,Profit,Assets,Market Value
0,0,JPMorganChase,United States,$252.9 B,$50 B,"$4,090.7 B",$588.1 B
1,1,Berkshire Hathaway,United States,$369 B,$73.4 B,"$1,070 B",$899.1 B
2,2,Saudi Arabian Oil Company (Saudi Aramco),Saudi Arabia,$489.1 B,$116.9 B,$661.5 B,"$1,919.3 B"
3,3,ICBC,China,$223.8 B,$50.4 B,"$6,586 B",$215.2 B
4,4,Bank of America,United States,$183.3 B,$25 B,"$3,273.8 B",$307.3 B


In [13]:
def row_to_company_card(row, snapshot_date=None):
    """Render one fundamentals row as a text card plus its metadata.

    Args:
        row: Record indexable by column name (e.g. a pandas Series or a dict)
            providing "Name", "Country", "Sales", "Profit", "Assets" and
            "Market Value".
        snapshot_date: Label of the reporting period. It is used in both the
            card heading and the metadata so the two cannot drift apart.
            When omitted, the notebook-level ``SNAPSHOT_DATE`` is used.

    Returns:
        A ``(text, metadata)`` tuple: ``text`` is the newline-joined card and
        ``metadata`` is a dict with the keys "company", "country",
        "snapshot_date" and "data_type".
    """
    if snapshot_date is None:
        snapshot_date = SNAPSHOT_DATE

    text = "\n".join([
        f"Company: {row['Name']}",
        f"Country: {row['Country']}",
        "",
        # Previously hard-coded as "2024"; now follows the snapshot label.
        f"Financial snapshot ({snapshot_date}):",
        f"Sales: {row['Sales']}",
        f"Profit: {row['Profit']}",
        f"Assets: {row['Assets']}",
        f"Market Value: {row['Market Value']}",
    ])

    metadata = {
        "company": row["Name"],
        "country": row["Country"],
        "snapshot_date": snapshot_date,
        "data_type": "fundamentals",
    }

    return text, metadata


In [None]:
import os
import shutil
from pathlib import Path
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

ModuleNotFoundError: No module named 'langchain_community'

In [16]:
# Reject an empty string as well as None, matching the truthiness check done
# in the configuration cell (os.getenv returns "" for a variable set to empty).
assert OPENAI_API_KEY, "OPENAI_API_KEY is missing or empty"


In [1]:
import openai

In [2]:
# Report which OpenAI SDK version the kernel has loaded.
print(f"OpenAI version: {openai.__version__}")

OpenAI version: 1.52.0


In [None]:
# Read the version from the installed distribution's metadata. This needs no
# prior `import langchain_openai` (so the cell does not depend on execution
# order) and does not rely on the package defining `__version__`.
from importlib.metadata import version as _dist_version

print("langchain-openai version:", _dist_version("langchain-openai"))

In [2]:
import langchain_openai

In [None]:


def _company_card_ids(metadatas):
    """Return one unique, deterministic document id per metadata record."""
    used = set()
    ids = []
    for meta in metadatas:
        # str() guards against non-string company values; None becomes "".
        base = f"company::{str(meta.get('company') or '').strip()}"
        candidate = base
        suffix = 1
        # Repeated company names get "#2", "#3", ... so ids never collide.
        while candidate in used:
            suffix += 1
            candidate = f"{base}#{suffix}"
        used.add(candidate)
        ids.append(candidate)
    return ids


def build_chroma_from_company_cards(
    texts,
    metadatas,
    chroma_dir,
    collection_name="company_cards",
    rebuild=False,
    embed_model="text-embedding-3-large",
    openai_api_key=None,
):
    """Create (or reopen) a persistent Chroma collection and add company cards.

    Args:
        texts: Card texts to embed; may be empty or None to only open the store.
        metadatas: One metadata dict per text; the "company" entry is used to
            build the document id.
        chroma_dir: Directory in which the Chroma data is persisted.
        collection_name: Name of the Chroma collection.
        rebuild: When True and ``chroma_dir`` exists, the directory is deleted
            before the store is created.
        embed_model: OpenAI embedding model name.
        openai_api_key: API key passed to ``OpenAIEmbeddings``.

    Returns:
        The ``Chroma`` vector store instance.

    Raises:
        ValueError: If ``openai_api_key`` is missing/empty, or if ``texts`` and
            ``metadatas`` differ in length.
    """
    if not openai_api_key:
        raise ValueError("OPENAI_API_KEY is missing")

    # Validate inputs before touching the filesystem so that a bad call cannot
    # wipe an existing index via ``rebuild=True`` and then fail.
    texts = list(texts) if texts is not None else []
    metadatas = list(metadatas) if metadatas is not None else []
    if len(texts) != len(metadatas):
        raise ValueError(
            "texts and metadatas must have the same length "
            f"(got {len(texts)} and {len(metadatas)})"
        )

    p = Path(chroma_dir)

    if rebuild and p.exists():
        shutil.rmtree(p)

    p.mkdir(parents=True, exist_ok=True)

    emb = OpenAIEmbeddings(api_key=openai_api_key, model=embed_model)

    vs = Chroma(
        collection_name=collection_name,
        persist_directory=str(p),
        embedding_function=emb,
    )

    if texts:
        vs.add_texts(
            texts=texts,
            metadatas=metadatas,
            ids=_company_card_ids(metadatas),
        )

    # Some Chroma wrapper versions do not expose persist() (data is written to
    # persist_directory automatically), so only call it when it is available.
    persist = getattr(vs, "persist", None)
    if callable(persist):
        persist()
    return vs


  from .autonotebook import tqdm as notebook_tqdm


Error importing huggingface_hub.hf_api: 'NoneType' object is not subscriptable


TypeError: 'NoneType' object is not subscriptable

In [None]:
# from langchain_openai import OpenAIEmbeddings
# from langchain_community.vectorstores import Chroma

# embeddings = OpenAIEmbeddings(
#     api_key=OPENAI_API_KEY,
#     model=OPENAI_EMBED_MODEL
# )

# vectorstore = Chroma(
#     collection_name="company_cards",
#     embedding_function=embeddings,
#     persist_directory=CHROMA_PERSIST_DIR
# )

# docs = [row_to_company_card(r) for _, r in df.iterrows()]
# texts = [t for t, _ in docs]
# metadatas = [m for _, m in docs]

# vectorstore.add_texts(texts=texts, metadatas=metadatas)
# vectorstore.persist()

# print("Inserted:", len(texts))


In [1]:
import numpy as np

In [4]:
import yfinance as yf

In [5]:


# Look up Apple (AAPL) on Yahoo Finance and fetch one year of price history.
aapl = yf.Ticker("AAPL")
hist = aapl.history(period="1y")

# Plain-text preview of the most recent rows of the returned DataFrame.
latest_rows = hist.tail()
print(latest_rows)


                                 Open        High         Low       Close  \
Date                                                                        
2025-12-19 00:00:00-05:00  272.149994  274.600006  269.899994  273.670013   
2025-12-22 00:00:00-05:00  272.859985  273.880005  270.510010  270.970001   
2025-12-23 00:00:00-05:00  270.839996  272.500000  269.559998  272.359985   
2025-12-24 00:00:00-05:00  272.339996  275.429993  272.200012  273.809998   
2025-12-26 00:00:00-05:00  274.160004  275.369995  272.859985  273.399994   

                              Volume  Dividends  Stock Splits  
Date                                                           
2025-12-19 00:00:00-05:00  144632000        0.0           0.0  
2025-12-22 00:00:00-05:00   36571800        0.0           0.0  
2025-12-23 00:00:00-05:00   29642000        0.0           0.0  
2025-12-24 00:00:00-05:00   17910600        0.0           0.0  
2025-12-26 00:00:00-05:00   21455300        0.0           0.0  


In [7]:
# Rich display of the five most recent rows of the price history.
hist.tail(n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2025-12-19 00:00:00-05:00,272.149994,274.600006,269.899994,273.670013,144632000,0.0,0.0
2025-12-22 00:00:00-05:00,272.859985,273.880005,270.51001,270.970001,36571800,0.0,0.0
2025-12-23 00:00:00-05:00,270.839996,272.5,269.559998,272.359985,29642000,0.0,0.0
2025-12-24 00:00:00-05:00,272.339996,275.429993,272.200012,273.809998,17910600,0.0,0.0
2025-12-26 00:00:00-05:00,274.160004,275.369995,272.859985,273.399994,21455300,0.0,0.0


In [9]:
from tavily import TavilyClient

In [10]:
# SECURITY: the Tavily key must never be embedded in the notebook. Use the
# TAVILY_API_KEY loaded from the environment in the configuration cell. The
# literal key that used to be hard-coded here should be treated as leaked and
# revoked/rotated.
tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
response = tavily_client.search("Who is Leo Messi?")

print(response)

{'query': 'Who is Leo Messi?', 'follow_up_questions': None, 'answer': None, 'images': [], 'results': [{'url': 'https://www.imdb.com/name/nm2177779/', 'title': 'Lionel Messi - IMDb', 'content': "# Lionel Messi. Lionel Messi is a football player from Argentina who plays for Inter Miami. He has won the Ballon D'Or, the annual award given to the best player in the world, 8 times, 2022 FIFA World Cup winner and an Olympic gold medal winner in 2008. Faced with mounting medical expenses to treat a growth hormone condition, Messi's family accepted an offer to move the 13-year-old prodigy to FC Barcelona, who would pay for his treatment. Messi has gone on to become one of the most decorated players in football history and has broken countless records for his club and his country. ### Videos18. [Trailer Season 1 [OV]](/video/vi950781977/? Won the gold medal with Argentina Olympic Football Team at the 2008 Olympic Games. Sometimes you have to accept you can't win all the time. * How old is Lionel