# StockLLM + FinSeer local test

This notebook validates local inference using our schema wrappers.

- Build a tiny OHLCV dataset
- Create `QueryBasic` and a few `Candidate` objects
- Encode with FinSeer and retrieve top-k (naive cosine here for demo)
- Prompt StockLLM and parse JSON result


In [2]:
# If running in a clean environment, install requirements
# Uncomment if needed
# %pip install --quiet transformers accelerate sentencepiece faiss-cpu pydantic pandas
# %pip install --quiet bitsandbytes talib-binary --extra-index-url https://pypi.org/simple

from datetime import date, timedelta
import numpy as np
import pandas as pd

from src.schemas import (
    QueryBasic, QueryTechnical, Candidate, IndicatorSeries, Movement, TimeFrame
)
from src.services import FinSeerEmbedder, FinSeerConfig, StockLLMGenerator, StockLLMConfig

# Synthetic daily OHLCV bars for two tickers, indexed by (symbol, timestamp).
symbols = ["AAPL", "MSFT"]
start = pd.Timestamp("2024-01-02")
dates = pd.date_range(start, periods=10, freq="B")  # ten business days, shared by every symbol

# One (symbol, timestamp) key per bar, symbol-major so it lines up with `rows` below.
idx = [(sym, day) for sym in symbols for day in dates]

rows = []
for sym in symbols:
    # AAPL's close gains 1.5 per bar; any other symbol gains 0.5 per bar. Both start at 100.
    drift = 0.5 if sym == "AAPL" else -0.5
    for step in range(len(dates)):
        price = 100 + step + drift * step
        rows.append(
            dict(
                open=price - 0.2,
                high=price + 0.5,
                low=price - 0.5,
                close=price,
                adjusted_close=price,  # no corporate actions in the toy data
                volume=1_000_000 + 1000 * step,
            )
        )

df = pd.DataFrame(
    rows,
    index=pd.MultiIndex.from_tuples(idx, names=["symbol", "timestamp"]),
)

# Query for AAPL; `as_of` is one calendar day after AAPL's last bar in `df`.
last_bar_ts = df.loc["AAPL"].index.max()
as_of = (last_bar_ts + pd.Timedelta(days=1)).date()
qb = QueryBasic.from_dataframe(
    "AAPL",
    df,
    as_of=as_of,
    lookback=5,
    timeframe=TimeFrame.day,
)
print("Query JSON:", qb.to_paper_json())

# Shared inputs for the demo candidates: AAPL's last five bars and their calendar dates.
sdf = df.loc["AAPL"].iloc[-5:]
recent_dates = [ts.date() for ts in sdf.index]

def make_cand(name, series):
    """Wrap one indicator series as a demo ``Candidate`` for AAPL.

    Args:
        name: Indicator label stored on the ``IndicatorSeries`` (e.g. ``"high"``).
        series: Indicator values, passed through as ``IndicatorSeries.values``.

    Returns:
        A daily-timeframe ``Candidate`` dated one calendar day before the
        notebook-level ``as_of``, with ``movement=None`` and the notebook-level
        ``recent_dates``.
    """
    day_before_query = as_of - timedelta(days=1)
    indicator = IndicatorSeries(name=name, values=series)
    fields = {
        "candidate_stock": "AAPL",
        "candidate_date": day_before_query,
        "movement": None,
        "timeframe": TimeFrame.day,
        "recent_dates": recent_dates,
        "indicator": indicator,
    }
    return Candidate(**fields)

# Two demo candidates: the actual "high" column of the last five AAPL bars, and a
# synthetic RSI ramp from 45 to 55 (generated with linspace, not derived from prices).
high_values = sdf["high"].tolist()
rsi_values = np.linspace(45, 55, 5).tolist()
cand_high = make_cand("high", high_values)
cand_rsi = make_cand("RSI", rsi_values)
candidates = [cand_high, cand_rsi]

# Retriever first, then generator (adjust model ids if needed).
fin_conf = FinSeerConfig(model_id="TheFinAI/FinSeer")
finseer = FinSeerEmbedder(fin_conf)

stock_conf = StockLLMConfig(
    model_id="TheFinAI/StockLLM",
    temperature=0.0,
    max_new_tokens=64,
)
stockllm = StockLLMGenerator(stock_conf)

# Embed the query and every candidate; the cosine top-k further down consumes these.
q_emb = finseer.encode_query(qb)
C = finseer.encode_candidates(candidates)

# cosine similarity
def cosine(a, b):
    """Cosine similarity between one query vector and a set of candidate vectors.

    Args:
        a: Query embedding, any array-like. It is flattened to 1-D, so both
            ``(d,)`` and ``(1, d)`` inputs are accepted.
        b: Candidate embeddings, array-like of shape ``(n, d)``. A single ``(d,)``
            vector is treated as one candidate.

    Returns:
        ``np.ndarray`` of shape ``(n,)`` holding one similarity per row of ``b``.
        An all-zero vector scores 0.0 instead of NaN because of the epsilon in
        the denominators.
    """
    eps = 1e-12  # keeps the division finite for all-zero vectors
    # Coerce first: plain lists do not support `/`, and a (1, d) query would make
    # `b @ a` fail with a shape mismatch.
    a = np.asarray(a).ravel()
    b = np.atleast_2d(np.asarray(b))
    a = a / (np.linalg.norm(a) + eps)
    b = b / (np.linalg.norm(b, axis=1, keepdims=True) + eps)
    return b @ a

# Rank candidates by descending similarity to the query and keep the first two.
sims = cosine(q_emb, C)
order = np.argsort(-sims)
top_two = order[:2]
selected = [candidates[pos] for pos in top_two]

# Pass the query and the retrieved candidates to StockLLM's schema-aware helper.
result = stockllm.predict_from_schemas(qb, selected)
print("Result:", result)


ModuleNotFoundError: No module named 'numpy'

In [None]:
# If running in a clean environment, install requirements
# Uncomment if needed
# %pip install --quiet transformers accelerate sentencepiece faiss-cpu pydantic pandas
# %pip install --quiet bitsandbytes talib-binary --extra-index-url https://pypi.org/simple

from datetime import date, timedelta
import numpy as np
import pandas as pd

from src.schemas import (
    QueryBasic, QueryTechnical, Candidate, IndicatorSeries, Movement, TimeFrame
)
from src.services import FinSeerEmbedder, FinSeerConfig, StockLLMGenerator, StockLLMConfig

# Synthetic daily OHLCV bars for two tickers, indexed by (symbol, timestamp).
symbols = ["AAPL", "MSFT"]
start = pd.Timestamp("2024-01-02")
dates = pd.date_range(start, periods=10, freq="B")  # ten business days, shared by every symbol

# One (symbol, timestamp) key per bar, symbol-major so it lines up with `rows` below.
idx = [(sym, day) for sym in symbols for day in dates]

rows = []
for sym in symbols:
    # AAPL's close gains 1.5 per bar; any other symbol gains 0.5 per bar. Both start at 100.
    drift = 0.5 if sym == "AAPL" else -0.5
    for step in range(len(dates)):
        price = 100 + step + drift * step
        rows.append(
            dict(
                open=price - 0.2,
                high=price + 0.5,
                low=price - 0.5,
                close=price,
                adjusted_close=price,  # no corporate actions in the toy data
                volume=1_000_000 + 1000 * step,
            )
        )

df = pd.DataFrame(
    rows,
    index=pd.MultiIndex.from_tuples(idx, names=["symbol", "timestamp"]),
)

# Query for AAPL; `as_of` is one calendar day after AAPL's last bar in `df`.
last_bar_ts = df.loc["AAPL"].index.max()
as_of = (last_bar_ts + pd.Timedelta(days=1)).date()
qb = QueryBasic.from_dataframe(
    "AAPL",
    df,
    as_of=as_of,
    lookback=5,
    timeframe=TimeFrame.day,
)
print("Query JSON:", qb.to_paper_json())

# Shared inputs for the demo candidates: AAPL's last five bars and their calendar dates.
sdf = df.loc["AAPL"].iloc[-5:]
recent_dates = [ts.date() for ts in sdf.index]

def make_cand(name, series):
    """Wrap one indicator series as a demo ``Candidate`` for AAPL.

    Args:
        name: Indicator label stored on the ``IndicatorSeries`` (e.g. ``"high"``).
        series: Indicator values, passed through as ``IndicatorSeries.values``.

    Returns:
        A daily-timeframe ``Candidate`` dated one calendar day before the
        notebook-level ``as_of``, with ``movement=None`` and the notebook-level
        ``recent_dates``.
    """
    day_before_query = as_of - timedelta(days=1)
    indicator = IndicatorSeries(name=name, values=series)
    fields = {
        "candidate_stock": "AAPL",
        "candidate_date": day_before_query,
        "movement": None,
        "timeframe": TimeFrame.day,
        "recent_dates": recent_dates,
        "indicator": indicator,
    }
    return Candidate(**fields)

# Two demo candidates: the actual "high" column of the last five AAPL bars, and a
# synthetic RSI ramp from 45 to 55 (generated with linspace, not derived from prices).
high_values = sdf["high"].tolist()
rsi_values = np.linspace(45, 55, 5).tolist()
cand_high = make_cand("high", high_values)
cand_rsi = make_cand("RSI", rsi_values)
candidates = [cand_high, cand_rsi]

# Retriever first, then generator (adjust model ids if needed).
fin_conf = FinSeerConfig(model_id="TheFinAI/FinSeer")
finseer = FinSeerEmbedder(fin_conf)

stock_conf = StockLLMConfig(
    model_id="TheFinAI/StockLLM",
    temperature=0.0,
    max_new_tokens=64,
)
stockllm = StockLLMGenerator(stock_conf)

# Embed the query and every candidate; the cosine top-k further down consumes these.
q_emb = finseer.encode_query(qb)
C = finseer.encode_candidates(candidates)

# cosine similarity
def cosine(a, b):
    """Cosine similarity between one query vector and a set of candidate vectors.

    Args:
        a: Query embedding, any array-like. It is flattened to 1-D, so both
            ``(d,)`` and ``(1, d)`` inputs are accepted.
        b: Candidate embeddings, array-like of shape ``(n, d)``. A single ``(d,)``
            vector is treated as one candidate.

    Returns:
        ``np.ndarray`` of shape ``(n,)`` holding one similarity per row of ``b``.
        An all-zero vector scores 0.0 instead of NaN because of the epsilon in
        the denominators.
    """
    eps = 1e-12  # keeps the division finite for all-zero vectors
    # Coerce first: plain lists do not support `/`, and a (1, d) query would make
    # `b @ a` fail with a shape mismatch.
    a = np.asarray(a).ravel()
    b = np.atleast_2d(np.asarray(b))
    a = a / (np.linalg.norm(a) + eps)
    b = b / (np.linalg.norm(b, axis=1, keepdims=True) + eps)
    return b @ a

# Rank candidates by descending similarity to the query and keep the first two.
sims = cosine(q_emb, C)
order = np.argsort(-sims)
top_two = order[:2]
selected = [candidates[pos] for pos in top_two]

# Pass the query and the retrieved candidates to StockLLM's schema-aware helper.
result = stockllm.predict_from_schemas(qb, selected)
print("Result:", result)


ModuleNotFoundError: No module named 'numpy'

In [None]:
# If running in a clean environment, install requirements
# Uncomment if needed
# %pip install --quiet transformers accelerate sentencepiece faiss-cpu pydantic pandas
# %pip install --quiet bitsandbytes talib-binary --extra-index-url https://pypi.org/simple

from datetime import date, timedelta
import numpy as np
import pandas as pd

from src.schemas import (
    QueryBasic, QueryTechnical, Candidate, IndicatorSeries, Movement, TimeFrame
)
from src.services import FinSeerEmbedder, FinSeerConfig, StockLLMGenerator, StockLLMConfig

# Synthetic daily OHLCV bars for two tickers, indexed by (symbol, timestamp).
symbols = ["AAPL", "MSFT"]
start = pd.Timestamp("2024-01-02")
dates = pd.date_range(start, periods=10, freq="B")  # ten business days, shared by every symbol

# One (symbol, timestamp) key per bar, symbol-major so it lines up with `rows` below.
idx = [(sym, day) for sym in symbols for day in dates]

rows = []
for sym in symbols:
    # AAPL's close gains 1.5 per bar; any other symbol gains 0.5 per bar. Both start at 100.
    drift = 0.5 if sym == "AAPL" else -0.5
    for step in range(len(dates)):
        price = 100 + step + drift * step
        rows.append(
            dict(
                open=price - 0.2,
                high=price + 0.5,
                low=price - 0.5,
                close=price,
                adjusted_close=price,  # no corporate actions in the toy data
                volume=1_000_000 + 1000 * step,
            )
        )

df = pd.DataFrame(
    rows,
    index=pd.MultiIndex.from_tuples(idx, names=["symbol", "timestamp"]),
)

# Query for AAPL; `as_of` is one calendar day after AAPL's last bar in `df`.
last_bar_ts = df.loc["AAPL"].index.max()
as_of = (last_bar_ts + pd.Timedelta(days=1)).date()
qb = QueryBasic.from_dataframe(
    "AAPL",
    df,
    as_of=as_of,
    lookback=5,
    timeframe=TimeFrame.day,
)
print("Query JSON:", qb.to_paper_json())

# Shared inputs for the demo candidates: AAPL's last five bars and their calendar dates.
sdf = df.loc["AAPL"].iloc[-5:]
recent_dates = [ts.date() for ts in sdf.index]

def make_cand(name, series):
    """Wrap one indicator series as a demo ``Candidate`` for AAPL.

    Args:
        name: Indicator label stored on the ``IndicatorSeries`` (e.g. ``"high"``).
        series: Indicator values, passed through as ``IndicatorSeries.values``.

    Returns:
        A daily-timeframe ``Candidate`` dated one calendar day before the
        notebook-level ``as_of``, with ``movement=None`` and the notebook-level
        ``recent_dates``.
    """
    day_before_query = as_of - timedelta(days=1)
    indicator = IndicatorSeries(name=name, values=series)
    fields = {
        "candidate_stock": "AAPL",
        "candidate_date": day_before_query,
        "movement": None,
        "timeframe": TimeFrame.day,
        "recent_dates": recent_dates,
        "indicator": indicator,
    }
    return Candidate(**fields)

# Two demo candidates: the actual "high" column of the last five AAPL bars, and a
# synthetic RSI ramp from 45 to 55 (generated with linspace, not derived from prices).
high_values = sdf["high"].tolist()
rsi_values = np.linspace(45, 55, 5).tolist()
cand_high = make_cand("high", high_values)
cand_rsi = make_cand("RSI", rsi_values)
candidates = [cand_high, cand_rsi]

# Retriever first, then generator (adjust model ids if needed).
fin_conf = FinSeerConfig(model_id="TheFinAI/FinSeer")
finseer = FinSeerEmbedder(fin_conf)

stock_conf = StockLLMConfig(
    model_id="TheFinAI/StockLLM",
    temperature=0.0,
    max_new_tokens=64,
)
stockllm = StockLLMGenerator(stock_conf)

# Embed the query and every candidate; the cosine top-k further down consumes these.
q_emb = finseer.encode_query(qb)
C = finseer.encode_candidates(candidates)

# cosine similarity
def cosine(a, b):
    """Cosine similarity between one query vector and a set of candidate vectors.

    Args:
        a: Query embedding, any array-like. It is flattened to 1-D, so both
            ``(d,)`` and ``(1, d)`` inputs are accepted.
        b: Candidate embeddings, array-like of shape ``(n, d)``. A single ``(d,)``
            vector is treated as one candidate.

    Returns:
        ``np.ndarray`` of shape ``(n,)`` holding one similarity per row of ``b``.
        An all-zero vector scores 0.0 instead of NaN because of the epsilon in
        the denominators.
    """
    eps = 1e-12  # keeps the division finite for all-zero vectors
    # Coerce first: plain lists do not support `/`, and a (1, d) query would make
    # `b @ a` fail with a shape mismatch.
    a = np.asarray(a).ravel()
    b = np.atleast_2d(np.asarray(b))
    a = a / (np.linalg.norm(a) + eps)
    b = b / (np.linalg.norm(b, axis=1, keepdims=True) + eps)
    return b @ a

# Rank candidates by descending similarity to the query and keep the first two.
sims = cosine(q_emb, C)
order = np.argsort(-sims)
top_two = order[:2]
selected = [candidates[pos] for pos in top_two]

# Pass the query and the retrieved candidates to StockLLM's schema-aware helper.
result = stockllm.predict_from_schemas(qb, selected)
print("Result:", result)


ModuleNotFoundError: No module named 'numpy'

In [None]:
# If running in a clean environment, install requirements
# Uncomment if needed
# %pip install --quiet transformers accelerate sentencepiece faiss-cpu pydantic pandas
# %pip install --quiet bitsandbytes talib-binary --extra-index-url https://pypi.org/simple

from datetime import date, timedelta
import numpy as np
import pandas as pd

from src.schemas import (
    QueryBasic, QueryTechnical, Candidate, IndicatorSeries, Movement, TimeFrame
)
from src.services import FinSeerEmbedder, FinSeerConfig, StockLLMGenerator, StockLLMConfig

# Synthetic daily OHLCV bars for two tickers, indexed by (symbol, timestamp).
symbols = ["AAPL", "MSFT"]
start = pd.Timestamp("2024-01-02")
dates = pd.date_range(start, periods=10, freq="B")  # ten business days, shared by every symbol

# One (symbol, timestamp) key per bar, symbol-major so it lines up with `rows` below.
idx = [(sym, day) for sym in symbols for day in dates]

rows = []
for sym in symbols:
    # AAPL's close gains 1.5 per bar; any other symbol gains 0.5 per bar. Both start at 100.
    drift = 0.5 if sym == "AAPL" else -0.5
    for step in range(len(dates)):
        price = 100 + step + drift * step
        rows.append(
            dict(
                open=price - 0.2,
                high=price + 0.5,
                low=price - 0.5,
                close=price,
                adjusted_close=price,  # no corporate actions in the toy data
                volume=1_000_000 + 1000 * step,
            )
        )

df = pd.DataFrame(
    rows,
    index=pd.MultiIndex.from_tuples(idx, names=["symbol", "timestamp"]),
)

# Query for AAPL; `as_of` is one calendar day after AAPL's last bar in `df`.
last_bar_ts = df.loc["AAPL"].index.max()
as_of = (last_bar_ts + pd.Timedelta(days=1)).date()
qb = QueryBasic.from_dataframe(
    "AAPL",
    df,
    as_of=as_of,
    lookback=5,
    timeframe=TimeFrame.day,
)
print("Query JSON:", qb.to_paper_json())

# Shared inputs for the demo candidates: AAPL's last five bars and their calendar dates.
sdf = df.loc["AAPL"].iloc[-5:]
recent_dates = [ts.date() for ts in sdf.index]

def make_cand(name, series):
    """Wrap one indicator series as a demo ``Candidate`` for AAPL.

    Args:
        name: Indicator label stored on the ``IndicatorSeries`` (e.g. ``"high"``).
        series: Indicator values, passed through as ``IndicatorSeries.values``.

    Returns:
        A daily-timeframe ``Candidate`` dated one calendar day before the
        notebook-level ``as_of``, with ``movement=None`` and the notebook-level
        ``recent_dates``.
    """
    day_before_query = as_of - timedelta(days=1)
    indicator = IndicatorSeries(name=name, values=series)
    fields = {
        "candidate_stock": "AAPL",
        "candidate_date": day_before_query,
        "movement": None,
        "timeframe": TimeFrame.day,
        "recent_dates": recent_dates,
        "indicator": indicator,
    }
    return Candidate(**fields)

# Two demo candidates: the actual "high" column of the last five AAPL bars, and a
# synthetic RSI ramp from 45 to 55 (generated with linspace, not derived from prices).
high_values = sdf["high"].tolist()
rsi_values = np.linspace(45, 55, 5).tolist()
cand_high = make_cand("high", high_values)
cand_rsi = make_cand("RSI", rsi_values)
candidates = [cand_high, cand_rsi]

# Retriever first, then generator (adjust model ids if needed).
fin_conf = FinSeerConfig(model_id="TheFinAI/FinSeer")
finseer = FinSeerEmbedder(fin_conf)

stock_conf = StockLLMConfig(
    model_id="TheFinAI/StockLLM",
    temperature=0.0,
    max_new_tokens=64,
)
stockllm = StockLLMGenerator(stock_conf)

# Embed the query and every candidate; the cosine top-k further down consumes these.
q_emb = finseer.encode_query(qb)
C = finseer.encode_candidates(candidates)

# cosine similarity
def cosine(a, b):
    """Cosine similarity between one query vector and a set of candidate vectors.

    Args:
        a: Query embedding, any array-like. It is flattened to 1-D, so both
            ``(d,)`` and ``(1, d)`` inputs are accepted.
        b: Candidate embeddings, array-like of shape ``(n, d)``. A single ``(d,)``
            vector is treated as one candidate.

    Returns:
        ``np.ndarray`` of shape ``(n,)`` holding one similarity per row of ``b``.
        An all-zero vector scores 0.0 instead of NaN because of the epsilon in
        the denominators.
    """
    eps = 1e-12  # keeps the division finite for all-zero vectors
    # Coerce first: plain lists do not support `/`, and a (1, d) query would make
    # `b @ a` fail with a shape mismatch.
    a = np.asarray(a).ravel()
    b = np.atleast_2d(np.asarray(b))
    a = a / (np.linalg.norm(a) + eps)
    b = b / (np.linalg.norm(b, axis=1, keepdims=True) + eps)
    return b @ a

# Rank candidates by descending similarity to the query and keep the first two.
sims = cosine(q_emb, C)
order = np.argsort(-sims)
top_two = order[:2]
selected = [candidates[pos] for pos in top_two]

# Pass the query and the retrieved candidates to StockLLM's schema-aware helper.
result = stockllm.predict_from_schemas(qb, selected)
print("Result:", result)


ModuleNotFoundError: No module named 'numpy'

In [None]:
# If running in a clean environment, install requirements
# Uncomment if needed
# %pip install --quiet transformers accelerate sentencepiece faiss-cpu pydantic pandas
# %pip install --quiet bitsandbytes talib-binary --extra-index-url https://pypi.org/simple

from datetime import date, timedelta
import numpy as np
import pandas as pd

from src.schemas import (
    QueryBasic, QueryTechnical, Candidate, IndicatorSeries, Movement, TimeFrame
)
from src.services import FinSeerEmbedder, FinSeerConfig, StockLLMGenerator, StockLLMConfig

# Synthetic daily OHLCV bars for two tickers, indexed by (symbol, timestamp).
symbols = ["AAPL", "MSFT"]
start = pd.Timestamp("2024-01-02")
dates = pd.date_range(start, periods=10, freq="B")  # ten business days, shared by every symbol

# One (symbol, timestamp) key per bar, symbol-major so it lines up with `rows` below.
idx = [(sym, day) for sym in symbols for day in dates]

rows = []
for sym in symbols:
    # AAPL's close gains 1.5 per bar; any other symbol gains 0.5 per bar. Both start at 100.
    drift = 0.5 if sym == "AAPL" else -0.5
    for step in range(len(dates)):
        price = 100 + step + drift * step
        rows.append(
            dict(
                open=price - 0.2,
                high=price + 0.5,
                low=price - 0.5,
                close=price,
                adjusted_close=price,  # no corporate actions in the toy data
                volume=1_000_000 + 1000 * step,
            )
        )

df = pd.DataFrame(
    rows,
    index=pd.MultiIndex.from_tuples(idx, names=["symbol", "timestamp"]),
)

# Query for AAPL; `as_of` is one calendar day after AAPL's last bar in `df`.
last_bar_ts = df.loc["AAPL"].index.max()
as_of = (last_bar_ts + pd.Timedelta(days=1)).date()
qb = QueryBasic.from_dataframe(
    "AAPL",
    df,
    as_of=as_of,
    lookback=5,
    timeframe=TimeFrame.day,
)
print("Query JSON:", qb.to_paper_json())

# Shared inputs for the demo candidates: AAPL's last five bars and their calendar dates.
sdf = df.loc["AAPL"].iloc[-5:]
recent_dates = [ts.date() for ts in sdf.index]

def make_cand(name, series):
    """Wrap one indicator series as a demo ``Candidate`` for AAPL.

    Args:
        name: Indicator label stored on the ``IndicatorSeries`` (e.g. ``"high"``).
        series: Indicator values, passed through as ``IndicatorSeries.values``.

    Returns:
        A daily-timeframe ``Candidate`` dated one calendar day before the
        notebook-level ``as_of``, with ``movement=None`` and the notebook-level
        ``recent_dates``.
    """
    day_before_query = as_of - timedelta(days=1)
    indicator = IndicatorSeries(name=name, values=series)
    fields = {
        "candidate_stock": "AAPL",
        "candidate_date": day_before_query,
        "movement": None,
        "timeframe": TimeFrame.day,
        "recent_dates": recent_dates,
        "indicator": indicator,
    }
    return Candidate(**fields)

# Two demo candidates: the actual "high" column of the last five AAPL bars, and a
# synthetic RSI ramp from 45 to 55 (generated with linspace, not derived from prices).
high_values = sdf["high"].tolist()
rsi_values = np.linspace(45, 55, 5).tolist()
cand_high = make_cand("high", high_values)
cand_rsi = make_cand("RSI", rsi_values)
candidates = [cand_high, cand_rsi]

# Retriever first, then generator (adjust model ids if needed).
fin_conf = FinSeerConfig(model_id="TheFinAI/FinSeer")
finseer = FinSeerEmbedder(fin_conf)

stock_conf = StockLLMConfig(
    model_id="TheFinAI/StockLLM",
    temperature=0.0,
    max_new_tokens=64,
)
stockllm = StockLLMGenerator(stock_conf)

# Embed the query and every candidate; the cosine top-k further down consumes these.
q_emb = finseer.encode_query(qb)
C = finseer.encode_candidates(candidates)

# cosine similarity
def cosine(a, b):
    """Cosine similarity between one query vector and a set of candidate vectors.

    Args:
        a: Query embedding, any array-like. It is flattened to 1-D, so both
            ``(d,)`` and ``(1, d)`` inputs are accepted.
        b: Candidate embeddings, array-like of shape ``(n, d)``. A single ``(d,)``
            vector is treated as one candidate.

    Returns:
        ``np.ndarray`` of shape ``(n,)`` holding one similarity per row of ``b``.
        An all-zero vector scores 0.0 instead of NaN because of the epsilon in
        the denominators.
    """
    eps = 1e-12  # keeps the division finite for all-zero vectors
    # Coerce first: plain lists do not support `/`, and a (1, d) query would make
    # `b @ a` fail with a shape mismatch.
    a = np.asarray(a).ravel()
    b = np.atleast_2d(np.asarray(b))
    a = a / (np.linalg.norm(a) + eps)
    b = b / (np.linalg.norm(b, axis=1, keepdims=True) + eps)
    return b @ a

# Rank candidates by descending similarity to the query and keep the first two.
sims = cosine(q_emb, C)
order = np.argsort(-sims)
top_two = order[:2]
selected = [candidates[pos] for pos in top_two]

# Pass the query and the retrieved candidates to StockLLM's schema-aware helper.
result = stockllm.predict_from_schemas(qb, selected)
print("Result:", result)


ModuleNotFoundError: No module named 'numpy'

Notes:
- Ensure you have sufficient VRAM or enable 8-bit/4-bit loading in `StockLLMConfig`.
- The example uses a toy retrieval (cosine over two candidates). Replace with FAISS for real usage.
- The schemas emit the paper’s JSON format, so you can swap the generator for any prompt-compatible model.
