In [1]:
import os
from pathlib import Path

from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_community.llms import HuggingFacePipeline


In [1]:
# Build a FAISS retriever over the repository's Python, Markdown and text files.
repo_path = Path.home() / "stock-forecasting-llm-aapl"
if not repo_path.is_dir():
    # Fail early with a clear message instead of scanning nothing and
    # failing later while building the vector store.
    raise FileNotFoundError(f"Repository directory not found: {repo_path}")

EXCLUDED_DIRS = {"venv", ".venv", ".git"}
INDEXED_SUFFIXES = {".py", ".md", ".txt"}

# Exclusions are matched against the path *relative to the repository*, so a
# parent directory that happens to be named "venv" or ".git" cannot exclude
# every file. Directories whose names end in an indexed suffix are skipped via
# is_file(). Sorting gives a stable file order from one run to the next.
files = sorted(
    f
    for f in repo_path.rglob("*")
    if f.is_file()
    and f.suffix in INDEXED_SUFFIXES
    and EXCLUDED_DIRS.isdisjoint(f.relative_to(repo_path).parts)
)

docs = []
skipped = []
for f in files:
    try:
        loader = TextLoader(str(f), encoding="utf-8")
        docs.extend(loader.load())
    except Exception as exc:
        # Loading stays best-effort (one unreadable file must not abort the
        # whole index), but the failure is recorded rather than hidden.
        skipped.append((f, exc))

if skipped:
    print(f"Skipped {len(skipped)} file(s) that could not be loaded:")
    for skipped_path, skipped_error in skipped:
        print(f"  {skipped_path}: {skipped_error!r}")

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
split_docs = text_splitter.split_documents(docs)
if not split_docs:
    # Nothing to embed: stop here with an actionable message rather than
    # handing an empty corpus to the vector store.
    raise ValueError(
        f"No indexable text found under {repo_path} "
        f"(looked for {sorted(INDEXED_SUFFIXES)})"
    )

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
vectorstore = FAISS.from_documents(split_docs, embeddings)
# Each query returns the 4 most similar chunks.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})


NameError: name 'Path' is not defined

In [None]:
# --- Local generation model ------------------------------------------------
# No device or dtype arguments are passed below, so the library defaults apply.
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Text-generation pipeline; each call may produce up to 256 new tokens.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_new_tokens=256)

# Expose the transformers pipeline through LangChain's LLM interface.
llm = HuggingFacePipeline(pipeline=pipe)

# --- Prompt and output parsing ---------------------------------------------
# The template expects two variables: {context} and {question}.
prompt = ChatPromptTemplate.from_template(
    "You are a helpful assistant for this stock-forecasting repo.\n"
    "Use ONLY the context to answer.\n\n"
    "Context:\n{context}\n\nQuestion:\n{question}"
)

# Converts the model output into a plain string.
parser = StrOutputParser()

def answer_repo(question: str) -> str:
    """Answer a question about the repository using retrieved context.

    The question is sent to the module-level ``retriever``; the text of every
    returned document is joined with blank lines and passed, together with
    the question, through the ``prompt | llm | parser`` chain.

    Args:
        question: The question to ask.

    Returns:
        Whatever the chain produces for the filled-in prompt.
    """
    retrieved = retriever.invoke(question)
    passages = [chunk.page_content for chunk in retrieved]
    payload = {"context": "\n\n".join(passages), "question": question}
    rag_chain = prompt | llm | parser
    return rag_chain.invoke(payload)


In [None]:
# Ask a few representative questions and print each answer as soon as it is
# ready, with a blank line between consecutive answers.
demo_questions = (
    "How does this project forecast AAPL stock prices?",
    "What are the exact steps to run training from scratch?",
    "How do I start the FastAPI service and which URL do I call?",
)
for position, demo_question in enumerate(demo_questions):
    if position > 0:
        print()
    print(answer_repo(demo_question))
