In [114]:
import os, requests
from dotenv import load_dotenv
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS


In [115]:
#load api key

load_dotenv()

api_key = os.getenv("OPENROUTER_API_KEY")

In [116]:
loader = TextLoader(r"C:\Users\prajyot\Documents\LLMOps\data\ml.txt", encoding="utf-8")
doc = loader.load()

In [117]:
text_spliter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)
text_chunk = text_spliter.split_documents(doc)

# collect all chunks in text

text = [chunk.page_content for chunk in text_chunk]

In [118]:
# make a request to llm 

resp = requests.post(
    "https://openrouter.ai/api/v1/embeddings",
    headers={"Authorization": f"Bearer {api_key}"},
    json={"model": "openai/text-embedding-3-small", "input": text}  # batch input
)
result = resp.json()
print(result)

embeddings = [item["embedding"] for item in result.get("data", [])]

{'object': 'list', 'data': [{'object': 'embedding', 'embedding': [-0.007060386, 0.022618793, -0.0067781834, -0.03590893, 0.054140273, -0.015015299, 0.0014855562, 0.011788608, -0.023619812, 0.050689947, 0.025856134, -0.023129951, -0.04979542, 0.03663307, 0.011554327, 0.013918437, 0.0074384306, -0.02977502, 0.04417267, 0.026431188, 0.029136071, 0.015025949, 0.051840056, -0.037101634, 0.012917417, -0.0024399862, 0.010303051, 0.045833938, 0.009946305, -0.015547757, 0.011415888, -0.01664462, -0.028241543, -0.036973845, 0.0137161035, 0.042511404, -0.020244032, -0.013311436, -0.011096413, 0.018667959, -0.027964665, -0.010116692, 0.0015827297, 0.06904908, -0.005183473, -0.02225672, -0.04278828, -0.036334895, 0.026921049, 0.042362314, -0.06610992, -0.02202244, -0.037719283, -0.024599534, -0.03903978, 0.06990101, 0.016740462, 0.019317556, 0.026005222, -0.008918663, -0.02012689, 0.00064161123, 0.008104002, 0.0018649322, -0.021692317, -0.04677106, -0.009999551, 0.025557958, -0.024493042, -0.031777

In [119]:
text_embeddings = list(zip(text, embeddings))
vectorstore = FAISS.from_embeddings(text_embeddings, embedding=None)


`embedding_function` is expected to be an Embeddings object, support for passing in a function will soon be removed.


In [120]:
query = "Explain supervised learning"
resp = requests.post(
    "https://openrouter.ai/api/v1/embeddings",
    headers={"Authorization": f"Bearer {api_key}"},
    json={"model": "openai/text-embedding-3-small", "input": query}
)
query_embedding = resp.json()["data"][0]["embedding"]

In [121]:
# 8. Search by vector
results = vectorstore.similarity_search_by_vector(query_embedding, k=2)
for r in results:
    print(r.page_content)


- **Supervised Learning**: Uses **labeled data** (input + desired output).
- **Semi-Supervised Learning**: Combines small labeled datasets with large unlabeled datasets.


In [122]:
from langchain_core.prompts import ChatPromptTemplate

template = """You are the assistant for question answering task.
If you don't know, say "don't know".
Question: {question}
Context: {context}
Answer: {{answer}}"""



In [123]:
prompt = ChatPromptTemplate.from_template(template)

In [124]:
from langchain_core.output_parsers import StrOutputParser

In [125]:
output_parser = StrOutputParser()

In [126]:
from langchain_openai import ChatOpenAI


llm_model = ChatOpenAI(
    model="nvidia/nemotron-nano-12b-v2-vl:free",
    api_key=api_key,
    base_url="https://openrouter.ai/api/v1"
)

In [127]:
from langchain_core.runnables.passthrough import RunnablePassthrough

# Build chain
rag_chain = (
    {"context": lambda q: vectorstore.similarity_search_by_vector(query_embedding, k=2), "question": RunnablePassthrough()}
    | prompt
    | llm_model
    | output_parser
)

In [129]:
rag_chain.invoke("Three Main Types of Learning")

'The three main types of learning contexts mentioned in the provided documents are:  \n1. **Supervised Learning** (uses labeled data input + output).  \n2. **Semi-Supervised Learning** (combines small labeled datasets with large unlabeled datasets).  \n\nHowever, only two types are explicitly documented here. The third type (e.g., unsupervised learning or reinforcement learning, which are standard in broader contexts) is not provided in the given data.\n'