In [1]:
# 📦 Step 1: Import libraries and load API key from .env
import os
from dotenv import load_dotenv

load_dotenv()
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")

# Ensure your key is loaded
assert OPENROUTER_API_KEY is not None, "OPENROUTER_API_KEY not found in .env"


In [2]:
# 📘 Step 2: Load PDF document
from langchain_community.document_loaders import PyMuPDFLoader

# Replace with your file name (ensure it's in the same folder)
loader = PyMuPDFLoader("cricketRules.pdf")
documents = loader.load()


print(f"Loaded {len(documents)} pages")


Loaded 79 pages


In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Create a text splitter that splits based on characters and overlaps slightly
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,      # Max characters per chunk
    chunk_overlap=50     # Overlap to preserve context
)

# Split the document
chunks = splitter.split_documents(documents)

# Print number of chunks and preview first one
print(f"✅ Document split into {len(chunks)} chunks")
print("🔍 First chunk preview:\n", chunks[0].page_content)


✅ Document split into 527 chunks
🔍 First chunk preview:
 Laws of Cricket 2017 Code (3rd Edition - 2022) 
1 
 
 
 
 
 
 
THE LAWS OF CRICKET 2017 CODE (3rd Edition - 2022) 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
© Marylebone Cricket Club


In [4]:
import os
from dotenv import load_dotenv

# Force re-load from .env file
load_dotenv(override=True)

# Debug check
api_key = os.getenv("OPENROUTER_API_KEY")
print("🔐 Loaded:", api_key[:12] + "..." if api_key else "❌ Not loaded")


🔐 Loaded: sk-or-v1-0a4...


In [5]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import os
from dotenv import load_dotenv

load_dotenv()

# ✅ Use free local embedding model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# ✅ Recreate the vector store from chunks
vectorstore = FAISS.from_documents(chunks, embeddings)

# ✅ Save it locally
vectorstore.save_local("faiss_index")

print("✅ FAISS vector store created and saved using HuggingFaceEmbeddings.")


  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


✅ FAISS vector store created and saved using HuggingFaceEmbeddings.


In [9]:
from openai import OpenAI

# Initialize OpenRouter client
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.getenv("OPENROUTER_API_KEY")
)

# 🔍 Step 1: Ask a question
question = " how do we say that we win the game?"

# 🔎 Step 2: Search similar chunks using FAISS
docs = vectorstore.similarity_search(question, k=3)  # get top 3 relevant chunks
context = "\n\n".join([doc.page_content for doc in docs])

# 🤖 Step 3: Ask the model using retrieved context
response = client.chat.completions.create(
    model="mistralai/mixtral-8x7b-instruct",  # Free LLM
    messages=[
        {
            "role": "system",
            "content": "You are an assistant that answers questions based only on the provided context. Do not use any outside information."
        },
        {
            "role": "user",
            "content": f"Context:\n{context}\n\nQuestion: {question}"
        }
    ]
)

# ✅ Step 4: Display the answer
print("📄 Answer from PDF:\n")
print(response.choices[0].message.content)


📄 Answer from PDF:

 Based on the context provided, there are several ways to express that one's team has won the game:

1. If your team won the match by scoring more runs than the opposing team, you can say "We won the match by [number of runs] runs." For example, "We won the match by 20 runs."
2. If your team batted last and won the match without losing all its wickets, you can say "We won the match by [number of wickets] wickets." For example, "We won the match by 5 wickets."
3. If your team won the match as a result of the opposing team conceding or refusing to play, you can say "We were awarded the match" or "The match was conceded to us."
4. If your team won the match due to an overthrow or a willful act of a fielder resulting in 5 penalty runs, you can say "We won the match by Penalty runs."

In any case, it's important to note that the correctness of the scores is the responsibility of the umpires, as stated in Law 16.8 of the context.
