In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install PyPDF2 sentence-transformers faiss-cpu 



In [None]:
!pip install PyMuPDF

## Load a Board Game Rulebook and Extract Text

In [2]:
import fitz  # PyMuPDF

file_path = "ideabase_chessrules.pdf"  

doc = fitz.open(file_path)

full_text = ""
for page_num,page in enumerate(doc):
    page_text = page.get_text()
    full_text += f"\n--- Page {page_num + 1} ---\n" + page_text

print(full_text[:5000])  # Print the first 1000 characters to check the content
    


--- Page 1 ---
3
THE OFFICIAL RULES OF CHESS • CARDOZA PUBLISHING
Rules of
Chess
ERIC SCHILLER

--- Page 2 ---
THE OFFICIAL RULES OF CHESS • CARDOZA PUBLISHING
THE OFFICIAL RULES OF CHESS
The following are the standard rules of chess as applied in World
Championship competition. In later chapter we present some of the
variations of the rules used in amateur, scholastic, and online com-
petitions. These rules conform in most part to those of the world
chess federation (FIDE), but differ significantly from those found in
American tournaments conducted under the auspices of the United
States Chess Federation. Since most American tournaments are
amateur events, those rules are discussed in the section on ama-
teur rules.
This set of rules was composed by International Arbiter Eric
Schiller, with the cooperation and valuable assistance of Interna-
tional Arbiters Andrzej Filipowicz (Poland) and Yuri Averbakh (Rus-
sia). They were used verbatim in the 2000 Braingames.net World
Chess Champio

### Clean and Preprocess the Rulebook Text

In [3]:
import re

# ✅ Retain double newlines (paragraph breaks), collapse 3+ to exactly 2
cleaned_text = re.sub(r'\n{3,}', '\n\n', full_text)

# Step 2: Remove bullet characters or other non-standard symbols
cleaned_text = re.sub(r'[•·►\uf0b7\xa0]', '', cleaned_text)

# Step 3: Remove common footer/header artifacts (if you notice any patterns like "Page x" or "FIDE Laws of Chess")
cleaned_text = re.sub(r'Page\s+\d+', '', cleaned_text, flags=re.IGNORECASE)

# Step 4: Strip extra whitespace from beginning and end of each line
cleaned_text = '\n'.join([line.strip() for line in cleaned_text.splitlines()])

# Step 5: (Optional) Convert to lowercase for normalization
cleaned_text = cleaned_text.lower()  # Only if you're okay making everything lowercase

# Preview the cleaned result
print(cleaned_text[:1500])



---  ---
3
the official rules of chess  cardoza publishing
rules of
chess
eric schiller

---  ---
the official rules of chess  cardoza publishing
the official rules of chess
the following are the standard rules of chess as applied in world
championship competition. in later chapter we present some of the
variations of the rules used in amateur, scholastic, and online com-
petitions. these rules conform in most part to those of the world
chess federation (fide), but differ significantly from those found in
american tournaments conducted under the auspices of the united
states chess federation. since most american tournaments are
amateur events, those rules are discussed in the section on ama-
teur rules.
this set of rules was composed by international arbiter eric
schiller, with the cooperation and valuable assistance of interna-
tional arbiters andrzej filipowicz (poland) and yuri averbakh (rus-
sia). they were used verbatim in the 2000 braingames.net world
chess championship and were

In [None]:
print(cleaned_text[:5000])  # Print the first 5000 characters to check the cleaned content

### Chunking Text

In [4]:
# Split based on paragraph breaks
raw_chunks = cleaned_text.split('\n\n')

# Keep only meaningful chunks (e.g., more than 30 words)
chunks = []
for chunk in raw_chunks:
    chunk = chunk.strip()
    if len(chunk.split()) > 15: # Adjust the threshold as needed
        chunks.append(chunk)

# Show a few to confirm
for i, chunk in enumerate(chunks[:3]):
    print(f"\n=== Chunk {i+1} ===\n{chunk}")



=== Chunk 1 ===
---  ---
the official rules of chess  cardoza publishing
the official rules of chess
the following are the standard rules of chess as applied in world
championship competition. in later chapter we present some of the
variations of the rules used in amateur, scholastic, and online com-
petitions. these rules conform in most part to those of the world
chess federation (fide), but differ significantly from those found in
american tournaments conducted under the auspices of the united
states chess federation. since most american tournaments are
amateur events, those rules are discussed in the section on ama-
teur rules.
this set of rules was composed by international arbiter eric
schiller, with the cooperation and valuable assistance of interna-
tional arbiters andrzej filipowicz (poland) and yuri averbakh (rus-
sia). they were used verbatim in the 2000 braingames.net world
chess championship and were personally approved by world cham-
pion garry kasparov and his challenge

In [None]:
print(len(chunks))  # Check how many chunks we have

### Generate Embeddings

In [5]:
from sentence_transformers import SentenceTransformer

embedmodel = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = embedmodel.encode(chunks, show_progress_bar=True)

  from .autonotebook import tqdm as notebook_tqdm
Batches: 100%|██████████| 1/1 [00:01<00:00,  1.90s/it]


In [6]:
print(f"Total chunks embedded: {len(embeddings)}")
print(f"First vector shape: {embeddings[0].shape}")  # should be (384,)


Total chunks embedded: 16
First vector shape: (384,)


### Setting Up Vector Database

In [None]:
!pip install qdrant-client


In [7]:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct

# Start a local in-memory Qdrant instance
client = QdrantClient(":memory:")

collection_name = "board_game_rules"

#create the collection with vector params
client.recreate_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(
        size=len(embeddings[0]),  # Should match the embedding size
        distance=Distance.COSINE
    )
)

# Prepare points to insert
points = [
    PointStruct(
        id=i,
        vector=embeddings[i],
        payload={"text": chunks[i]}  # Store the original text as payload
    )
    for i in range(len(embeddings))
]

# Insert points into the collection
client.upsert(collection_name=collection_name, points=points)

  client.recreate_collection(


UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

### Test

In [8]:
query = "How to win in chess?"
query_embedding = embedmodel.encode(query).tolist()

# Search for top 3 most similar chunks
search_result = client.search(
    collection_name=collection_name,
    query_vector=query_embedding,
    limit=3
)

# Show results
for i, result in enumerate(search_result):
    print(f"\n=== Match {i+1} ===")
    print(result.payload["text"])



=== Match 1 ===
---  ---
the official rules of chess  cardoza publishing
the official rules of chess
the following are the standard rules of chess as applied in world
championship competition. in later chapter we present some of the
variations of the rules used in amateur, scholastic, and online com-
petitions. these rules conform in most part to those of the world
chess federation (fide), but differ significantly from those found in
american tournaments conducted under the auspices of the united
states chess federation. since most american tournaments are
amateur events, those rules are discussed in the section on ama-
teur rules.
this set of rules was composed by international arbiter eric
schiller, with the cooperation and valuable assistance of interna-
tional arbiters andrzej filipowicz (poland) and yuri averbakh (rus-
sia). they were used verbatim in the 2000 braingames.net world
chess championship and were personally approved by world cham-
pion garry kasparov and his challenge

  search_result = client.search(


### Retrival+Generation

In [None]:
!pip install google-generativeai


In [17]:
!pip uninstall google-generativeai -y


Found existing installation: google-generativeai 0.8.5
Uninstalling google-generativeai-0.8.5:
  Successfully uninstalled google-generativeai-0.8.5


In [18]:
!pip install -U google-generativeai


Collecting google-generativeai
  Using cached google_generativeai-0.8.5-py3-none-any.whl.metadata (3.9 kB)
Using cached google_generativeai-0.8.5-py3-none-any.whl (155 kB)
Installing collected packages: google-generativeai
Successfully installed google-generativeai-0.8.5



[notice] A new release of pip is available: 23.3 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [13]:
import google.generativeai as genai
genai.configure(api_key="AIzaSyDjulXNeZOea3zKzV51v7PEjLkOVQWOjtc")

### Load the Gemini Model

In [19]:
models = genai.list_models()
for m in models:
    print(m.name)


models/embedding-gecko-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-2.5-pro-preview-03-25
models/gemini-2.5-flash-preview-04-17
models/gemini-2.5-flash-preview-05-20
models/gemini-2.5-flash
models/gemini-2.5-flash-preview-04-17-thinking
models/gemini-2.5-flash-lite-preview-06-17
models/gemini-2.5-pro-preview-05-06
models/gemini-2.5-pro-preview-06-05
models/gemini-2.5-pro
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-exp-image-generation
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-preview-image-generation
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-ex

In [23]:
geminimodel = genai.GenerativeModel(model_name="models/gemini-1.5-flash-latest")


### RAG Function

In [24]:
def rag_answer_gemini(query:str ,top_k: int = 3) -> str:
    # Step 1: Embed the query using sentence-transformers
    query_embedding = embedmodel.encode(query).tolist()
    
    # Step 2: Search Qdrant for top relevant chunks
    search_result = client.search(
        collection_name=collection_name,
        query_vector=query_embedding,
        limit=top_k
    )
    
    # Step 3: Extract matched chunks
    matched_chunks = [hit.payload["text"] for hit in search_result]
    
    # Step 4: Construct the prompt
    prompt = (
        "You are a helpful assistant for answering board game rules.\n"
        "Answer the question using only the context provided below.\n"
        "If the answer isn't found in the context, say so clearly.\n\n"
        "Context:\n"
        + "\n\n---\n\n".join(matched_chunks) +
        f"\n\nQuestion: {query}\nAnswer:"
    )
    
    # Step 5: Generate content using Gemini
    response = geminimodel.generate_content(prompt)

    return response.text

In [28]:
question = "How to checkmate?"
answer = rag_answer_gemini(question)

print("🔍 Question:", question)
print("\n💡 Answer:", answer)


  search_result = client.search(


🔍 Question: How to checkmate?

💡 Answer: Checkmate is when a player's king is under attack (in check) and there is no way to remove it from attack.  The player who checkmates their opponent wins the game.

