In [1]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


In [2]:
# Loading Frankenstein text

def load_chunks(path, chunk_size=500):
    """Read a UTF-8 text file and cut it into fixed-size word chunks.

    The file content is split on whitespace, so line breaks and repeated
    spaces are collapsed; each chunk is the words re-joined with single spaces.

    Args:
        path: Location of the text file to read.
        chunk_size: Number of words per chunk. The final chunk holds whatever
            words remain and may be shorter.

    Returns:
        A list of chunk strings in document order (empty if the file contains
        no words).
    """
    with open(path, "r", encoding="utf8") as handle:
        tokens = handle.read().split()

    pieces = []
    for start in range(0, len(tokens), chunk_size):
        window = tokens[start:start + chunk_size]
        pieces.append(" ".join(window))
    return pieces

# Split the novel into word chunks (default chunk size) and report how many were produced.
# `chunks` is a notebook-level variable that the embedding and retrieval cells read.
chunks = load_chunks("frankenstein.txt")
print(f"Loaded {len(chunks)} chunks.")


Loaded 151 chunks.


In [None]:
# Embed every text chunk with a sentence-embedding model and store the vectors
# in a FAISS index so that questions can later be matched against them.
print("Loading embedding model...")
embedder = SentenceTransformer("all-MiniLM-L6-v2")

print("Embedding chunks...")
# One embedding row per chunk, returned as a NumPy array
embeddings = embedder.encode(chunks, convert_to_numpy=True)

# Size the flat L2 index from the embedding width (second axis of the array)
dim = embeddings.shape[1]
index = faiss.IndexFlatL2(dim)
index.add(embeddings)

print("Vector index created!")


Loading embedding model...
Embedding chunks...
Vector index created!


In [None]:
# Select which causal language model to use; the commented-out line is an
# alternative checkpoint that can be swapped in.
model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
# model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"

# Load the tokenizer and the model weights for the chosen checkpoint.
# device_map="auto" and dtype="auto" leave device placement and numeric
# precision to the library — NOTE(review): confirm the installed transformers
# version accepts the `dtype` keyword.
print("Loading DeepSeek model...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    dtype="auto"
)

# Wrap model and tokenizer in a text-generation pipeline; `generator` is the
# notebook-level callable used by ask_deepseek. Each call is configured with
# max_new_tokens=300.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=300
)

print("DeepSeek model loaded!")


Loading DeepSeek model...


Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-000002.safetensors:   0%|          | 0.00/7.39G [00:00<?, ?B/s]

model-00001-of-000002.safetensors:   0%|          | 0.00/8.67G [00:00<?, ?B/s]

In [None]:
# Defining how the model and user will interact with each other:
def retrieve(query, k=3):
    """Return the stored text chunks that best match ``query``.

    Uses the notebook-level ``embedder`` to embed the query, searches the
    notebook-level FAISS ``index`` for the ``k`` closest vectors, and maps the
    resulting positions back to entries of ``chunks``.

    Args:
        query: Question or search text to embed.
        k: Maximum number of chunks to return.

    Returns:
        A list of at most ``k`` chunk strings, ordered as returned by the
        index search. Fewer than ``k`` are returned when the index holds
        fewer than ``k`` vectors.
    """
    query_emb = embedder.encode([query], convert_to_numpy=True)
    _, indices = index.search(query_emb, k)
    # FAISS pads missing results with the label -1 when fewer than k vectors
    # are available; chunks[-1] would silently return the last chunk as a
    # bogus hit, so padded slots are dropped.
    return [chunks[i] for i in indices[0] if i >= 0]

def ask_deepseek(question, context):
    """Ask the language model a question, grounded in the supplied context.

    Builds a prompt containing the context and the question, runs it through
    the notebook-level ``generator`` pipeline, and returns only the text the
    model produced after the prompt.

    Args:
        question: The user's question, inserted after ``Question:``.
        context: Retrieved passage text, inserted after ``Context:``.

    Returns:
        The model's answer with surrounding whitespace removed.
    """
    prompt = f"""
You are a helpful assistant knowledgeable about Mary Shelley's *Frankenstein*. 
Use ONLY the context provided — don't make up details.

Context:
{context}

Question: {question}

Answer:
"""

    out = generator(prompt)[0]["generated_text"]
    # The generated text starts with the prompt itself. Strip that exact prefix
    # instead of splitting on the first "Answer:", which misfires whenever the
    # context or the question happens to contain "Answer:".
    if out.startswith(prompt):
        return out[len(prompt):].strip()
    # Fallback for output that does not echo the prompt verbatim.
    return out.split("Answer:", 1)[-1].strip()


In [None]:
def ask_frankenstein():
    """Run one question/answer round against the Frankenstein text.

    Prompts the user for a question, retrieves the three best-matching
    passages, asks the model with those passages as context, and prints the
    answer under a header. Returns nothing.
    """
    user_question = input("Ask a question about Frankenstein: ")

    # Blank line between passages so they stay visually separate in the prompt
    joined_context = "\n\n".join(retrieve(user_question, k=3))
    reply = ask_deepseek(user_question, joined_context)

    print("\n--- Answer ---\n", reply, sep="\n")

# Run a single interactive question/answer round (blocks on user input).
ask_frankenstein()


In [None]:
# Interactive chat loop: answer questions until the user types "quit" or "exit".
while True:
    try:
        q = input("\nYou: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed input or an interrupt at the prompt ends the session cleanly
        # instead of leaving a traceback in the cell output.
        print("Goodbye!")
        break

    if q.lower() in ["quit", "exit"]:
        print("Goodbye!")
        break
    if not q:
        # Nothing to search for: re-prompt rather than running retrieval and
        # a full model generation on an empty query.
        continue

    passages = retrieve(q)
    context = "\n\n".join(passages)
    answer = ask_deepseek(q, context)
    print(f"\nBot: {answer}")
