In [2]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


In [3]:
# Loading Frankenstein text

def load_chunks(path, chunk_size=500):
    with open(path, "r", encoding="utf8") as f:
        text = f.read()

    words = text.split()
    chunks = [
        " ".join(words[i:i + chunk_size])
        for i in range(0, len(words), chunk_size)
    ]
    return chunks

chunks = load_chunks("frankenstein.txt")
print(f"Loaded {len(chunks)} chunks.")


Loaded 151 chunks.


In [4]:
# Converts the imported text into numerical vectors within an embedding model
print("Loading embedding model...")
embedder = SentenceTransformer("all-MiniLM-L6-v2")

print("Embedding chunks...")
embeddings = embedder.encode(chunks, convert_to_numpy=True)

dim = embeddings.shape[1]
index = faiss.IndexFlatL2(dim)
index.add(embeddings)

print("Vector index created!")


Loading embedding model...
Embedding chunks...
Vector index created!


In [5]:
# # Selecting which model to use
# model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
# # model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"

# # importing the chosen model
# print("Loading DeepSeek model...")
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     dtype="auto"
# )

# generator = pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
#     max_new_tokens=300
# )

# print("DeepSeek model loaded!")


In [6]:
# Second LLM Load attempt
from llama_cpp import Llama

llm = Llama(
    model_path="deepseek-r1-distill-qwen-7b-uncensored-q4_k_m.gguf",
    n_ctx=4096,
    n_threads=8
)


llama_model_loader: loaded meta data with 32 key-value pairs and 339 tensors from deepseek-r1-distill-qwen-7b-uncensored-q4_k_m.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen2
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Qwen2 7B Instruct
llama_model_loader: - kv   3:                       general.organization str              = Qwen
llama_model_loader: - kv   4:                           general.finetune str              = Instruct
llama_model_loader: - kv   5:                           general.basename str              = Qwen2
llama_model_loader: - kv   6:                         general.size_label str              = 7B
llama_model_loader: - kv   7:                

In [7]:
# Defining how the model and user will interact with each other:
def retrieve(query, k=3):
    query_emb = embedder.encode([query], convert_to_numpy=True)
    distances, indices = index.search(query_emb, k)
    return [chunks[i] for i in indices[0]]

def ask_deepseek(question, context):
    prompt = f"""
You are a helpful assistant knowledgeable about Mary Shelley's *Frankenstein*. 
Use ONLY the context provided — don't make up details.

Context:
{context}

Question: {question}

Answer:
"""

    out = generator(prompt)[0]["generated_text"]
    return out.split("Answer:", 1)[-1].strip()


In [8]:
def ask(question, k=3):
    # retrieve context
    D, I = index.search(embedder.encode([question]), k)
    retrieved_chunks = "\n\n".join(chunks[i] for i in I[0])
    
    full_prompt = f"""
You are a chatbot that answers questions about Mary Shelley's *Frankenstein*.

Here are the most relevant excerpts from the text:

{retrieved_chunks}

---

Question: {question}
Answer:
"""

    # Run inference
    output = llm(
        prompt=full_prompt,
        max_tokens=300,
        temperature=0.7,
    )

    # llama-cpp returns a dict
    response = output["choices"][0]["text"]
    return response

for out in llm(
    prompt=full_prompt,
    max_tokens=300,
    temperature=0.7,
    stream=True
):
    token = out["choices"][0]["text"]
    pbar.update(1)
    collected.append(token)


NameError: name 'full_prompt' is not defined

In [None]:
while True:
    q = input("\nYou: ").strip()
    if q.lower() in ["quit", "exit"]:
        print("Goodbye!")
        break
    
    passages = retrieve(q)
    context = "\n\n".join(passages)
    answer = ask_deepseek(q, context)
    print(f"\nBot: {answer}")


KeyboardInterrupt: 