In [None]:
!pip install python-docx

In [None]:
# rag_ai_doc.py
import math
import string
from typing import List, Dict

from docx import Document

# ---------- Load Word Document ----------
def load_docx(path: str) -> List[str]:
    """Read a Word document and return its usable paragraphs as text chunks.

    Each paragraph's text is stripped of surrounding whitespace. Paragraphs
    that are empty after stripping are dropped, as are paragraphs whose text
    starts with "what is" (compared case-insensitively).

    Args:
        path: Location of the .docx file, passed straight to ``Document``.

    Returns:
        The remaining paragraph texts, in document order.
    """
    stripped_paragraphs = (para.text.strip() for para in Document(path).paragraphs)
    return [
        body
        for body in stripped_paragraphs
        if body and not body.lower().startswith("what is")
    ]


# Corpus that rag_pipeline hands to retrieve(): the paragraph chunks that
# load_docx returns for the document below. The file is read once, when this
# statement executes.
# NOTE(review): the path looks like a Colab Google Drive mount — presumably
# Drive has to be mounted before this line runs; confirm.
DOCUMENTS = load_docx(
    "/content/drive/MyDrive/DS_PROJECT/What is Artificial Intelligence.docx"
)


# ---------- Utility ----------
def tokenize(text: str) -> List[str]:
    """Split *text* into lowercase word tokens.

    Tokens are separated by whitespace. ASCII punctuation is stripped from
    both ends of every token so that "Intelligence?" and "(AI)" compare equal
    to "intelligence" and "ai"; punctuation inside a token (for example the
    hyphen in "e-commerce") is kept. Tokens that consist of punctuation only
    are dropped.

    Args:
        text: Arbitrary input text; may be empty.

    Returns:
        The tokens in their original order (empty list for blank input).
    """
    trimmed = (raw.strip(string.punctuation) for raw in text.lower().split())
    # Stripping can leave an empty string (e.g. a lone "-"); skip those.
    return [token for token in trimmed if token]


def embed(text: str) -> Dict[str, int]:
    """Build a bag-of-words vector for *text*.

    Returns:
        A dict mapping each token produced by ``tokenize`` to the number of
        times it occurs in the text.
    """
    counts: Dict[str, int] = {}
    for token in tokenize(text):
        if token in counts:
            counts[token] += 1
        else:
            counts[token] = 1
    return counts


def cosine_similarity(v1, v2):
    """Return the cosine of the angle between two sparse vectors.

    Both arguments are dicts mapping a term to its numeric weight; a term
    missing from a dict counts as weight 0.

    Returns:
        0 when either vector has zero length, otherwise the dot product
        divided by the product of the two Euclidean norms.
    """
    norm1 = math.sqrt(sum(weight * weight for weight in v1.values()))
    norm2 = math.sqrt(sum(weight * weight for weight in v2.values()))
    if not (norm1 and norm2):
        # A zero-length vector has no direction; avoid dividing by zero.
        return 0

    # Only terms present in v1 can contribute to the dot product.
    overlap = sum(weight * v2.get(term, 0) for term, weight in v1.items())
    return overlap / (norm1 * norm2)


# ---------- Retrieval ----------
def retrieve(query: str, docs: List[str], top_k=3):
    """Return the *top_k* chunks of *docs* most similar to *query*.

    Similarity is the cosine similarity between the bag-of-words vectors
    (``embed``) of the query and of each chunk.

    Args:
        query: The user's question.
        docs: Candidate text chunks.
        top_k: Maximum number of chunks to return. ``None`` returns every
            chunk, ranked; zero or a negative value returns an empty list.

    Returns:
        Up to *top_k* chunks, highest similarity first. Chunks with equal
        similarity keep their relative order from *docs*.
    """
    if top_k is not None and top_k <= 0:
        # A negative slice bound would silently drop items from the end.
        return []

    q_vec = embed(query)
    # Rank on the score alone: the sort is stable (also with reverse=True), so
    # ties stay in document order instead of being ordered by chunk text.
    ranked = sorted(
        docs,
        key=lambda chunk: cosine_similarity(q_vec, embed(chunk)),
        reverse=True,
    )
    return ranked[:top_k]


# ---------- Topic Filter ----------
def _topic_words(text: str) -> set:
    """Return the distinct lowercase words of *text*, edge punctuation removed."""
    trimmed = (raw.strip(string.punctuation) for raw in text.lower().split())
    return {word for word in trimmed if word}


def filter_by_topic(query: str, docs: List[str], min_common: int = 2):
    """Keep only the chunks that share enough distinct words with the query.

    Words are compared case-insensitively with punctuation stripped from
    their ends, so a query such as "machine learning?" still matches a chunk
    containing "Machine learning".

    Args:
        query: The user's question.
        docs: Candidate text chunks.
        min_common: Minimum number of distinct words a chunk must have in
            common with the query to be kept (default 2).

    Returns:
        The chunks that meet the threshold, in their original order.
    """
    q_words = _topic_words(query)
    return [d for d in docs if len(q_words & _topic_words(d)) >= min_common]


# ---------- RAG Pipeline ----------
def rag_pipeline(query: str):
    """Answer *query* from the loaded document.

    The chunks in ``DOCUMENTS`` are ranked with ``retrieve`` and the result is
    narrowed with ``filter_by_topic``.

    Returns:
        A dict with the original ``"query"`` and an ``"answer"``: the
        surviving chunks joined by newlines, or a fixed fallback message when
        none survive.
    """
    candidates = retrieve(query, DOCUMENTS)
    on_topic = filter_by_topic(query, candidates)

    if on_topic:
        answer = "\n".join(on_topic)
    else:
        answer = "No relevant answer found in document"

    return {"query": query, "answer": answer}


# ---------- Run ----------
if __name__ == "__main__":
    # Interactive loop: read a question, print the answer, repeat until the
    # user types "exit" or the input stream ends / is interrupted.
    print("AI RAG System (Using Word Document)")
    print("Type 'exit' to quit\n")

    while True:
        try:
            # Strip so that "exit " or " exit" is still recognised.
            q = input("Ask a question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C / kernel interrupt: leave the loop
            # cleanly instead of ending with a traceback.
            print()
            break

        if q.lower() == "exit":
            break
        if not q:
            # Nothing was typed; ask again rather than running an empty query.
            continue

        result = rag_pipeline(q)
        print("\nAnswer:\n", result["answer"], "\n")

AI RAG System (Using Word Document)
Type 'exit' to quit

Ask a question: What is Artificial Intelligence?

Answer:
 Artificial Intelligence (AI) is a technology that enables machines and computers to perform tasks that typically require human intelligence. It helps systems learn from data, recognize patterns and make decisions to solve complex problems. It is used in healthcare, finance, e-commerce and transportation offering personalized recommendations and enabling self-driving cars. 

Ask a question: What is Machine Learning?

Answer:
 Machine Learning is a subset of artificial intelligence (AI) that focuses on building systems that can learn from and make decisions based on data. Instead of being explicitly programmed to perform a task, a machine learning model uses algorithms to identify patterns within data and improve its performance over time without human intervention. 



KeyboardInterrupt: Interrupted by user