In [None]:
import chromadb
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
import nltk
nltk.download("punkt")
from nltk.tokenize import sent_tokenize

# Persistent ChromaDB client backed by the local "chroma_db/" directory
client = chromadb.PersistentClient(path="chroma_db/")

# Courses offered by each school; every (school, course) pair gets its own
# collection named "<school>_<course>"
schools = {
    "School_of_technology": ["Python", "Java", "AI", "ML"],
    "School_of_business": ["Marketing", "Accounts"],
    "School_of_law": ["History_law"],
    "School_of_design": ["Design_history"],
}
for school in schools:
    for course in schools[school]:
        collection_name = "_".join((school, course))
        client.get_or_create_collection(name=collection_name)

print("✅ School and Course collections created!")

# Report how many collections exist, then list them
print(len(client.list_collections()))
print(client.list_collections())

# Sentence-embedding model used to vectorise text chunks
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

def extract_text_from_pdf(pdf_path):
    """
    Extract the text of every page of a PDF into a single string.

    :param pdf_path: Path to the PDF file (handed straight to PdfReader)
    :return: Text of all pages in page order, each page followed by a newline
    """
    reader = PdfReader(pdf_path)
    # Build the result with one join instead of growing a string page by page.
    # `or ''` keeps a page with no extractable text from breaking the
    # concatenation should extract_text() hand back None.
    return "".join(f"{page.extract_text() or ''}\n" for page in reader.pages)

# def split_text_into_chunks(text, chunk_size=500, overlap=50):
#     """
#     Splits long text (books) into fixed-size chunks with overlap.

#     :param text: Extracted text from PDF
#     :param chunk_size: Number of words per chunk
#     :param overlap: Overlapping words between consecutive chunks (to maintain context)
#     :return: List of text chunks
#     """
#     words = text.split()
#     chunks = []
    
#     for i in range(0, len(words), chunk_size - overlap):
#         chunk = " ".join(words[i:i + chunk_size])
#         chunks.append(chunk)

#     return chunks


def split_text_into_chunks(text, max_tokens=100):
    """
    Split text into chunks made of whole sentences.

    Sentences are packed greedily: a sentence is appended to the current chunk
    while the chunk (including the joining spaces) stays within the limit;
    otherwise the current chunk is closed and the sentence starts a new one.
    A single sentence longer than the limit is kept intact as its own chunk.

    :param text: Text to split (e.g. the text extracted from a PDF)
    :param max_tokens: Maximum chunk length. NOTE: despite the name, the
        length is measured in characters (``len`` of the string), not in
        model tokens.
    :return: List of non-empty chunk strings; empty list for blank input
    """
    # Nothing to tokenize for empty / whitespace-only input.
    if not text or not text.strip():
        return []

    chunks = []
    current_sentences = []
    current_length = 0

    for sentence in sent_tokenize(text):
        # A joining space is only needed when the chunk already has content.
        added_length = len(sentence) + (1 if current_sentences else 0)

        if current_sentences and current_length + added_length > max_tokens:
            # Close the current chunk and start a new one with this sentence.
            chunks.append(" ".join(current_sentences))
            current_sentences = [sentence]
            current_length = len(sentence)
        else:
            # Also taken for the very first sentence, so an over-long opening
            # sentence no longer produces an empty leading chunk.
            current_sentences.append(sentence)
            current_length += added_length

    if current_sentences:
        chunks.append(" ".join(current_sentences))

    return chunks


def store_pdf_embeddings(school, course, pdf_paths):
    """
    Convert book PDFs to embeddings & store in ChromaDB.

    For each PDF the text is extracted, split into sentence-based chunks,
    embedded with ``embedding_model`` and written to the collection named
    ``"<school>_<course>"``. Chunk IDs have the form
    ``"<collection_name>_<pdf_path>_<chunk index>"``.

    :param school: School name
    :param course: Course name
    :param pdf_paths: List of PDF file paths
    """
    collection_name = f"{school}_{course}"
    collection = client.get_or_create_collection(name=collection_name)

    # Chunks written per collection.add call: batching avoids one round trip
    # per chunk, and a modest size keeps each call well below any per-call
    # batch limit the store may enforce.
    batch_size = 500

    for pdf_path in pdf_paths:
        print(f"🔄 Processing {pdf_path}...")

        text = extract_text_from_pdf(pdf_path)
        # The sentence-based splitter only takes `max_tokens`; the former
        # chunk_size/overlap keywords belonged to the retired word-based
        # splitter and raised TypeError here. Keep the intended 500 budget.
        chunks = split_text_into_chunks(text, max_tokens=500)
        if not chunks:
            # e.g. a scanned PDF with no extractable text: nothing to embed.
            print(f"⚠️ No text could be extracted from {pdf_path}; skipping.")
            continue

        embeddings = embedding_model.encode(chunks).tolist()  # Generate vector embeddings
        ids = [f"{collection_name}_{pdf_path}_{i}" for i in range(len(chunks))]  # Unique IDs

        for start in range(0, len(chunks), batch_size):
            end = start + batch_size
            collection.add(
                ids=ids[start:end],
                documents=chunks[start:end],  # Store text chunks
                embeddings=embeddings[start:end],  # Store vector embeddings
            )

        print(f"✅ {pdf_path} embeddings added to {collection_name} in ChromaDB.")

# Example usage: index both parts of the "Learning Python" book for the
# School_of_technology / Python course
store_pdf_embeddings(
    school="School_of_technology",
    course="Python",
    pdf_paths=[
        "R:/CourseChabot/Backend/new/Books/school_tech_books/Learning_Python_part_1.pdf",
        "R:/CourseChabot/Backend/new/Books/school_tech_books/Learning_Python_part_2.pdf",
    ],
)

# store_pdf_embeddings(
#     "School_of_business",
#     "Marketing",
#     ["marketing_strategy.pdf", "digital_marketing_book.pdf"]
# )



In [None]:
import chromadb
from sentence_transformers import SentenceTransformer


# Initialize the ChromaDB client on the same "chroma_db/" path the ingestion
# cell writes to, so the collections populated there are the ones queried here
client = chromadb.PersistentClient(path="chroma_db/")

# Load the embedding model (same model name as used when storing the chunks,
# so query vectors and stored vectors come from the same model)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

def query_course_content(school, course, user_query, top_k=3):
    """
    Queries the ChromaDB vector database to retrieve relevant course material.

    :param school: School name (e.g., "School_of_technology")
    :param course: Course name (e.g., "Python")
    :param user_query: Question asked by the user
    :param top_k: Maximum number of results to retrieve
    :return: List of retrieved document chunks, or a one-element list holding
        a "no relevant content" message when nothing could be retrieved
    """
    no_content = ["No relevant content found in the course materials."]

    collection_name = f"{school}_{course}"
    collection = client.get_or_create_collection(name=collection_name)

    # get_or_create_collection may have just created an empty collection
    # (e.g. nothing ingested yet, or a misspelled name); skip the embedding
    # work and the query entirely in that case.
    available = collection.count()
    if available == 0:
        return no_content

    # Convert user query into vector embeddings
    query_embedding = embedding_model.encode(user_query).tolist()

    # Retrieve the most relevant course content, never asking for more
    # results than the collection actually holds
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=min(top_k, available),
    )

    # Tolerate a missing or None "documents" entry as well as an empty hit list
    documents = results.get("documents") or [[]]
    if not documents[0]:
        return no_content

    return documents[0]

# Example usage: fetch the best-matching chunks for one question and print
# them as a numbered list
user_question = "What are the basics of Python programming?"
retrieved_text = query_course_content(
    school="School_of_technology",
    course="Python",
    user_query=user_question,
)

print("🔎 Retrieved Content:")
for rank, passage in enumerate(retrieved_text, start=1):
    print(f"{rank}. {passage}\n")


  from .autonotebook import tqdm as notebook_tqdm


🔎 Retrieved Content:


3. languages such as C, C++, and Java: Python code will be much less difficult to write, debug, and maintain. Of course, your author has been a card-carrying Python evangelist since 1992, so take these comments as you may. They do, however, reflect the common experience of many developers who have taken time to explore what Python has to offer. Chapter Summary And that concludes the hype portion of this book. In this chapter, we’ve explored some of the reasons that people pick Python for their programming tasks. We’ve also seen how it is applied and looked at a representative sample of who is using it today. My goal is to teach Python, though, not to sell it. The best way to judge a language is to see it in action, so the rest of this book focuses entirely on the language details we’ve glossed over here. The next two chapters begin our technical introduction to the language. In them, we’ll explore ways to run Python programs, peek at Python’s byte code execution 

In [5]:
import ollama

def generate_answer_with_ollama(school, course, user_query):
    """
    Uses ChromaDB and Ollama to generate an AI-based response.

    The top 3 chunks retrieved for the question are passed to the model as
    reference material. When nothing was retrieved, the prompt instead tells
    the model to answer from general knowledge.

    :param school: School name
    :param course: Course name
    :param user_query: User's question
    :return: AI-generated response
    """
    # Placeholder text query_course_content returns when it finds nothing.
    no_content_message = "No relevant content found in the course materials."

    retrieved_content = query_course_content(school, course, user_query, top_k=3)

    # Drop the placeholder so it is not presented to the model as course
    # material; otherwise the context is never empty and the
    # general-knowledge fallback below can never trigger.
    passages = [passage for passage in retrieved_content if passage != no_content_message]

    # Prepare the context for the AI model
    context = "\n\n".join(passages)
    if context.strip():
        reference_material = context
    else:
        reference_material = "No relevant reference material found. Answer based on general knowledge."

    prompt = f"""
    You are an expert AI assistant for {school}'s {course} course.

    User Question: {user_query}

    Course Reference Material:
    {reference_material}

    Provide a detailed and easy-to-understand answer.
    """

    # Generate AI response using Ollama
    response = ollama.chat(model="llama2", messages=[{"role": "user", "content": prompt}])

    return response["message"]["content"]

# Example usage: run one question through retrieval + generation and show
# the model's reply
user_question = "Explain about architecture."
answer = generate_answer_with_ollama(
    school="School_of_technology",
    course="Python",
    user_query=user_question,
)

print("🧠 AI Response:\n", answer)


🧠 AI Response:
 Class inheritance or attribute inheritance refers to the process of automatically searching for an attribute or method name across a hierarchy of classes, starting from the bottom up. In Python, when you refer to an attribute or method of an object, the interpreter performs an automatic search of the object's attributes and methods, as well as those of its parent objects, until it finds the desired attribute or method.

Here are some key points to keep in mind:

* The search proceeds from the bottom up, meaning that the interpreter will first search the current object, then move up the hierarchy to search for an attribute or method in a parent object.
* If an attribute or method is not found in the current object or any of its parent objects, the interpreter will raise a NameError.
* Inheritance is also used to implement polymorphism, which allows objects of different classes to be treated as if they were of the same class.
* In Python, you can use the dot (`.`) notatio