In [None]:
!pip install chromadb langchain openai langchain_community requests beautifulsoup4

Collecting chromadb
  Downloading chromadb-1.0.5-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Collecting langchain_community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (252 bytes)
Collecting fastapi==0.115.9 (from chromadb)
  Downloading fastapi-0.115.9-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.34.2-py3-none-any.whl.metadata (6.5 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.25.0-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.21.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metada

In [None]:
import os
import chromadb
import requests
from bs4 import BeautifulSoup
import urllib.parse
import random
import time

In [None]:
#to generate query
def generate_query():
    """Prompt for a learning topic and objective and build the search query.

    Both answers are read with ``input()`` (topic first, then objective) and
    stripped of surrounding whitespace.

    Returns:
        str: ``"How to learn <topic> to achieve <objective>"``.
    """
    prompts = ("Enter the learning topic: ", "Enter your learning objective: ")
    # The list comprehension evaluates left to right, so the topic is asked first.
    answers = [input(text).strip() for text in prompts]
    return "How to learn {} to achieve {}".format(*answers)

In [None]:
#to fetch data
def bing_results(query):
    """Return five placeholder search-result links for ``query``.

    No request is made: the links point at example.com and only embed the
    percent-encoded query in their ``q`` parameter.

    Args:
        query: Search text to embed in each link.

    Returns:
        list[str]: Links numbered 1 through 5.
    """
    encoded_query = urllib.parse.quote(query)
    links = []
    for rank in range(1, 6):
        links.append(f"https://example.com/search_result_{rank}?q={encoded_query}")
    return links

def wikipedia_summary(query):
    """Fetch the first non-empty paragraph Wikipedia returns for a search.

    Requests the en.wikipedia.org search URL for ``query`` (10 second timeout,
    browser-like User-Agent) and scans the ``mw-parser-output`` div for the
    first ``<p>`` whose stripped text is non-empty.

    Args:
        query: Search text; it is percent-encoded into the URL.

    Returns:
        str: The paragraph text, ``"No Wikipedia summary found."`` when no
        suitable paragraph exists, or an ``"Error fetching Wikipedia
        summary: ..."`` message when the HTTP request fails.
    """
    not_found = "No Wikipedia summary found."
    search_url = "https://en.wikipedia.org/w/index.php?search=" + urllib.parse.quote(query)
    try:
        page = requests.get(search_url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=10)
        page.raise_for_status()
        parsed = BeautifulSoup(page.text, 'html.parser')

        main_content = parsed.find('div', class_='mw-parser-output')
        if not main_content:
            return not_found

        # Lazily strip each paragraph and stop at the first one with text.
        stripped_paragraphs = (p.text.strip() for p in main_content.find_all('p'))
        return next((text for text in stripped_paragraphs if text), not_found)
    except requests.RequestException as err:
        return f"Error fetching Wikipedia summary: {str(err)}"

def arxiv_papers(query):
    """Build three simulated arXiv entries for ``query`` (no network access).

    Args:
        query: Text interpolated into every title and abstract.

    Returns:
        list[tuple[str, str, str]]: ``(title, abstract, url)`` tuples numbered
        1 to 3; each URL ends in a random four-digit number drawn with
        ``random.randint(1000, 9999)``.
    """
    return [
        (
            f"Simulated Paper Title {number} on {query}",
            f"This is a simulated abstract for paper {number} discussing {query}.",
            f"https://arxiv.org/abs/1234.{random.randint(1000,9999)}",
        )
        for number in range(1, 4)
    ]

def semantic_scholar_papers(query):
    """Build three simulated Semantic Scholar entries for ``query``.

    Nothing is fetched; titles and abstracts are templated from ``query`` and
    each URL carries a random six-digit paper number.

    Args:
        query: Text interpolated into every title and abstract.

    Returns:
        list[tuple[str, str, str]]: ``(title, abstract, url)`` tuples numbered
        1 to 3.
    """
    def _entry(index):
        # One randint call per entry, made while the URL is being formatted.
        title = f"Simulated Research Title {index} on {query}"
        abstract = f"Simulated research abstract {index} on the topic of {query}."
        url = f"https://semanticscholar.org/paper/{random.randint(100000,999999)}"
        return (title, abstract, url)

    return list(_entry(index) for index in range(1, 4))

def youtube_transcript(query):
    """Return a canned four-sentence "transcript" that mentions ``query``.

    Args:
        query: Text substituted into each sentence.

    Returns:
        str: The four sentences joined by single spaces.
    """
    sentence_templates = (
        "In this video, we discuss {} in depth.",
        "The basics of {} are explained thoroughly.",
        "We move into advanced techniques related to {}.",
        "Finally, we wrap up the session with key takeaways about {}.",
    )
    return " ".join(template.format(query) for template in sentence_templates)

def fetch_information_simulated(query):
    """Collect results from every source helper for ``query`` and time the run.

    The helpers are called in a fixed order (Bing, Wikipedia, arXiv, Semantic
    Scholar, YouTube) and the elapsed wall-clock time is printed afterwards.

    Args:
        query: Search text handed to each helper.

    Returns:
        dict: Maps each source label to that helper's return value, in call
        order.
    """
    started_at = time.time()
    # (label, helper) pairs; order determines both call order and key order.
    collectors = (
        ('Bing_Web_Results', bing_results),
        ('Wikipedia_Summary', wikipedia_summary),
        ('Arxiv_Papers', arxiv_papers),
        ('Semantic_Scholar_Papers', semantic_scholar_papers),
        ('YouTube_Transcript', youtube_transcript),
    )
    gathered = {label: collect(query) for label, collect in collectors}
    duration = time.time() - started_at
    print(f"\n(Simulation completed in {duration:.2f} seconds)")
    return gathered

In [None]:
#to store retrieved data into vector database
def store_to_chroma(info_dict, query, collection_name="learning_resources"):
    """Persist fetched resources in a Chroma collection, one record per item.

    Each list entry in ``info_dict`` becomes its own document (tuples are
    joined with spaces); any non-list value is stored as a single document.
    Every record carries ``{"query": query, "source": <dict key>}`` metadata.

    Record ids are namespaced with a short digest of ``query`` and written with
    ``upsert``. The collection lives on disk (``./chroma_db``), so ids that only
    depend on the source name and a counter repeat on every run; namespacing
    keeps results of different queries from colliding, and ``upsert`` makes a
    re-run of the same query refresh its records.

    Args:
        info_dict: Mapping of source label to fetched content (list or scalar).
        query: The query the content was fetched for.
        collection_name: Name of the Chroma collection to write to.

    Returns:
        None. Progress and failures are reported via ``print``; exceptions are
        not propagated (best-effort storage).
    """
    import hashlib  # local import: only needed to derive the id namespace

    try:
        query_tag = hashlib.sha256(str(query).encode("utf-8")).hexdigest()[:12]

        documents = []
        metadatas = []
        ids = []

        for source, content in info_dict.items():
            if isinstance(content, list):
                texts = [
                    # str() each member so a tuple with non-string parts still joins.
                    " ".join(map(str, item)) if isinstance(item, tuple) else str(item)
                    for item in content
                ]
            else:
                texts = [str(content)]

            for text in texts:
                documents.append(text)
                metadatas.append({"query": query, "source": source})
                # The running position across all sources keeps ids unique per query.
                ids.append(f"{source}_{query_tag}_{len(ids)}")

        if not documents:
            # Nothing to write; avoid sending an empty batch to the collection.
            print("No data to store in ChromaDB.")
            return

        client = chromadb.PersistentClient(path="./chroma_db")
        collection = client.get_or_create_collection(collection_name)
        collection.upsert(documents=documents, metadatas=metadatas, ids=ids)
        print("Data stored in ChromaDB successfully.")
    except Exception as e:
        print(f"Error storing to ChromaDB: {str(e)}")

In [None]:
# conversational system to clarify
def ask(prompt):
    """Print ``prompt`` as a bot line and return the user's trimmed reply.

    Args:
        prompt: Question to show after the ``Bot: `` prefix.

    Returns:
        str: Text entered at the ``You: `` prompt, stripped of surrounding
        whitespace.
    """
    print("Bot: {}".format(prompt))
    reply = input("You: ")
    return reply.strip()

def run_chatbot():
    """Ask three personalization questions and return the answers.

    The familiarity answer is matched case-insensitively against
    Beginner/Intermediate/Expert and stored in that canonical capitalization;
    anything else falls back to ``"Beginner"``. The interest and format answers
    are stored as entered.

    Returns:
        dict: Keys ``"interest"``, ``"knowledge"`` and ``"format"``.
    """
    valid_levels = ("Beginner", "Intermediate", "Expert")

    print("Hi! Let me personalize your learning.")
    user_data = {}
    user_data["interest"] = ask("What topic are you interested in?")

    level = ask("How familiar are you? (Beginner/Intermediate/Expert)")
    # Normalize case so that e.g. "expert" or "EXPERT" is recognized instead of
    # silently being downgraded to the default.
    normalized_level = level.strip().capitalize()
    user_data["knowledge"] = normalized_level if normalized_level in valid_levels else "Beginner"

    user_data["format"] = ask("Preferred format? (Videos/Articles/Quizzes/Projects)")
    print(f"\nYour preferences:\n- Interest: {user_data['interest']}\n- Level: {user_data['knowledge']}\n- Format: {user_data['format']}")
    return user_data

In [None]:
#to generate report
def generate_structured_report(query, collection_name="learning_resources", model="llama3-8b-8192", timeout=60):
    """Generate a structured learning report for ``query`` via the Groq chat API.

    Up to 10 documents are retrieved from the persistent Chroma collection
    (``./chroma_db``) by similarity to ``query``, joined into a context block,
    and sent with a fixed instruction prompt to Groq's OpenAI-compatible
    chat-completions endpoint.

    Args:
        query: The learning query; used for retrieval and embedded in the prompt.
        collection_name: Chroma collection to read from.
        model: Model identifier placed in the request payload.
        timeout: Seconds to wait for the HTTP call before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        str: The generated message content, or ``"No text generated."`` when
        the response carries no usable choice.

    Raises:
        ValueError: If the ``GROQ_API_KEY`` environment variable is not set.
        Exception: With an ``"Error from Groq API: ..."`` message for HTTP or
            network failures, or ``"Error generating report: ..."`` for any
            other failure; the original exception is attached as ``__cause__``.
    """
    groq_api_key = os.getenv("GROQ_API_KEY")
    if not groq_api_key:
        raise ValueError("GROQ_API_KEY not set in environment variables.")

    groq_api_url = "https://api.groq.com/openai/v1/chat/completions"

    try:
        client = chromadb.PersistentClient(path="./chroma_db")
        collection = client.get_or_create_collection(name=collection_name)

        results = collection.query(query_texts=[query], n_results=10)
        # One inner list per query text; tolerate a missing or empty outer list.
        docs_per_query = results.get('documents') or [[]]
        relevant_docs = docs_per_query[0] or []
        combined_context = "\n\n".join(relevant_docs) if relevant_docs else "No relevant documents found."

        prompt = f"""
You are a learning assistant. Based on the following educational content, generate a detailed, structured report. Include:

1. Introduction: Explain the topic and why it's important.
2. Learning Objectives: What the user will understand by the end.
3. Core Concepts: Break down main ideas with logical progression.
4. Visual Aids: Include at least one diagram (use Mermaid syntax if applicable).
5. Citations: Reference relevant papers and sources.
6. Next Steps: Suggest further reading or advanced topics.

Query: {query}

Educational Content:
{combined_context}
"""

        payload = {
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.7
        }

        headers = {
            "Authorization": f"Bearer {groq_api_key}",
            "Content-Type": "application/json"
        }

        response = requests.post(groq_api_url, json=payload, headers=headers, timeout=timeout)
        response.raise_for_status()

        # An empty "choices" list must yield the fallback text, not an IndexError.
        choices = response.json().get('choices') or [{}]
        message = choices[0].get('message') or {}
        content = message.get('content')
        return content if content is not None else "No text generated."
    except requests.RequestException as e:
        raise Exception(f"Error from Groq API: {str(e)}") from e
    except Exception as e:
        raise Exception(f"Error generating report: {str(e)}") from e


In [None]:
# main function
def main():
    """Run the interactive pipeline end to end and print the report.

    Steps, in order: preference chat, query prompt, simulated fetch, storage in
    Chroma, report generation. Any exception raised along the way is caught and
    printed as ``Error: <message>`` rather than propagated.
    """
    try:
        # The chatbot's answers are collected and echoed but not used below.
        run_chatbot()
        query = generate_query()
        store_to_chroma(fetch_information_simulated(query), query)
        # Generate first so the header is only printed when a report exists.
        report_text = generate_structured_report(query)
        print("\nGenerated Report:\n", report_text, sep="\n")
    except Exception as e:
        print(f"Error: {str(e)}")

# Entry point: start the interactive pipeline when this code runs as the
# top-level module rather than being imported.
if __name__ == "__main__":
    main()

Hi! Let me personalize your learning.
Bot: What topic are you interested in?
You: quantum mechanics
Bot: How familiar are you? (Beginner/Intermediate/Expert)
You: beginner
Bot: Preferred format? (Videos/Articles/Quizzes/Projects)
You: text

Your preferences:
- Interest: quantum mechanics
- Level: Beginner
- Format: text
Enter the learning topic: quantum mechanics
Enter your learning objective: deep knowledge

(Simulation completed in 1.83 seconds)


/root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:18<00:00, 4.39MiB/s]


Data stored in ChromaDB successfully.

Generated Report:

**Report: How to Learn Quantum Mechanics to Achieve Deep Knowledge**

**Introduction**

Quantum mechanics is a fundamental concept in modern physics that has far-reaching implications for our understanding of the universe. However, the subject can be daunting for many students and researchers, requiring a deep understanding of complex mathematical and theoretical concepts. In this report, we will explore the key strategies for learning quantum mechanics and achieving a deep understanding of the subject.

**Learning Objectives**

By the end of this report, the reader will be able to:

1. Understand the basics of quantum mechanics and its importance in modern physics.
2. Learn advanced techniques for studying quantum mechanics, including the use of mathematical models and computational simulations.
3. Apply the principles of quantum mechanics to real-world problems and experiments.

**Core Concepts**

The following core concepts a