In [6]:
%pip install -q nbformat
import nbformat as nbf

# Create the empty v4 notebook and the list that collects its cells in order.
# No success message is printed here: nothing has been written to disk yet, so
# announcing the file at this point would be misleading if a later step fails.
nb = nbf.v4.new_notebook()
cells = []

# --- Step 0: The PowerShell Foundation ---
# Title cell: project name, goal, model, and how the environment was started.
# The markdown source is assembled one rendered line per list entry.
cells.append(
    nbf.v4.new_markdown_cell(
        source="\n".join([
            "# üìì The Academic Truth Engine",
            "**Goal:** 80%+ Accuracy in Leadership & Statistics",
            "**System:** IBM Granite 3.0 via Replicate",
            "",
            "### Step 0: Initializing the Environment",
            "This project was initialized via **Windows PowerShell** to ensure local kernel stability.",
            "- **Command:** `jupyter notebook`",
        ])
    )
)

# --- Step 1: Tool Installation ---
# Code cell that installs the retrieval stack with %pip and then applies
# nest_asyncio. A trailing "\\" in an entry becomes a single backslash in the
# generated cell, i.e. a line continuation of the %pip command.
cells.append(
    nbf.v4.new_code_cell(
        source="\n".join([
            "# Step 1: Install Academic Research Tools",
            "%pip install -q llama-index llama-index-llms-replicate llama-index-embeddings-huggingface \\",
            "  llama-index-readers-file llama-index-packs-fusion-retriever sentence-transformers \\",
            "  nest-asyncio requests replicate",
            "",
            "import nest_asyncio",
            "nest_asyncio.apply()",
            "print('‚úÖ Tools installed successfully')",
        ])
    )
)

# --- Step 2: Granite 3.0 Logic ---
# Code cell that registers the Replicate-hosted Granite model and a HuggingFace
# embedding model on llama-index's global Settings.
# Token handling in the generated cell:
#   * a REPLICATE_API_TOKEN already present in the environment is reused, so
#     re-running the cell does not prompt again or clobber a configured token;
#   * the prompt names the variable that is actually set (…_TOKEN, not …_KEY);
#   * a blank entry is rejected immediately instead of failing later at the
#     first model call.
cells.append(nbf.v4.new_code_cell(
    "# Step 2: Configure IBM Granite & Guardrails\n"
    "import os\n"
    "from getpass import getpass\n"
    "from llama_index.core import Settings\n"
    "from llama_index.llms.replicate import Replicate\n"
    "from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n\n"
    "# Replicate API token handles the connection to Granite 3.0.\n"
    "# Reuse a token that is already exported; prompt only when it is missing.\n"
    "if not os.environ.get('REPLICATE_API_TOKEN'):\n"
    "    os.environ['REPLICATE_API_TOKEN'] = getpass('Enter your REPLICATE_API_TOKEN: ').strip()\n"
    "if not os.environ['REPLICATE_API_TOKEN']:\n"
    "    raise ValueError('REPLICATE_API_TOKEN is empty; cannot connect to Replicate.')\n\n"
    "llm = Replicate(\n"
    "    model='ibm-granite/granite-3.0-8b-instruct',\n"
    "    temperature=0.1,  # Low temperature for high accuracy\n"
    "    system_prompt=(\n"
    "        'You are an expert academic RAG system. Answer ONLY using the provided context. '\n"
    "        'Never hallucinate. Never editorialize. If the answer is not in the context, say so. '\n"
    "        'Provide factual responses with 2-4 evidence bullets.'\n"
    "    )\n"
    ")\n"
    "Settings.llm = llm\n"
    "Settings.embed_model = HuggingFaceEmbedding(model_name='BAAI/bge-small-en-v1.5')\n"
    "print('‚úÖ Granite 3.0 Guardrails Active')"
))

# --- Step 3: Google Drive Ingestion ---
# Code cell that defines download_pdf() and uses it to fetch the user's PDF
# into data/source.pdf. Hardening applied to the generated code:
#   * the cell imports `os` itself instead of relying on the Step 2 cell;
#   * a link without a recognizable file id raises a clear ValueError rather
#     than an AttributeError from calling .group() on None;
#   * the request has a timeout and HTTP errors are raised;
#   * the payload must look like a PDF before it is written, so an HTML error
#     or confirmation page is not silently saved as source.pdf.
cells.append(nbf.v4.new_code_cell(
    "# Step 3: PDF Ingestion from Google Drive\n"
    "import os, re\n"
    "import requests\n\n"
    "def download_pdf(url, path):\n"
    "    '''Download the Google Drive file behind `url` and save it to `path`.'''\n"
    "    match = re.search(r'/d/([A-Za-z0-9_-]+)', url) or re.search(r'id=([A-Za-z0-9_-]+)', url)\n"
    "    if match is None:\n"
    "        raise ValueError(f'No Google Drive file id found in link: {url!r}')\n"
    "    f_id = match.group(1)\n"
    "    d_url = f'https://drive.google.com/uc?export=download&id={f_id}'\n"
    "    r = requests.get(d_url, timeout=60)\n"
    "    r.raise_for_status()\n"
    "    # Drive may answer with an HTML page instead of the file itself.\n"
    "    if b'%PDF' not in r.content[:1024]:\n"
    "        raise ValueError('Google Drive did not return a PDF. Check the sharing settings of the link.')\n"
    "    with open(path, 'wb') as f:\n"
    "        f.write(r.content)\n"
    "    print(f'‚úÖ PDF Secured: {path}')\n\n"
    "drive_link = input('üìå Paste Google Drive Link: ').strip()\n"
    "os.makedirs('data', exist_ok=True)\n"
    "pdf_path = 'data/source.pdf'\n"
    "download_pdf(drive_link, pdf_path)"
))

# --- Step 4: Semantic Chunking ---
# Adds two cells in order: a markdown explanation, then the code that loads the
# PDF, splits it with SemanticSplitterNodeParser and tags every node's source.
cells.extend([
    nbf.v4.new_markdown_cell(
        source="\n".join([
            "### Step 4: Semantic Chunking",
            "We use semantic splitting to ensure that leadership theories and statistical data stay intact.",
        ])
    ),
    nbf.v4.new_code_cell(
        source="\n".join([
            "from llama_index.core import SimpleDirectoryReader",
            "from llama_index.core.node_parser import SemanticSplitterNodeParser",
            "",
            "docs = SimpleDirectoryReader(input_files=[pdf_path]).load_data()",
            "parser = SemanticSplitterNodeParser(buffer_size=3, breakpoint_percentile_threshold=95, embed_model=Settings.embed_model)",
            "nodes = parser.get_nodes_from_documents(docs)",
            "for n in nodes: n.metadata['source'] = 'source.pdf'",
            "print(f'üîç Created {len(nodes)} high-fidelity semantic nodes.')",
        ])
    ),
])

# --- Step 5: Query Fusion ---
# Adds two cells in order: a markdown explanation, then the code that downloads
# QueryRewritingRetrieverPack and instantiates it over the Step 4 nodes.
cells.extend([
    nbf.v4.new_markdown_cell(
        source="\n".join([
            "### Step 5: Query Fusion",
            "This engine rewrites your question 6 times to ensure no fact is missed.",
        ])
    ),
    nbf.v4.new_code_cell(
        source="\n".join([
            "from llama_index.core.llama_pack import download_llama_pack",
            "QueryRewritingRetrieverPack = download_llama_pack('QueryRewritingRetrieverPack', './query_rewriting_pack')",
            "query_rewriting_pack = QueryRewritingRetrieverPack(nodes, chunk_size=256, vector_similarity_top_k=5, num_queries=6)",
            "print('üöÄ Query Fusion Engine Live')",
        ])
    ),
])

# --- Step 6: Interactive Loop ---
# Code cell with the question/answer loop. Typing "end" (any letter case)
# leaves the loop. A blank entry is skipped and the prompt is shown again, so
# an accidental Enter does not send an empty query to the retriever and LLM.
# "\\n" and "\"" below are escapes for the *generated* source, where they
# become "\n" and a plain double quote.
cells.append(nbf.v4.new_code_cell(
    "# Step 6: Final Academic Research Loop\n"
    "print('--- RESEARCH MODE ACTIVE (Type \"end\" to exit) ---')\n"
    "while True:\n"
    "    q = input('\\nüü¶ Research Question: ').strip()\n"
    "    if not q:\n"
    "        continue  # nothing typed: ask again instead of querying\n"
    "    if q.lower() == 'end':\n"
    "        break\n"
    "    ans = query_rewriting_pack.run(q)\n"
    "    print(f'\\nüß† FACTUAL ANALYSIS:\\n{ans}')\n"
    "    print('\\nüìç SOURCE: Absolute Reference to source.pdf')"
))

# Attach the collected cells to the notebook, serialize it as UTF-8, and only
# then report the file name to the user.
output_path = 'Academic_Truth_Engine.ipynb'
nb['cells'] = cells
with open(output_path, mode='w', encoding='utf-8') as notebook_file:
    nbf.write(nb, notebook_file)
print(f"üìÇ '{output_path}' has been created! You can now download it from your file menu.")

Note: you may need to restart the kernel to use updated packages.
üìÇ 'Academic_Truth_Engine.ipynb' has been created! You can now download it from your file menu.
üìÇ 'Academic_Truth_Engine.ipynb' has been created! You can now download it from your file menu.



[notice] A new release of pip is available: 25.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip
