# =========================================================
# MUKTHI GURU: FINAL VISUALIZATION EDITION (3D + Interactive)
# =========================================================
# Run this notebook in Google Colab (with GPU Runtime).

In [None]:
# ---------------------------------------------------------
# 1. INSTALLATION & SETUP
# ---------------------------------------------------------
print("‚è≥ Installing dependencies... (Allow ~2-3 mins)")

# A. Install System Dependencies
# ffmpeg is required by the FFmpegExtractAudio post-processor used when
# downloading audio for ingestion. stdout is discarded (> /dev/null) to keep
# the cell output short.
# NOTE(review): nodejs is installed here but nothing in this notebook visibly
# uses it -- confirm whether it is still needed.
!apt-get update -qq && apt-get install -y nodejs ffmpeg > /dev/null

# B. Install Python Libraries (Added plotly for 3D)
# The second command uses --no-deps so pip installs only the listed packages
# without pulling in their dependencies.
# NOTE(review): the xformers/trl upper bounds are presumably there for
# compatibility with unsloth -- confirm before relaxing them.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" --quiet
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes --quiet
# Retrieval, transcription and visualization libraries used by the later cells.
!pip install -q qdrant-client sentence-transformers youtube-transcript-api yt-dlp faster-whisper matplotlib scikit-learn pandas plotly


In [None]:
# ---------------------------------------------------------
# 2. DRIVE PERSISTENCE
# ---------------------------------------------------------
# `os` is imported unconditionally: the later cells (and the makedirs call
# below) need it even when the Colab import fails.
import os

try:
    from google.colab import drive
except ImportError:
    # Not running on Colab: keep the vector store in a local directory.
    print("‚ö†Ô∏è Google Colab not detected. Skipping Drive mount.")
    QDRANT_PATH = "./mukthi_qdrant_visual_v1"
else:
    # On Colab: mount Google Drive and keep the vector store there.
    drive.mount('/content/drive', force_remount=True)
    QDRANT_PATH = "/content/drive/MyDrive/mukthi_qdrant_visual_v1"

# exist_ok=True makes this a no-op when the directory is already present.
os.makedirs(QDRANT_PATH, exist_ok=True)


In [None]:
# ---------------------------------------------------------
# 3. IMPORTS
# ---------------------------------------------------------
import torch
import time
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import plotly.express as px  # THE 3D WIZARD

try:
    from unsloth import FastLanguageModel
except ImportError:
    print("‚ö†Ô∏è 'unsloth' package not found. Model loading will fail.")
    FastLanguageModel = None # Stub

from sentence_transformers import SentenceTransformer, CrossEncoder
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
from faster_whisper import WhisperModel
from youtube_transcript_api import YouTubeTranscriptApi as YTApi
import yt_dlp

# Prefer the GPU when one is available; otherwise fall back to CPU
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"üöÄ Acceleration Status: {DEVICE.upper()}")


In [None]:
# ---------------------------------------------------------
# 4. LOAD MODELS
# ---------------------------------------------------------

# A. Load Llama-3 (Unsloth 4-bit)
# `FastLanguageModel` is None when the unsloth import failed earlier; in that
# case `model` and `tokenizer` are set to None so later cells can detect it.
print("\nü¶ô Loading Llama-3 8B (4-bit)...")
if FastLanguageModel:
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "unsloth/llama-3-8b-Instruct-bnb-4bit",
        max_seq_length = 2048,
        dtype = None,
        load_in_4bit = True,
    )
    FastLanguageModel.for_inference(model)
else:
    print("‚ùå FastLanguageModel/Unsloth not available.")
    model, tokenizer = None, None

# B. Load SOTA Retrieval Models
print("üß† Loading BGE Embeddings & Reranker...")
embed_model = SentenceTransformer("BAAI/bge-base-en-v1.5", device=DEVICE)
reranker = CrossEncoder("BAAI/bge-reranker-base", device=DEVICE)

# C. Load Whisper
# float16 is only usable on the GPU backend; on CPU the model fails to load
# with that compute type, so fall back to int8 there.
WHISPER_COMPUTE_TYPE = "float16" if DEVICE == "cuda" else "int8"
whisper_model = WhisperModel("base", device=DEVICE, compute_type=WHISPER_COMPUTE_TYPE)

# D. Connect to Qdrant
# Local on-disk mode: the store lives in the QDRANT_PATH directory.
client = QdrantClient(path=QDRANT_PATH)
print(f"üóÑÔ∏è Connected to Knowledge Base at: {QDRANT_PATH}")

COLLECTION_NAME = "mukthi_teachings_visual"

# Ensure Collection Exists
# The collection is created with 768-dimensional cosine vectors.
# NOTE(review): 768 presumably matches the output size of the embedding model
# loaded above -- confirm if the embedding model is ever swapped.
existing_names = {c.name for c in client.get_collections().collections}
if COLLECTION_NAME not in existing_names:
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=768, distance=Distance.COSINE),
    )
    print(f"‚úÖ Created new collection '{COLLECTION_NAME}'.")
else:
    print(f"‚úÖ Loaded existing collection '{COLLECTION_NAME}'.")

print("‚úÖ System Fully Loaded.")


In [None]:
# ---------------------------------------------------------
# 5. INGESTION FUNCTIONS
# ---------------------------------------------------------

def download_audio(url):
    """Download the audio track of *url* and convert it to MP3.

    The file is written to ``<QDRANT_PATH>/audio.mp3``; any previous file at
    that location is removed first so a failed download cannot be mistaken
    for a fresh one.

    Args:
        url: Video URL understood by yt-dlp.

    Returns:
        Path of the MP3 file.

    Raises:
        FileNotFoundError: If yt-dlp finished without producing the MP3.
        Exception: Whatever yt-dlp raises on download/conversion failure.
    """
    audio_file = os.path.join(QDRANT_PATH, "audio.mp3")
    if os.path.exists(audio_file):
        os.remove(audio_file)

    ydl_opts = {
        'format': 'bestaudio/best',
        # Use the %(ext)s placeholder so the downloaded file has a real
        # extension and the audio post-processor swaps it for ".mp3",
        # giving exactly `audio_file` no matter where dots appear in the
        # directory path.
        'outtmpl': os.path.join(QDRANT_PATH, "audio.%(ext)s"),
        'quiet': True,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

    # Fail loudly here rather than handing a non-existent path to the caller.
    if not os.path.exists(audio_file):
        raise FileNotFoundError(f"Expected audio file was not created: {audio_file}")

    return audio_file

def _extract_video_id(url):
    """Return the YouTube video id found in *url*, or None when there is none."""
    from urllib.parse import parse_qs, urlparse

    try:
        parsed = urlparse(url.strip())
        # Standard watch URLs carry the id in the `v` query parameter.
        v_values = parse_qs(parsed.query).get("v")
        if v_values:
            return v_values[0]

        host = (parsed.hostname or "").lower()
        segments = [part for part in parsed.path.split("/") if part]
        # Short links: https://youtu.be/<id>
        if host.endswith("youtu.be") and segments:
            return segments[0]
        # Path-style links: /shorts/<id>, /embed/<id>, /live/<id>
        if (host.endswith("youtube.com") and len(segments) >= 2
                and segments[0] in ("shorts", "embed", "live")):
            return segments[1]
    except ValueError:
        # Malformed URL (e.g. unbalanced IPv6 brackets): try the legacy split.
        pass

    # Legacy behaviour: anything containing "v=" is accepted as before.
    if "v=" in url:
        return url.split("v=")[1].split("&")[0]
    return None


def get_transcript(url):
    """Return the transcript text of a YouTube video.

    Official captions are tried first; if they cannot be fetched the audio is
    downloaded and transcribed with the Whisper model.

    Args:
        url: YouTube URL (watch, youtu.be, shorts, embed or live form).

    Returns:
        The transcript as a single space-joined string, or "" when the URL
        contains no video id or both transcription paths fail.
    """
    video_id = _extract_video_id(url)
    if video_id is None:
        print("‚ö†Ô∏è Invalid URL format.")
        return ""

    try:
        if hasattr(YTApi, "get_transcript"):
            # Older youtube-transcript-api: class-level call returning dicts.
            entries = YTApi.get_transcript(video_id)
            text = " ".join(entry['text'] for entry in entries)
        else:
            # NOTE(review): newer releases are expected to expose an
            # instance-level fetch() yielding snippet objects with a `.text`
            # attribute -- confirm against the installed version.
            text = " ".join(snippet.text for snippet in YTApi().fetch(video_id))
        print("‚úÖ Found official captions.")
        return text
    except Exception as e:
        # Best-effort: any caption failure falls through to Whisper, but the
        # exception type is shown so real errors are not silently hidden.
        print(f"‚ö†Ô∏è Captions unavailable. Switching to Whisper... ({type(e).__name__})")

    try:
        audio_path = download_audio(url)
        segments, _ = whisper_model.transcribe(audio_path, beam_size=1)
        return " ".join(seg.text for seg in segments)
    except Exception as e:
        print(f"‚ùå Transcription failed: {e}")
        return ""

def ingest_youtube(url):
    """Transcribe a YouTube video, embed it in chunks and store it in Qdrant.

    The transcript is cut into 500-character windows that overlap by 50
    characters. Each chunk is embedded and upserted into COLLECTION_NAME with
    its text and source URL as payload.

    Point ids are UUIDs derived from the URL and the chunk index. Unlike the
    previous timestamp-plus-index scheme they cannot collide between
    different videos ingested seconds apart, and re-ingesting the same URL
    overwrites its earlier chunks instead of duplicating them.

    Args:
        url: YouTube URL to ingest.

    Returns:
        None. Returns early without storing anything when no transcript
        could be obtained.
    """
    import uuid

    print(f"üì• Processing: {url}")
    text = get_transcript(url)

    if not text:
        return

    chunk_size = 500
    overlap = 50
    step = chunk_size - overlap
    chunks = [text[start:start + chunk_size] for start in range(0, len(text), step)]

    print(f"üß© Embedding {len(chunks)} chunks...")

    embeddings = embed_model.encode(chunks, normalize_embeddings=True, show_progress_bar=True)

    points = [
        PointStruct(
            # Deterministic per (url, chunk index); passed as a string UUID.
            id=str(uuid.uuid5(uuid.NAMESPACE_URL, f"{url}#chunk-{index}")),
            vector=vector.tolist(),
            payload={"text": chunk, "source": url},
        )
        for index, (chunk, vector) in enumerate(zip(chunks, embeddings))
    ]
    client.upsert(collection_name=COLLECTION_NAME, points=points)
    print("‚úÖ Ingestion Complete.")


In [None]:
# ---------------------------------------------------------
# 6. 3D VISUALIZATION FUNCTION (INTERACTIVE)
# ---------------------------------------------------------
import plotly.io as pio
# Set Plotly's default renderer to 'colab'; this applies to every later
# fig.show() call in the notebook.
pio.renderers.default = 'colab'

def visualize_knowledge():
    """Show an interactive 3D scatter plot of the stored embeddings.

    Fetches up to 500 points (vectors and payloads) from COLLECTION_NAME,
    projects the vectors to three dimensions with PCA and renders them with
    Plotly. Hovering a point shows the first 100 characters of its text.

    Returns:
        None. Prints a warning and returns early when fewer than three
        points are stored.
    """
    print("\nüé® Generating Interactive 3D Brain Map...")

    # Pull one page of points; the next-page offset is not needed.
    stored_points, _next_offset = client.scroll(
        collection_name=COLLECTION_NAME,
        limit=500,
        with_payload=True,
        with_vectors=True,
    )

    point_count = len(stored_points)
    if point_count < 3:
        print("‚ö†Ô∏è Not enough data. Ingest a video first!")
        return

    # Stack the vectors into one matrix and build the hover labels.
    embedding_matrix = np.array([point.vector for point in stored_points])
    hover_labels = []
    for point in stored_points:
        hover_labels.append(point.payload['text'][:100] + "...")

    # Project the embeddings onto their first three principal components.
    reducer = PCA(n_components=3)
    coords = reducer.fit_transform(embedding_matrix)

    frame = pd.DataFrame({
        'x': coords[:, 0],
        'y': coords[:, 1],
        'z': coords[:, 2],
        'content': hover_labels,
    })

    # Points are coloured by their z coordinate; dark template for contrast.
    figure = px.scatter_3d(
        frame,
        x='x',
        y='y',
        z='z',
        hover_name='content',
        color='z',
        opacity=0.7,
        title=f"Mukthi Guru Brain Map ({point_count} Memories)",
        template="plotly_dark",
    )

    figure.update_traces(marker=dict(size=5))
    figure.show()


In [None]:
# ---------------------------------------------------------
# 7. RAG PIPELINE
# ---------------------------------------------------------

def get_guru_response(question):
    """Answer *question* with retrieval-augmented generation.

    Steps: embed the question, fetch the 10 nearest chunks from Qdrant,
    rerank them with the cross-encoder, keep the best three as context and
    let the language model generate the answer.

    Args:
        question: The user's question as plain text.

    Returns:
        The generated answer, or a fixed fallback sentence when the
        collection is empty, no chunk passes the rerank threshold, or the
        language model is not loaded.
    """
    # Retrieval
    query_instruction = "Represent this sentence for searching relevant passages: "
    q_vec = embed_model.encode(query_instruction + question, normalize_embeddings=True)

    search_result = client.query_points(collection_name=COLLECTION_NAME, query=q_vec, limit=10)
    docs = [point.payload['text'] for point in search_result.points]

    if not docs:
        return "I have no knowledge yet."

    # Reranking
    scores = reranker.predict([[question, doc] for doc in docs])

    # NOTE(review): the -2.0 cut-off only filters if predict() returns raw
    # logits; if the reranker applies a sigmoid, every score passes -- confirm.
    candidates = [(doc, score) for doc, score in zip(docs, scores) if score > -2.0]
    top_results = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:3]

    if not top_results:
        return "I am sorry, but I do not have enough knowledge on this topic yet."

    context_text = "\n\n".join([f"Teaching: {res[0]}" for res in top_results])

    # Generation
    prompt_template = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are AskMukthiGuru, a compassionate spiritual guide based on the teachings of Sri Preethaji and Sri Krishnaji.
Context: {context_text}
User Question: {question}
Answer:<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

    if not model or not tokenizer:
        return "I cannot answer because the model is not loaded (Unsloth requires GPU/Colab)."

    inputs = tokenizer([prompt_template], return_tensors="pt").to(DEVICE)

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        use_cache=True,
        # Explicit so that `temperature` takes effect regardless of the
        # checkpoint's default generation settings.
        do_sample=True,
        temperature=0.3,
        repetition_penalty=1.1,
    )

    # Decode only the newly generated tokens. Splitting the full decoded text
    # on the word "assistant" would cut off any answer that itself contains
    # that word.
    prompt_length = inputs["input_ids"].shape[1]
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()


In [None]:
# =========================================================
# MAIN MENU
# =========================================================

print("\nüïâÔ∏è AskMukthiGuru System Ready!")

# Interactive loop: keeps showing the menu until the user picks "4".
while True:
    print("\n---------------- MENU ----------------")
    print("1. Ingest YouTube Video")
    print("2. üß† Visualize 3D Brain Map")
    print("3. Chat with Guru")
    print("4. Exit")
    # Strip so that accidental surrounding whitespace does not invalidate the choice.
    choice = input("Enter choice (1-4): ").strip()

    if choice == '1':
        # A failed ingest (network, storage, ...) is reported and the menu
        # keeps running, matching how chat errors are handled below.
        try:
            ingest_youtube(input("Enter YouTube URL: ").strip())
        except Exception as e:
            print(f"Error: {e}")
    elif choice == '2':
        try:
            visualize_knowledge()
        except Exception as e:
            print(f"Error: {e}")
    elif choice == '3':
        print("\n--- Chat Started (Type 'back' to exit) ---")
        while True:
            q = input("You: ")
            if q.strip().lower() in ['back', 'exit']:
                break
            print("Guru is thinking...")
            try:
                print(f"\nMukthi Guru: {get_guru_response(q)}\n")
            except Exception as e:
                print(f"Error: {e}")
    elif choice == '4':
        print("Namaste.")
        break
    else:
        # Previously an unknown choice silently redisplayed the menu.
        print("Invalid choice. Please enter a number from 1 to 4.")
