In [56]:
# ============================================================================
# CELL 1: Install Dependencies
# ============================================================================
print("üì¶ Installing dependencies (this may take 2-3 minutes)...")
print("‚ö†Ô∏è  You may see some dependency warnings - these are harmless!\n")

# Install core dependencies
!pip install -q sentence-transformers chromadb --no-warn-conflicts
!pip install -q transformers accelerate bitsandbytes --no-warn-conflicts
!pip install -q google-generativeai --no-warn-conflicts

print("\n‚úÖ All dependencies installed!")
print("üìù Note: Dependency warnings can be ignored - they don't affect functionality.")

üì¶ Installing dependencies (this may take 2-3 minutes)...


‚úÖ All dependencies installed!


In [57]:
# ============================================================================
# CELL 2: Import Libraries & Check GPU
# ============================================================================
import json
import numpy as np
import torch
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
from google.colab import files
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
import google.generativeai as genai
from getpass import getpass
import warnings
warnings.filterwarnings('ignore')

# Report which accelerator (if any) this runtime exposes
if not torch.cuda.is_available():
    print("‚ö†Ô∏è  No GPU detected. Please enable GPU: Runtime > Change runtime type > T4 GPU")
else:
    gpu_name = torch.cuda.get_device_name(0)
    # total_memory is reported in bytes; convert to decimal gigabytes for display
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"‚úÖ GPU Available: {gpu_name}", f"üíæ GPU Memory: {gpu_memory:.1f} GB", sep="\n")

print("‚úÖ Libraries imported successfully!")


‚úÖ GPU Available: Tesla T4
üíæ GPU Memory: 15.8 GB
‚úÖ Libraries imported successfully!


In [58]:
# ============================================================================
# CELL 3: Upload and Load Data
# ============================================================================
print("üìÅ Please upload your rupp-data.txt file...")
uploaded = files.upload()

# Guard against an empty result (e.g. the upload dialog was dismissed):
# indexing the first key would otherwise fail with an opaque IndexError.
if not uploaded:
    raise RuntimeError("No file was uploaded. Re-run this cell and select rupp-data.txt.")

# Load the JSON data from the first uploaded file
filename = next(iter(uploaded))
with open(filename, 'r', encoding='utf-8') as f:
    raw_data = json.load(f)

# Compute the category set once and reuse it for both summary lines
category_names = sorted({item['category'] for item in raw_data})

print(f"\n‚úÖ Loaded {len(raw_data)} Q&A pairs")
print(f"üìä Categories found: {len(category_names)}")
print(f"üìÇ Categories: {', '.join(category_names)}")
if raw_data:
    # Only show a sample when the file actually contains entries
    print("\nüîç Sample entry:")
    print(json.dumps(raw_data[0], indent=2))


üìÅ Please upload your rupp-data.txt file...


Saving rupp-data.txt to rupp-data (1).txt

‚úÖ Loaded 256 Q&A pairs
üìä Categories found: 108
üìÇ Categories: academic, academic_calendar_detail, academic_integrity, academic_probation, academic_support, adding_dropping, admission_process, admissions, after_graduation, alumni, application_process, attendance_policy, biology_program, campus, career_prep, ceremonies, changing_programs, chemistry_program, community, computer_science_detailed, contact, contact_departments, contact_specific, continuing_education, cost_living, course_load, credits, departments, development, disability, diversity, documents, double_major, electives, employment, engineering_detailed, english_detailed, english_requirements, environment, exam_format, exams, extracurricular, facilities_detail, faculty, famous_alumni, fees, fees_scholarship, financial, food, future_plans, general_info, geography_program, grade_disputes, graduation, graduation_requirements, history, history_program, homework, housing_detail, inte

In [59]:
# ============================================================================
# CELL 4: Data Processing
# ============================================================================
def process_documents(data):
    """Turn raw Q&A records into documents ready for embedding.

    Each input item must provide 'question', 'answer' and 'category'.
    The result has one dict per item with:
      - 'id':       "doc_<position>"
      - 'text':     the question and answer joined into one string
      - 'metadata': category, question, answer and the integer position
    """
    return [
        {
            'id': f"doc_{position}",
            'text': f"Question: {record['question']}\nAnswer: {record['answer']}",
            'metadata': {
                'category': record['category'],
                'question': record['question'],
                'answer': record['answer'],
                'doc_id': position,
            },
        }
        for position, record in enumerate(data)
    ]

# Build the document list from the uploaded records and show the first one
documents = process_documents(raw_data)
print(f"‚úÖ Processed {len(documents)} documents")
print(f"\nüìÑ Sample processed document:")
print(
    f"ID: {documents[0]['id']}",
    f"Text preview: {documents[0]['text'][:150]}...",
    f"Category: {documents[0]['metadata']['category']}",
    sep="\n",
)

‚úÖ Processed 256 documents

üìÑ Sample processed document:
ID: doc_0
Text preview: Question: What is RUPP?
Answer: RUPP (Royal University of Phnom Penh) is Cambodia's oldest and largest national research university, established in 19...
Category: general_info


In [60]:
# ============================================================================
# CELL 5: Initialize Embedding Model (sentence-transformers)
# ============================================================================
print(
    "ü§ñ Loading embedding model: all-MiniLM-L6-v2...",
    "‚è≥ This will take 10-20 seconds...",
    sep="\n",
)
embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
print("‚úÖ Embedding model loaded!")

# Encode a throwaway string to confirm the model works and to read its output size
test_embedding = embedder.encode("test query")
print(f"üìä Embedding dimension: {len(test_embedding)}")
print("üìè Model size: ~80MB")
print("‚ö° Speed: ~1000 sentences/second")

ü§ñ Loading embedding model: all-MiniLM-L6-v2...
‚è≥ This will take 10-20 seconds...
‚úÖ Embedding model loaded!
üìä Embedding dimension: 384
üìè Model size: ~80MB
‚ö° Speed: ~1000 sentences/second


In [61]:
# ============================================================================
# CELL 6: Generate Embeddings for All Documents
# ============================================================================
def generate_embeddings(documents, embedder):
    """Encode the 'text' field of every document with `embedder`.

    Returns whatever `embedder.encode` returns for the list of texts; the
    call requests normalized vectors as a NumPy array, in batches of 32,
    with a progress bar.
    """
    texts = [entry['text'] for entry in documents]
    print(f"üîÑ Generating embeddings for {len(texts)} documents...")

    # Encoding options gathered in one place so they are easy to tune
    encode_options = {
        'batch_size': 32,
        'show_progress_bar': True,
        'normalize_embeddings': True,
        'convert_to_numpy': True,
    }
    return embedder.encode(texts, **encode_options)

# Embed the whole corpus and report the resulting array's shape and size
embeddings = generate_embeddings(documents, embedder)
print(
    f"‚úÖ Generated embeddings with shape: {embeddings.shape}",
    f"üíæ Memory usage: ~{embeddings.nbytes / 1e6:.2f} MB",
    sep="\n",
)

üîÑ Generating embeddings for 256 documents...


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

‚úÖ Generated embeddings with shape: (256, 384)
üíæ Memory usage: ~0.39 MB


In [62]:
# ============================================================================
# CELL 7: Setup ChromaDB Vector Store
# ============================================================================
print("üóÑÔ∏è  Setting up ChromaDB vector database...")

# Initialize ChromaDB client
client = chromadb.Client(Settings(
    anonymized_telemetry=False,
    allow_reset=True
))

# Create or reset collection
collection_name = "rupp_qa"
try:
    client.delete_collection(collection_name)
    print("üóëÔ∏è  Cleared existing collection")
except Exception:
    # Best effort: the delete is allowed to fail (e.g. nothing to delete on a
    # first run). Catch Exception instead of using a bare `except` so that
    # KeyboardInterrupt / SystemExit still propagate.
    pass

# Cosine distance is requested for the HNSW index of this collection
collection = client.create_collection(
    name=collection_name,
    metadata={"hnsw:space": "cosine"}
)

# Add documents to collection (precomputed embeddings, texts, metadata, ids)
print("üíæ Adding documents to vector store...")
collection.add(
    embeddings=embeddings.tolist(),
    documents=[doc['text'] for doc in documents],
    metadatas=[doc['metadata'] for doc in documents],
    ids=[doc['id'] for doc in documents]
)

print(f"‚úÖ Vector store created with {collection.count()} documents!")
print(f"üîç Search algorithm: HNSW (fast approximate search)")

üóÑÔ∏è  Setting up ChromaDB vector database...
üóëÔ∏è  Cleared existing collection
üíæ Adding documents to vector store...
‚úÖ Vector store created with 256 documents!
üîç Search algorithm: HNSW (fast approximate search)


In [63]:
# ============================================================================
# FLEXIBLE STAFF RETRIEVAL FUNCTION
# ============================================================================
def _search_collection(query, n_results, category_filter):
    """Embed `query` and run a similarity search against `collection`."""
    query_embedding = embedder.encode([query], normalize_embeddings=True)[0]

    # Restrict the search to one category only when a filter is given
    where_clause = {"category": category_filter} if category_filter else None

    return collection.query(
        query_embeddings=[query_embedding.tolist()],
        n_results=n_results,
        where=where_clause
    )


def retrieve_staff_flex(query, n_results=3, category_filter=None):
    """
    Retrieve top staff entries for a query.

    Parameters:
    - query: str, the question or search string
    - n_results: int, number of top results to return
    - category_filter: str or None, if set only searches that category

    Returns:
    - List of dicts with text, similarity, category, source_file,
      chunk_index and total_chunks. Missing metadata fields fall back to
      "unknown" / 0.
    """
    results = _search_collection(query, n_results, category_filter)

    texts = results['documents'][0]
    metadatas = results['metadatas'][0]
    distances = results['distances'][0] if results['distances'] else None

    retrieved = []
    for i, text in enumerate(texts):
        # An entry stored without metadata would otherwise break the .get() calls
        meta = metadatas[i] or {}
        retrieved.append({
            "text": text,
            "similarity": 1 - distances[i] if distances is not None else None,
            "category": meta.get("category", "unknown"),
            "source_file": meta.get("source_file", "unknown"),
            "chunk_index": meta.get("chunk_index", 0),
            "total_chunks": meta.get("total_chunks", 0)
        })

    return retrieved


def retrieve_context(query, n_results=3, category_filter=None):
    """
    Retrieve context documents in the shape the RAG pipeline consumes.

    The generation, pipeline and evaluation cells of this notebook call
    `retrieve_context(...)` and read 'text', 'metadata' and 'distance' from
    each hit, but no visible cell defines it. Defining it here next to the
    other retrieval helper lets the notebook run from a fresh kernel.

    Parameters:
    - query: str, the question or search string
    - n_results: int, number of top results to return
    - category_filter: str or None, if set only searches that category

    Returns:
    - List of dicts with keys 'text', 'metadata' (dict) and 'distance'
    """
    results = _search_collection(query, n_results, category_filter)

    texts = results['documents'][0]
    metadatas = results['metadatas'][0]
    distances = results['distances'][0]

    return [
        {
            'text': text,
            'metadata': metadatas[i] or {},
            'distance': distances[i]
        }
        for i, text in enumerate(texts)
    ]


In [64]:
# Authenticate with HuggingFace
from huggingface_hub import login
from getpass import getpass

# Read the token with getpass (input is not echoed) and hand it to the Hub login helper
token = getpass("Enter your HuggingFace token: ")
login(token=token)
print("‚úÖ Authenticated!")

Enter your HuggingFace token: ¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑
‚úÖ Authenticated!


In [65]:
# ============================================================================
# CELL 9: Load Llama-3.1-8B-Instruct (Primary LLM)
# ============================================================================
print("ü¶ô Loading Llama-3.1-8B-Instruct...")
print("‚è≥ This will take 2-3 minutes (one-time download)...")
print("üíæ Model will use ~6GB GPU memory with 4-bit quantization")

model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Configure 4-bit quantization to fit in free Colab GPU
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load tokenizer; reuse the EOS token for padding
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Load model with quantization.
# trust_remote_code is deliberately NOT enabled: the Llama architecture is
# implemented inside transformers itself, so there is no reason to allow
# execution of Python code shipped in the model repository.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.bfloat16
)

print("‚úÖ Llama-3.1-8B loaded successfully!")
print(f"üìä Model size: ~4.5GB (4-bit quantized)")
print(f"‚ö° Expected speed: 40-60 tokens/second")
print(f"üéØ Quality: Near GPT-3.5 level (88% benchmark)")

ü¶ô Loading Llama-3.1-8B-Instruct...
‚è≥ This will take 2-3 minutes (one-time download)...
üíæ Model will use ~6GB GPU memory with 4-bit quantization


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

‚úÖ Llama-3.1-8B loaded successfully!
üìä Model size: ~4.5GB (4-bit quantized)
‚ö° Expected speed: 40-60 tokens/second
üéØ Quality: Near GPT-3.5 level (88% benchmark)


In [66]:
# ============================================================================
# CELL 10: Generate Answer Function with Llama-3.1
# ============================================================================
def generate_answer_llama(query, context_docs, max_tokens=256, temperature=0.3):
    """Generate answer using Llama-3.1-8B

    Parameters:
    - query: str, the student's question
    - context_docs: list of dicts; each must have a 'text' entry that is
      inserted into the prompt as a numbered reference
    - max_tokens: int, maximum number of newly generated tokens
    - temperature: float, sampling temperature passed to `model.generate`

    Returns:
    - str, the model's reply only (prompt tokens are excluded)
    """

    # Build context from retrieved documents
    context = "\n\n".join([
        f"Reference {i+1}:\n{doc['text']}"
        for i, doc in enumerate(context_docs)
    ])

    # Create chat messages in Llama-3.1 format
    messages = [
        {
            "role": "system",
            "content": "You are a helpful academic advisor at RUPP (Royal University of Phnom Penh). Provide clear, accurate, and professional information based on the context given. Keep answers concise but complete."
        },
        {
            "role": "user",
            "content": f"""Context information from RUPP policies:
{context}

Student Question: {query}

Instructions:
- Provide a clear and accurate answer based ONLY on the context above
- If the context doesn't contain enough information, say so
- Keep the tone professional and helpful
- Be specific and cite relevant policies when applicable"""
        }
    ]

    # Format prompt using Llama-3.1 chat template
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # Tokenize. The chat template has already inserted the model's special
    # tokens, so they must not be added a second time here.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=2048,
        add_special_tokens=False
    )
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    prompt_length = inputs["input_ids"].shape[1]

    # Generate
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )

    # Decode only the tokens produced after the prompt. Splitting the full
    # decoded text on the word "assistant" would cut off any answer (or
    # echo any context) that itself contains that word, e.g. "teaching
    # assistant".
    generated_ids = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    return response

print("‚úÖ Llama-3.1 generation function ready!")

# Test generation
# NOTE(review): the retrieval query ("Can I take fewer courses?") differs from
# the question sent to the generator ("Can I study part-time?") — confirm this
# mismatch is intentional.
# NOTE(review): retrieve_context is not defined in any cell visible in this
# notebook — verify it exists before running from a fresh kernel.
print("\nüß™ Testing Llama-3.1 generation...")
test_context = retrieve_context("Can I take fewer courses?", n_results=2)
test_answer = generate_answer_llama("Can I study part-time?", test_context, max_tokens=150)
print(f"\nüí¨ Test Answer:\n{test_answer}")

‚úÖ Llama-3.1 generation function ready!

üß™ Testing Llama-3.1 generation...

üí¨ Test Answer:
According to RUPP's policies, part-time enrollment with fewer courses may be possible. However, this would extend your graduation time. If you are interested in taking fewer courses, I recommend checking with the Studies Office to discuss your part-time status and any implications for scholarships.


In [67]:
# ============================================================================
# CELL 12: Complete RAG Pipeline
# ============================================================================
def rag_query(user_query, category=None, n_results=3, use_gemini=False, max_tokens=256):
    """Complete RAG pipeline with Llama-3.1 or Gemini

    Parameters:
    - user_query: str, the question to answer
    - category: str or None, optional category filter for retrieval
    - n_results: int, number of documents to retrieve
    - use_gemini: bool, request the Gemini generator; falls back to
      Llama-3.1 when no callable `generate_answer_gemini` exists
    - max_tokens: int, generation budget for the Llama path

    Returns:
    - dict with 'query', 'answer', 'sources' (category, question,
      similarity per document), 'num_sources', 'generation_time' (seconds)
      and 'model_used' (the generator that actually produced the answer)
    """
    import time

    # Look the Gemini generator up defensively: it is optional and may never
    # have been defined, in which case referencing the bare name would raise
    # NameError as soon as use_gemini is True.
    gemini_fn = globals().get('generate_answer_gemini')
    gemini_active = bool(use_gemini) and callable(gemini_fn)

    print(f"\n{'='*70}")
    print(f"üîç Query: {user_query}")
    if category:
        print(f"üìÇ Category filter: {category}")
    print(f"{'='*70}\n")

    # Step 1: Retrieve relevant context
    print("üìö Step 1: Retrieving relevant documents...")
    context_docs = retrieve_context(
        query=user_query,
        n_results=n_results,
        category_filter=category
    )

    print(f"‚úÖ Retrieved {len(context_docs)} documents")
    for i, doc in enumerate(context_docs, 1):
        # The retriever reports a distance; similarity is its complement
        sim_score = 1 - doc['distance']
        print(f"   {i}. [{doc['metadata']['category']}] Similarity: {sim_score:.3f}")

    # Step 2: Generate answer
    if use_gemini and not gemini_active:
        print("‚ö†Ô∏è  Gemini not configured. Using Llama-3.1 only.")
    print(f"\nü§ñ Step 2: Generating answer with {'Gemini' if gemini_active else 'Llama-3.1'}...")

    start_time = time.time()

    if gemini_active:
        answer = gemini_fn(user_query, context_docs)
    else:
        answer = generate_answer_llama(user_query, context_docs, max_tokens=max_tokens)

    generation_time = time.time() - start_time

    print(f"‚úÖ Answer generated in {generation_time:.2f} seconds")

    # Step 3: Format response
    result = {
        'query': user_query,
        'answer': answer,
        'sources': [
            {
                'category': doc['metadata']['category'],
                'question': doc['metadata']['question'],
                'similarity': 1 - doc['distance']
            }
            for doc in context_docs
        ],
        'num_sources': len(context_docs),
        'generation_time': generation_time,
        # Report the generator that actually ran, not the one requested
        'model_used': 'Gemini' if gemini_active else 'Llama-3.1-8B'
    }

    return result

print("‚úÖ Complete RAG pipeline ready!")

‚úÖ Complete RAG pipeline ready!


In [68]:
# ============================================================================
# CELL 13: Test RAG System with Multiple Queries
# ============================================================================
# Test queries covering different categories
test_queries = [
    "Can I take fewer courses?",
    "Are there summer classes?",
    "What happens if I fail a course?",
    "How do I withdraw from a class?",
]

print("üß™ TESTING RAG SYSTEM", "="*70, sep="\n")

# Run every sample question through the Llama path and print a short report
for i, query in enumerate(test_queries, 1):
    print(f"\n{'#'*70}", f"TEST {i}/{len(test_queries)}", f"{'#'*70}", sep="\n")

    result = rag_query(query, use_gemini=False, n_results=3, max_tokens=200)

    print(f"\nüí¨ ANSWER:", result['answer'], sep="\n")

    print(f"\nüìñ SOURCES ({result['num_sources']}):")
    for j, source in enumerate(result['sources'], 1):
        print(
            f"   {j}. [{source['category']}] {source['question'][:60]}...",
            f"      Similarity: {source['similarity']:.3f}",
            sep="\n",
        )

    print(
        f"\n‚è±Ô∏è  Generation time: {result['generation_time']:.2f}s",
        f"ü§ñ Model: {result['model_used']}",
        sep="\n",
    )

print("\n" + "="*70, "‚úÖ All tests completed!", sep="\n")

üß™ TESTING RAG SYSTEM

######################################################################
TEST 1/4
######################################################################

üîç Query: Can I take fewer courses?

üìö Step 1: Retrieving relevant documents...
‚úÖ Retrieved 3 documents
   1. [course_load] Similarity: 0.761
   2. [course_load] Similarity: 0.628
   3. [attendance_policy] Similarity: 0.579

ü§ñ Step 2: Generating answer with Llama-3.1...
‚úÖ Answer generated in 16.24 seconds

üí¨ ANSWER:
Based on the provided context, it appears that part-time enrollment with fewer courses is possible, but it may extend your graduation time. To confirm this, I recommend checking with the Studies Office about part-time status and any implications for scholarships.

Please note that taking too many courses can hurt your grades and understanding, and excessive absences can result in being barred from taking final exams or failing the course. However, the context does not provide specific 

In [69]:
# ============================================================================
# CELL 14: Interactive Chat Interface
# ============================================================================
def interactive_rag():
    """Interactive query interface for RUPP chatbot

    Reads questions from stdin in a loop and answers them with `rag_query`.
    Supports the commands 'categories', 'stats', 'switch' and 'quit'.
    The loop ends on 'quit', on Ctrl-C, or when stdin is closed (EOF).
    """
    from collections import Counter

    print("\n" + "="*70)
    print("üéì RUPP Q&A CHATBOT - Interactive Mode")
    print("="*70)
    print("Commands:")
    print("  ‚Ä¢ Type your question to get an answer")
    print("  ‚Ä¢ 'categories' - Show all available categories")
    print("  ‚Ä¢ 'stats' - Show system statistics")
    print("  ‚Ä¢ 'switch' - Switch between Llama and Gemini")
    print("  ‚Ä¢ 'quit' - Exit the chatbot")
    print("="*70 + "\n")

    # Count documents per category once instead of rescanning the corpus
    # for every category each time 'categories' is requested
    category_counts = Counter(doc['metadata']['category'] for doc in documents)
    categories = sorted(category_counts)
    use_gemini = False

    while True:
        try:
            user_input = input("\n‚ùì Your question: ").strip()

            if not user_input:
                continue

            command = user_input.lower()

            if command == 'quit':
                print("üëã Thank you for using RUPP Q&A Chatbot. Goodbye!")
                break

            if command == 'categories':
                print(f"\nüìÇ Available categories ({len(categories)}):")
                for i, cat in enumerate(categories, 1):
                    print(f"   {i}. {cat} ({category_counts[cat]} Q&As)")
                continue

            if command == 'stats':
                print(f"\nüìä System Statistics:")
                print(f"   ‚Ä¢ Total Q&A pairs: {len(documents)}")
                print(f"   ‚Ä¢ Categories: {len(categories)}")
                print(f"   ‚Ä¢ Embedding model: all-MiniLM-L6-v2 (384d)")
                print(f"   ‚Ä¢ LLM model: {'Gemini Pro' if use_gemini else 'Llama-3.1-8B'}")
                print(f"   ‚Ä¢ Vector DB: ChromaDB (HNSW)")
                continue

            if command == 'switch':
                # The Gemini generator is optional; look it up without
                # assuming the name exists so an unconfigured setup gets the
                # friendly message instead of a NameError.
                if callable(globals().get('generate_answer_gemini')):
                    use_gemini = not use_gemini
                    print(f"üîÑ Switched to {'Gemini Pro' if use_gemini else 'Llama-3.1-8B'}")
                else:
                    print("‚ö†Ô∏è  Gemini not configured. Using Llama-3.1 only.")
                continue

            # Ask for optional category filter
            filter_cat = input("üìÇ Filter by category? (press Enter to skip): ").strip()
            category_filter = filter_cat if filter_cat and filter_cat in categories else None

            if filter_cat and category_filter is None:
                print(f"‚ö†Ô∏è  Category '{filter_cat}' not found. Searching all categories...")

            # Run RAG query
            result = rag_query(
                user_input,
                category=category_filter,
                use_gemini=use_gemini,
                n_results=3,
                max_tokens=250
            )

            print(f"\nüí¨ ANSWER:")
            print(result['answer'])

            print(f"\nüìñ SOURCES:")
            for i, source in enumerate(result['sources'], 1):
                print(f"   {i}. [{source['category']}] {source['question']}")
                print(f"      Relevance: {source['similarity']:.1%}")

            print(f"\n‚è±Ô∏è  Response time: {result['generation_time']:.2f}s")

        except (KeyboardInterrupt, EOFError):
            # EOFError means stdin is closed: input() would fail again on
            # every iteration, so treat it like an interrupt and stop
            # instead of looping forever through the generic handler below.
            print("\n\nüëã Interrupted. Goodbye!")
            break
        except Exception as e:
            print(f"\n‚ùå Error: {str(e)}")
            print("Please try again or type 'quit' to exit.")

# Run interactive mode
# interactive_rag() reads from input() in a loop, so this cell keeps running
# until the user types 'quit' or interrupts it.
print("\nüöÄ Starting interactive chatbot...")
interactive_rag()



üöÄ Starting interactive chatbot...

üéì RUPP Q&A CHATBOT - Interactive Mode
Commands:
  ‚Ä¢ Type your question to get an answer
  ‚Ä¢ 'categories' - Show all available categories
  ‚Ä¢ 'stats' - Show system statistics
  ‚Ä¢ 'switch' - Switch between Llama and Gemini
  ‚Ä¢ 'quit' - Exit the chatbot


‚ùì Your question: quit
üëã Thank you for using RUPP Q&A Chatbot. Goodbye!


In [70]:
# ============================================================================
# CELL 15: Evaluation & Performance Metrics
# ============================================================================
def evaluate_retrieval(test_cases):
    """Evaluate retrieval quality

    For each test case the top-1 retrieved document's category is compared
    with the expected category.

    Parameters:
    - test_cases: list of dicts with 'query' and 'expected_category'

    Returns:
    - list of per-query dicts with 'query', 'expected', 'retrieved',
      'correct' and 'similarity'; an empty list when no test cases are given
    """

    print("\nüìä RETRIEVAL EVALUATION")
    print("="*70)

    # Nothing to score: return early instead of dividing by zero below
    if not test_cases:
        print("No test cases supplied; nothing to evaluate.")
        return []

    total_correct = 0
    results = []

    for test in test_cases:
        query = test['query']
        expected_category = test['expected_category']

        # Retrieve top result
        context_docs = retrieve_context(query, n_results=1)
        if context_docs:
            retrieved_category = context_docs[0]['metadata']['category']
            similarity = 1 - context_docs[0]['distance']
        else:
            # No hit at all counts as a miss rather than crashing the run
            retrieved_category = "<none>"
            similarity = 0.0

        is_correct = retrieved_category == expected_category
        total_correct += int(is_correct)

        results.append({
            'query': query,
            'expected': expected_category,
            'retrieved': retrieved_category,
            'correct': is_correct,
            'similarity': similarity
        })

        status = "‚úÖ" if is_correct else "‚ùå"
        print(f"{status} {query[:45]:45} | Expected: {expected_category:15} | Got: {retrieved_category:15} | Sim: {similarity:.3f}")

    accuracy = total_correct / len(test_cases) * 100
    avg_similarity = np.mean([r['similarity'] for r in results])

    print(f"\n{'='*70}")
    print(f"üéØ Retrieval Accuracy: {accuracy:.1f}% ({total_correct}/{len(test_cases)})")
    print(f"üìä Average Similarity: {avg_similarity:.3f}")
    print(f"{'='*70}")

    return results

# Sample evaluation inputs (replace with cases that match your own data)
example_test_cases = [
    {'query': question, 'expected_category': category_name}
    for question, category_name in (
        ('Can I take fewer courses?', 'course_load'),
        ('Are there summer classes?', 'summer_courses'),
    )
]

print(
    "\nüí° To run evaluation, create test cases and call:",
    "eval_results = evaluate_retrieval(your_test_cases)",
    sep="\n",
)


üí° To run evaluation, create test cases and call:
eval_results = evaluate_retrieval(your_test_cases)


In [71]:
# ============================================================================
# CELL 16: Save & Export Results
# ============================================================================
def save_conversation(queries_and_answers, filename='rag_conversation.json'):
    """Write the given Q&A results plus system info to a JSON file, then download it.

    The file holds a 'system_info' section (model names, vector DB, corpus
    size, retrieval depth) and the supplied results under 'conversations'.
    """
    system_info = dict(
        embedding_model='sentence-transformers/all-MiniLM-L6-v2',
        llm_model='meta-llama/Meta-Llama-3.1-8B-Instruct',
        vector_db='ChromaDB',
        total_documents=len(documents),
        retrieval_k=3,
    )
    payload = {'system_info': system_info, 'conversations': queries_and_answers}

    # ensure_ascii=False keeps non-ASCII text readable in the output file
    with open(filename, 'w', encoding='utf-8') as out_file:
        json.dump(payload, out_file, indent=2, ensure_ascii=False)
    print(f"‚úÖ Results saved to {filename}")

    # Hand the file to the browser via the Colab helper
    files.download(filename)
    print(f"üì• File downloaded!")

def export_to_csv(results, filename='rag_results.csv'):
    """Write one CSV row per result dict and download the file.

    Columns: query, answer, model, generation time, and the category and
    similarity of the first source ('N/A' for both when there are no sources).
    """
    import csv

    header = ['Query', 'Answer', 'Model', 'Time', 'Top_Category', 'Similarity']

    with open(filename, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(header)

        for entry in results:
            row = [
                entry['query'],
                entry['answer'],
                entry['model_used'],
                format(entry['generation_time'], '.2f'),
            ]
            if entry['sources']:
                best = entry['sources'][0]
                row.append(best['category'])
                row.append(format(best['similarity'], '.3f'))
            else:
                row.extend(['N/A', 'N/A'])
            writer.writerow(row)

    print(f"‚úÖ Exported to {filename}")
    files.download(filename)

# Usage hint for the two export helpers
print(
    "\nüí° To save your results:",
    "save_conversation(your_results)",
    "export_to_csv(your_results)",
    sep="\n",
)


üí° To save your results:
save_conversation(your_results)
export_to_csv(your_results)


In [72]:
# ============================================================================
# CELL 17: Install FastAPI & Expose to Public
# ============================================================================
!pip install -q fastapi uvicorn pyngrok python-multipart
print("‚úÖ FastAPI dependencies installed!")


‚úÖ FastAPI dependencies installed!


In [73]:
# ============================================================================
# CELL 18: FastAPI Server for Next.js
# ============================================================================
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Optional
import uvicorn
from pyngrok import ngrok
import nest_asyncio

# Allow nested event loops (required for Colab)
nest_asyncio.apply()

# Initialize FastAPI
app = FastAPI(title="RUPP RAG API", version="1.0.0")

# Enable CORS for Next.js.
# Credentials are disabled because every origin is allowed: combining a
# wildcard origin with credentialed requests would let any website call the
# API with the visitor's cookies. None of the endpoints in this notebook use
# cookies or auth headers. To enable credentials, list the exact origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify your domain
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Request/Response Models
class QueryRequest(BaseModel):
    # Request body of POST /query; each field is forwarded to rag_query().
    question: str  # passed as `user_query`
    category: Optional[str] = None  # optional category filter; None means no filter
    n_results: Optional[int] = 3  # number of documents to retrieve
    use_gemini: Optional[bool] = False  # request the Gemini generator

class Source(BaseModel):
    # One retrieved document as listed in QueryResponse.sources; the field
    # names match the per-source dicts built by rag_query().
    category: str
    question: str
    similarity: float

class QueryResponse(BaseModel):
    # Response body of POST /query, constructed from the dict returned by
    # rag_query() via QueryResponse(**result).
    query: str
    answer: str
    sources: List[Source]
    generation_time: float
    model_used: str

# Health check endpoint
@app.get("/")
def read_root():
    """Landing endpoint: reports that the API is online and lists its routes."""
    endpoint_help = {
        "/query": "POST - Ask a question",
        "/categories": "GET - List all categories",
        "/health": "GET - Check system health",
    }
    return {"status": "online", "message": "RUPP RAG API", "endpoints": endpoint_help}

# Query endpoint
@app.post("/query", response_model=QueryResponse)
async def query_endpoint(request: QueryRequest):
    """Answer a question through the RAG pipeline.

    Returns a QueryResponse built from the dict produced by rag_query().

    Raises:
    - HTTPException 400 when the question is blank or n_results is < 1
    - HTTPException 500 when the pipeline itself fails
    """
    # n_results is declared Optional, so an explicit JSON null arrives as
    # None; fall back to the default instead of passing None to retrieval.
    n_results = 3 if request.n_results is None else request.n_results

    # Validate outside the try block so these stay 400s and are not
    # converted into 500s by the generic handler below.
    if n_results < 1:
        raise HTTPException(status_code=400, detail="n_results must be a positive integer")
    if not request.question.strip():
        raise HTTPException(status_code=400, detail="question must not be empty")

    try:
        # Run RAG query
        result = rag_query(
            user_query=request.question,
            category=request.category,
            n_results=n_results,
            use_gemini=request.use_gemini,
            max_tokens=256
        )

        return QueryResponse(**result)

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

# Get categories endpoint
@app.get("/categories")
def get_categories():
    """List the distinct document categories, sorted, with their count."""
    unique_categories = {doc['metadata']['category'] for doc in documents}
    ordered = sorted(unique_categories)
    return {"categories": ordered, "total": len(ordered)}

# Health check
@app.get("/health")
def health_check():
    """Report basic status: model label, corpus size and CUDA availability."""
    status_report = dict(
        status="healthy",
        model="Llama-3.1-8B",
        documents=len(documents),
        gpu_available=torch.cuda.is_available(),
    )
    return status_report

print("‚úÖ FastAPI app configured!")

‚úÖ FastAPI app configured!


In [74]:
# ============================================================================
# CELL 19: Start Server & Get Public URL  (COLAB SAFE VERSION)
# ============================================================================
from getpass import getpass
import threading
import nest_asyncio
import uvicorn
from pyngrok import ngrok

# Patch the running event loop so uvicorn works in Colab
nest_asyncio.apply()

# Get ngrok auth token (free: https://dashboard.ngrok.com/get-started/your-authtoken)
print("üîë Get your FREE ngrok token from: https://dashboard.ngrok.com/get-started/your-authtoken")
ngrok_token = getpass("Enter ngrok auth token: ")
ngrok.set_auth_token(ngrok_token)

# Kill previous ngrok if exists
!pkill ngrok || echo "No existing ngrok process."

# Start ngrok tunnel
public_url = ngrok.connect(8000)
print(f"\n{'='*70}")
print(f"üåê PUBLIC API URL: {public_url}")
print(f"{'='*70}")
print("\nüìù Copy this URL for your Next.js app!\n")

print("üß™ Test endpoints:")
print(f"   ‚Ä¢ Health: {public_url}/health")
print(f"   ‚Ä¢ Categories: {public_url}/categories")
print(f"   ‚Ä¢ Query: {public_url}/query  (POST)")
print("\n‚ö†Ô∏è  Keep this cell running! Server will stop if you interrupt it.")
print("="*70)


# ------------------------------
# Run Uvicorn in a background thread
# ------------------------------
def start_server():
    uvicorn.run(app, host="0.0.0.0", port=8000)

server_thread = threading.Thread(target=start_server, daemon=True)
server_thread.start()


üîë Get your FREE ngrok token from: https://dashboard.ngrok.com/get-started/your-authtoken
Enter ngrok auth token: ¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑





üåê PUBLIC API URL: NgrokTunnel: "https://grouseless-nonphysically-craig.ngrok-free.dev" -> "http://localhost:8000"

üìù Copy this URL for your Next.js app!

üß™ Test endpoints:
   ‚Ä¢ Health: NgrokTunnel: "https://grouseless-nonphysically-craig.ngrok-free.dev" -> "http://localhost:8000"/health
   ‚Ä¢ Categories: NgrokTunnel: "https://grouseless-nonphysically-craig.ngrok-free.dev" -> "http://localhost:8000"/categories
   ‚Ä¢ Query: NgrokTunnel: "https://grouseless-nonphysically-craig.ngrok-free.dev" -> "http://localhost:8000"/query  (POST)

‚ö†Ô∏è  Keep this cell running! Server will stop if you interrupt it.


INFO:     Started server process [1341]
INFO:     Waiting for application startup.


In [75]:
# ============================================================================
# SINGLE CELL: Complete Document Upload API Setup
# ============================================================================

# Install dependencies
print("üì¶ Installing document processing libraries...")
!pip install -q PyPDF2 python-docx
print("‚úÖ Libraries installed!")

# Import required modules
import PyPDF2
import docx
from datetime import datetime
from typing import List, Dict, Optional
from fastapi import File, UploadFile, Form
from fastapi.responses import JSONResponse
import os

# Document Processor Class
class DocumentProcessor:
    """Turn a PDF, DOCX or TXT file into overlapping word-based text chunks.

    The class holds no state; every method is a ``@staticmethod``.
    """

    @staticmethod
    def extract_from_pdf(file_path: str) -> str:
        """Return the text of all pages of a PDF, pages separated by a blank line.

        Uses the module-level ``PyPDF2`` import.
        """
        with open(file_path, 'rb') as f:
            pdf_reader = PyPDF2.PdfReader(f)
            # A page with no extractable text may give an empty (or, depending on
            # the PyPDF2 version, possibly None) result; coerce to "" so that
            # join() cannot raise TypeError.
            page_texts = [(page.extract_text() or "") for page in pdf_reader.pages]
            return "\n\n".join(page_texts).strip()

    @staticmethod
    def extract_from_docx(file_path: str) -> str:
        """Return the non-blank paragraphs of a DOCX file, separated by a blank line."""
        doc = docx.Document(file_path)
        return "\n\n".join([para.text for para in doc.paragraphs if para.text.strip()])

    @staticmethod
    def extract_from_txt(file_path: str) -> str:
        """Return the full contents of a UTF-8 text file.

        Raises:
            UnicodeDecodeError: if the file is not valid UTF-8.
        """
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()

    @staticmethod
    def chunk_text(text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
        """Split ``text`` into chunks of at most ``chunk_size`` whitespace-separated words.

        Consecutive chunks share ``overlap`` words. Chunks are re-joined with a
        single space, so the original whitespace is not preserved.

        Args:
            text: Text to split.
            chunk_size: Maximum number of words per chunk; must be positive.
            overlap: Words shared between neighbouring chunks;
                must satisfy ``0 <= overlap < chunk_size``.

        Returns:
            The list of chunks; empty when ``text`` contains no words.

        Raises:
            ValueError: if ``chunk_size``/``overlap`` are out of range. Previously
                ``overlap == chunk_size`` failed with an opaque ``range()`` error
                and ``overlap > chunk_size`` silently returned no chunks.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")
        if not 0 <= overlap < chunk_size:
            raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

        words = text.split()
        step = chunk_size - overlap
        chunks = []
        for start in range(0, len(words), step):
            chunks.append(' '.join(words[start:start + chunk_size]))
            # This chunk already reaches the last word: a further window would
            # only repeat words contained in it, so stop here.
            if start + chunk_size >= len(words):
                break
        return chunks

    @staticmethod
    def process_document(file_path: str, category: str = "general") -> List[Dict]:
        """Extract, chunk and label the contents of one file.

        Args:
            file_path: Path to a ``.pdf``, ``.docx`` or ``.txt`` file; the
                extension is matched case-insensitively.
            category: Value stored under ``metadata['category']`` of every chunk.

        Returns:
            One ``{'text': ..., 'metadata': {...}}`` dict per chunk, or ``[]`` when
            the extension is unsupported or no text could be extracted.
        """
        # Extract text based on file type (case-insensitive so "REPORT.PDF" works)
        lowered_path = file_path.lower()
        if lowered_path.endswith('.pdf'):
            text = DocumentProcessor.extract_from_pdf(file_path)
        elif lowered_path.endswith('.docx'):
            text = DocumentProcessor.extract_from_docx(file_path)
        elif lowered_path.endswith('.txt'):
            text = DocumentProcessor.extract_from_txt(file_path)
        else:
            return []

        if not text:
            return []

        # Chunk text
        chunks = DocumentProcessor.chunk_text(text)

        # Computed once so that all chunks of a document carry identical values.
        source_file = os.path.basename(file_path)
        upload_date = datetime.now().isoformat()
        total_chunks = len(chunks)

        # Create documents
        return [
            {
                'text': chunk,
                'metadata': {
                    'category': category,
                    'source_file': source_file,
                    'chunk_index': index,
                    'total_chunks': total_chunks,
                    'upload_date': upload_date
                }
            }
            for index, chunk in enumerate(chunks)
        ]

# Vector Store Manager
class VectorStoreManager:
    """Small facade over a vector collection and a text embedder.

    ``collection`` must provide ``count``/``add``/``get``/``delete`` and
    ``embedder`` must provide ``encode``; in this notebook they are presumably
    the ChromaDB collection and SentenceTransformer model from earlier cells.
    """

    def __init__(self, collection, embedder):
        self.collection = collection
        self.embedder = embedder

    def add_documents(self, documents: List[Dict], category: str = "general"):
        """Embed ``documents`` and add them to the collection.

        Each input dict is updated in place: it receives an ``'id'`` key and,
        if its metadata has no ``'category'``, the ``category`` argument.

        Args:
            documents: Dicts with a ``'text'`` key and (optionally) ``'metadata'``.
            category: Fallback category for documents whose metadata lacks one.

        Returns:
            Number of documents added (0 for an empty input).
        """
        import uuid  # local import: only needed for ID generation

        # Nothing to embed; skip the encoder and the collection call entirely.
        if not documents:
            return 0

        new_docs = []
        for doc in documents:
            # IDs were previously "doc_<collection.count() + i>". After any
            # deletion the count drops, so new IDs collided with IDs still in
            # the collection. Random IDs cannot collide with surviving entries.
            doc['id'] = f"doc_{uuid.uuid4().hex}"
            doc.setdefault('metadata', {}).setdefault('category', category)
            new_docs.append(doc)

        texts = [doc['text'] for doc in new_docs]
        embeddings = self.embedder.encode(texts, normalize_embeddings=True, show_progress_bar=True)

        self.collection.add(
            embeddings=embeddings.tolist(),
            documents=texts,
            metadatas=[doc['metadata'] for doc in new_docs],
            ids=[doc['id'] for doc in new_docs]
        )

        return len(new_docs)

    def get_stats(self):
        """Summarize the collection.

        Returns:
            Dict with ``total_documents``, per-``categories`` and per-``sources``
            counts (missing values are counted under ``'unknown'``), and a
            ``last_update`` ISO timestamp taken at call time.
        """
        total_docs = self.collection.count()
        # Only metadata is inspected below, so do not pull the document bodies.
        all_docs = self.collection.get(include=["metadatas"])

        categories = {}
        sources = {}

        if all_docs and all_docs['metadatas']:
            for meta in all_docs['metadatas']:
                meta = meta or {}  # tolerate entries stored without metadata
                cat = meta.get('category', 'unknown')
                src = meta.get('source_file', 'unknown')
                categories[cat] = categories.get(cat, 0) + 1
                sources[src] = sources.get(src, 0) + 1

        return {
            'total_documents': total_docs,
            'categories': categories,
            'sources': sources,
            'last_update': datetime.now().isoformat()
        }

    def delete_by_category(self, category: str):
        """Delete every entry whose metadata ``category`` equals ``category``."""
        self.collection.delete(where={'category': category})

# Single module-level manager used by the endpoints below; `collection` and
# `embedder` are the objects created in earlier cells of this notebook.
vector_manager = VectorStoreManager(collection=collection, embedder=embedder)

# Add upload endpoint to existing FastAPI app
@app.post("/upload")
async def upload_document(
    file: UploadFile = File(...),
    category: str = Form("general")
):
    """Upload and process PDF, DOCX, or TXT document"""
    # Responses:
    #   200 -> {"success": True, "filename", "chunks_created", "category", "message"}
    #   400 -> unsupported extension, or no text could be extracted
    #   500 -> any unexpected error (message taken from the exception)
    import tempfile  # local import keeps this cell self-contained

    try:
        # file.filename is chosen by the client. Keep only its last path
        # component so a name such as "../../x.txt" cannot make us write
        # outside the temporary directory. A missing filename becomes "".
        safe_name = os.path.basename((file.filename or "").replace("\\", "/"))

        # Validate file type
        allowed_extensions = ['.pdf', '.docx', '.txt']
        file_ext = os.path.splitext(safe_name)[1].lower()

        if file_ext not in allowed_extensions:
            return JSONResponse(
                status_code=400,
                content={
                    "success": False,
                    "error": f"Unsupported file type. Allowed: {', '.join(allowed_extensions)}"
                }
            )

        content = await file.read()

        # Save uploaded file into a private directory: concurrent uploads with
        # the same name cannot overwrite each other, and the directory is
        # removed when the `with` block exits, including on errors. The
        # original name is kept because process_document records it as
        # `source_file`.
        with tempfile.TemporaryDirectory(prefix="upload_") as tmp_dir:
            file_path = os.path.join(tmp_dir, safe_name)
            with open(file_path, "wb") as f:
                f.write(content)

            # Process document
            docs = DocumentProcessor.process_document(file_path, category)

        if not docs:
            return JSONResponse(
                status_code=400,
                content={"success": False, "error": "Failed to extract text from document"}
            )

        # Add to vector store
        # NOTE(review): this call is synchronous, so the event loop is blocked
        # while embeddings are computed. TODO: consider offloading to a thread.
        chunks_added = vector_manager.add_documents(docs, category)

        return {
            "success": True,
            "filename": safe_name,
            "chunks_created": chunks_added,
            "category": category,
            "message": f"Successfully processed {chunks_added} chunks"
        }

    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={"success": False, "error": str(e)}
        )

@app.get("/stats")
def get_stats():
    """Get vector store statistics"""
    # Delegate to the module-level manager and hand its dict back unchanged.
    stats_payload = vector_manager.get_stats()
    return stats_payload

@app.delete("/documents/{category}")
def delete_category(category: str):
    """Delete all documents in a category"""
    # Failure path first: any exception from the manager becomes a 500 response
    # carrying the exception text; otherwise fall through to the success payload.
    try:
        vector_manager.delete_by_category(category)
    except Exception as exc:
        failure_body = {"success": False, "error": str(exc)}
        return JSONResponse(status_code=500, content=failure_body)
    return {"success": True, "deleted_category": category}

# Announce the routes registered by this cell (one line per entry).
print(
    "‚úÖ Document Upload API ready!",
    "üì§ Endpoints:",
    "   POST /upload - Upload PDF/DOCX/TXT",
    "   GET  /stats  - Get statistics",
    "   DELETE /documents/{category} - Delete category",
    sep="\n",
)

INFO:     Application startup complete.
ERROR:    [Errno 98] error while attempting to bind on address ('0.0.0.0', 8000): address already in use
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
