# 1. Environment Setup
Install all required libraries for embeddings, vector database, LLM, and API.


In [None]:
# Core RAG/ML stack: FAISS index, LangChain packages, PDF parsing, sentence-transformers, a pinned transformers release, and torch.
!pip install faiss-cpu langchain langchain-community langchain-core pypdf sentence-transformers transformers==4.52.4 torch 

In [None]:
# Serving/UI stack: FastAPI + uvicorn for the API, pyngrok for the tunnel, Streamlit + requests for the front end, PyPDF2 for PDF text; -q quiets pip.
!pip install fastapi uvicorn pyngrok streamlit requests PyPDF2 -q

# 2. Models Initialization
Load the tokenizer, the language model, and the embedding model used by the rest of the notebook.

In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
# Checkpoint identifier passed to both the tokenizer loader and the model loader.
model_name = "mistralai/Mistral-Nemo-Instruct-2407"

# Tokenizer for the checkpoint named above.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Causal LM loaded with float16 weights; device_map="auto" delegates
# weight placement to the loader.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

In [None]:
# Embedding model used to vectorise PDF chunks when the per-request FAISS index is built.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# 3. RAG Engine & FastAPI

In [None]:
import uvicorn
import threading
import time
import socket
import torch
import re
import io
from fastapi import FastAPI, UploadFile, File, HTTPException, Header
from pyngrok import ngrok, conf
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import PromptTemplate
from PyPDF2 import PdfReader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Configuration
# Placeholder for the ngrok auth token; replace it before opening a tunnel.
NGROK_TOKEN = "ADD_YOUR_TOKEN_HERE" 
# Shared secret: clients must send "Authorization: Bearer <API_KEY>" to the API.
API_KEY = "secret123"

# FastAPI application on which the study-material route is registered.
app = FastAPI()

def extract_text_from_pdf(file_stream):
    """Return the concatenated text of every page in a PDF.

    Args:
        file_stream: Binary file-like object containing the PDF, as
            accepted by ``PdfReader``.

    Returns:
        A single string holding each page's extracted text followed by a
        newline. A page with no extractable text contributes only the
        newline.
    """
    reader = PdfReader(file_stream)
    # `or ""` keeps a page whose extraction yields None (e.g. image-only
    # pages) from raising TypeError; join avoids rebuilding the string on
    # every page.
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)

# Output contract for the model: (field name, description) pairs, one per
# top-level key expected in the JSON reply.
_SCHEMA_FIELDS = (
    ("quiz_title", "A title for the study session"),
    (
        "questions",
        "A list of 5 questions. Each must have 'question_text', 'options' (list of 4 strings), and 'correct_answer' (This MUST be the EXACT string text from the options list, not a number).",
    ),
    ("flashcards", "A list of 5 flashcards with 'front' and 'back'."),
)

# One ResponseSchema per field, in declaration order.
response_schemas = [
    ResponseSchema(name=field_name, description=field_description)
    for field_name, field_description in _SCHEMA_FIELDS
]

# Parser that produces the format instructions for the prompt and parses
# the model's reply against the schemas above.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

# Prompt sent to the model. {format_instructions} is filled with the output
# parser's format instructions and {extracted_text} with the retrieved PDF
# context; both are supplied when the request handler formats the template.
study_template = """
You are an expert tutor. Create a quiz and flashcards based on the text.
IMPORTANT: The 'correct_answer' field must contain the EXACT text string from the 'options' list. 
Do not use numbers or indexes for 'correct_answer'.

Respond ONLY in JSON format:
{format_instructions}

Text Content: "{extracted_text}"
"""
# Template object with both placeholders declared as input variables.
prompt_temp = PromptTemplate(template=study_template, input_variables=["extracted_text", "format_instructions"])

def generate_text(prompt_text, max_new_tokens=1500):
    """Generate a sampled completion for ``prompt_text`` with the loaded model.

    Args:
        prompt_text: Fully formatted prompt string.
        max_new_tokens: Upper bound on the number of generated tokens. It
            is independent of the prompt length, so a long prompt no
            longer eats into the room left for the quiz and flashcards.

    Returns:
        The decoded completion only. The prompt tokens are stripped so
        that fenced examples inside the prompt cannot be mistaken for the
        model's answer by downstream parsing.
    """
    inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)
    # Number of prompt tokens; the generated sequence starts after them.
    prompt_length = inputs["input_ids"].shape[1]
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
    )
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)

def extract_json_block(text):
    """Return the last ```json fenced block of ``text``, re-wrapped in a fence.

    The body of the final ```json ... ``` block is returned surrounded by a
    normalised fence (one newline after the opener and before the closer).
    When ``text`` contains no such block it is returned unchanged.
    """
    fenced_bodies = re.compile(r'```json\s*(.*?)\s*```', re.DOTALL).findall(text)
    if not fenced_bodies:
        return text
    last_body = fenced_bodies[-1]
    return "```json\n" + last_body + "\n```"

@app.post("/generate_study_material")
async def generate_study_material(file: UploadFile = File(...), authorization: str = Header(None)):
    """Build a quiz and flashcards from an uploaded PDF.

    Pipeline: extract the PDF text, split it into chunks, index the chunks
    in an in-memory FAISS store, retrieve the four chunks closest to a
    generic "key concepts" query, and ask the model for structured JSON.

    Args:
        file: The uploaded PDF.
        authorization: Value of the ``Authorization`` header; must equal
            ``"Bearer <API_KEY>"``.

    Returns:
        The parsed structured output produced by ``output_parser``.

    Raises:
        HTTPException: 401 when the bearer token is wrong, 422 when the
            PDF yields no text to index, 500 when any pipeline step fails.
    """
    import logging

    if authorization != f"Bearer {API_KEY}":
        raise HTTPException(status_code=401, detail="Unauthorized")

    # NOTE(review): the steps below are synchronous and run inside an async
    # handler, so the event loop is blocked while a request is processed.
    try:
        pdf_bytes = await file.read()
        raw_text = extract_text_from_pdf(io.BytesIO(pdf_bytes))

        # 1. Chunking: Split text into manageable pieces
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        chunks = text_splitter.split_text(raw_text)
        if not chunks:
            # Nothing to embed (e.g. a scanned PDF); fail with a clear
            # client error instead of crashing inside the vector store.
            raise HTTPException(
                status_code=422,
                detail="No extractable text found in the uploaded PDF.",
            )

        # 2. Vector DB: Create a temporary searchable database in memory
        vector_db = FAISS.from_texts(chunks, embeddings)

        # 3. Retrieval: Find the most "information-dense" chunks
        relevant_docs = vector_db.similarity_search("key concepts, definitions, and main summaries", k=4)
        context_text = "\n".join(doc.page_content for doc in relevant_docs)

        format_instructions = output_parser.get_format_instructions()
        # 4. Generation: Pass only the relevant chunks to Mistral
        full_prompt = prompt_temp.format(extracted_text=context_text, format_instructions=format_instructions)
        response = generate_text(full_prompt)

        json_text = extract_json_block(response)
        return output_parser.parse(json_text)
    except HTTPException:
        # Deliberate HTTP errors raised above must keep their status code.
        raise
    except Exception as e:
        # Report failures with a non-200 status so clients do not treat an
        # error payload as study material; keep the traceback in the
        # server log for debugging.
        logging.getLogger("uvicorn.error").exception("Study material generation failed")
        raise HTTPException(status_code=500, detail=str(e)) from e

def find_port():
    """Return a TCP port number that was unused when the call was made.

    A temporary socket is bound to port 0 so the operating system picks a
    free port; the socket is closed again before returning, including when
    binding fails.

    Returns:
        The port number chosen by the operating system.
    """
    with socket.socket() as probe:
        probe.bind(('', 0))
        return probe.getsockname()[1]

# Pick a free port and serve the FastAPI app from a daemon thread so the
# notebook cell returns while the server keeps running.
api_port = find_port()
threading.Thread(target=lambda: uvicorn.run(app, host="127.0.0.1", port=api_port), daemon=True).start()

# Wait until the server actually accepts connections (up to 30 seconds)
# rather than sleeping a fixed time and assuming it came up.
_api_ready = False
_api_deadline = time.monotonic() + 30
while time.monotonic() < _api_deadline:
    try:
        socket.create_connection(("127.0.0.1", api_port), timeout=1).close()
        _api_ready = True
        break
    except OSError:
        time.sleep(0.25)

if _api_ready:
    print(f"✅ Study API ready on port {api_port}.")
else:
    print(f"❌ Study API did not start on port {api_port} within 30 seconds.")

# 5. Streamlit

In [None]:
# Source of the Streamlit front end. This is an f-string so the notebook's
# api_port and API_KEY are baked into the generated script; braces meant for
# the generated code itself are therefore doubled ({{ }}).
streamlit_code = f"""
import streamlit as st
import requests

st.set_page_config(page_title="AI Study Material Generator", page_icon="🎓")
st.title("🎓 AI Study Material Generator")

API_URL = "http://127.0.0.1:{api_port}/generate_study_material"
API_KEY = {API_KEY!r}

if "study_data" not in st.session_state:
    st.session_state.study_data = None

uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

if st.button("Generate Study Materials"):
    if uploaded_file:
        with st.spinner("Generating..."):
            try:
                files = {{"file": (uploaded_file.name, uploaded_file.getvalue(), "application/pdf")}}
                headers = {{"Authorization": f"Bearer {{API_KEY}}"}}
                resp = requests.post(API_URL, files=files, headers=headers)
                if resp.status_code == 200:
                    payload = resp.json()
                    # The API may report a failure inside a 200 response body.
                    if isinstance(payload, dict) and "error" in payload:
                        st.error(f"Generation failed: {{payload['error']}}")
                    else:
                        st.session_state.study_data = payload
                else:
                    st.error("API Error. Check your FastAPI console.")
            except Exception as e:
                st.error(f"Connection error: {{e}}")
    else:
        st.warning("Please upload a PDF first.")

if st.session_state.study_data:
    data = st.session_state.study_data
    tab1, tab2 = st.tabs(["📝 Quiz", "🗂️ Flashcards"])

    with tab1:
        with st.form("quiz_form"):
            user_answers = []
            questions = data.get("questions", [])
            for i, q in enumerate(questions):
                st.write(f"**Q{{i+1}}:** {{q['question_text']}}")
                opts = [str(o) for o in q['options']]

                # index=None so no radio button is selected by default
                ans = st.radio(f"Select answer for Q{{i+1}}", opts, index=None, key=f"radio_{{i}}")

                user_answers.append((ans, str(q['correct_answer'])))
                st.divider()

            submitted = st.form_submit_button("Submit Quiz")

        if submitted:
            score = 0
            for i, (ua, ca) in enumerate(user_answers):
                if ua is None:
                    st.warning(f"Q{{i+1}}: No answer selected.")
                elif ua.strip().lower() == ca.strip().lower():
                    st.success(f"Q{{i+1}}: Correct!")
                    score += 1
                else:
                    st.error(f"Q{{i+1}}: Incorrect. You chose '{{ua}}', but the answer is '{{ca}}'")
            st.metric("Final Score", f"{{score}}/{{len(questions)}}")

    with tab2:
        for card in data.get("flashcards", []):
            with st.container(border=True):
                st.subheader(f"Front: {{card['front']}}")
                with st.expander("Reveal Back"):
                    st.write(card['back'])
"""
# Write as UTF-8 explicitly: the script contains emoji, which the platform
# default encoding may not be able to represent.
with open("streamlit_app.py", "w", encoding="utf-8") as f:
    f.write(streamlit_code)

# 6. Deployment via Ngrok

In [None]:
from pyngrok import ngrok, conf
import os
import subprocess
import sys

# SECURITY: never hard-code the ngrok auth token in the notebook. A token
# that has been committed or shared should be revoked in the ngrok
# dashboard. Supply it via the NGROK_AUTHTOKEN environment variable, e.g.
# os.environ["NGROK_AUTHTOKEN"] = "<your token>" in a private cell.
NGROK_TOKEN = os.environ.get("NGROK_AUTHTOKEN")
if NGROK_TOKEN:
    conf.get_default().auth_token = NGROK_TOKEN

# Port shared by the tunnel and the Streamlit server.
STREAMLIT_PORT = 8501

try:
    # Close tunnels left over from earlier runs before opening a new one.
    for t in ngrok.get_tunnels():
        ngrok.disconnect(t.public_url)
    url = ngrok.connect(STREAMLIT_PORT).public_url
    print(f"✅ Access your Quiz Generator here: {url}")
    # Launch Streamlit with the current interpreter so it uses the same
    # environment as the notebook.
    subprocess.Popen([sys.executable, "-m", "streamlit", "run", "streamlit_app.py", "--server.port", str(STREAMLIT_PORT)])
except Exception as e:
    # Best effort: report the failure instead of aborting the notebook run.
    print(f"❌ Error: {e}")

In [None]:
import os

# Shell command that signals every process whose full command line
# matches "streamlit" (pkill -f matches against the whole command line).
STOP_STREAMLIT_COMMAND = "pkill -f streamlit"
os.system(STOP_STREAMLIT_COMMAND)