In [1]:
from flask import Flask, render_template, request, redirect, url_for, flash
import cv2
import numpy as np
import pytesseract
import re
import random
import torch
from transformers import BertTokenizer, BertForSequenceClassification, pipeline, BertForTokenClassification, BertForQuestionAnswering
from datetime import datetime
import firebase_admin
from firebase_admin import credentials, firestore
import threading

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Set up Tesseract path
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
app = Flask(__name__)

In [3]:
ak = open("HF.txt","r")
hf_ak = ak.readline()
# print(hf_ak)
# print(type(hf_ak))


In [4]:
import requests
import random
from time import sleep
from functools import lru_cache
import json

# HuggingFace Setup - REPLACE WITH YOUR ACTUAL TOKEN
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
HF_TOKEN = hf_ak  # Get from https://huggingface.co/settings/tokens
headers = {"Authorization": f"Bearer {HF_TOKEN}"}

@lru_cache(maxsize=100)
def generate_hf_questions(text, clauses_str, num_questions=3, max_retries=3):
    """Generate quiz questions using HuggingFace API with randomized answer positions"""
    # Convert clauses string back to dict for use in the function
    clauses = json.loads(clauses_str)
    
    prompt = f"""Generate {num_questions} multiple-choice legal quiz questions from this contract with these requirements:
    
1. For each question:
   - Provide one correct answer (marked with (CORRECT))
   - Provide three plausible but incorrect answers
2. Format each question exactly like:
   Q: [question text]
   A: [option 1] (CORRECT)
   B: [option 2]
   C: [option 3]
   D: [option 4]
   
Document Excerpt:
{text[:1000]}

Key Clauses:
{clauses_str}
"""
    
    for attempt in range(max_retries):
        try:
            response = requests.post(
                API_URL,
                headers=headers,
                json={
                    "inputs": prompt,
                    "parameters": {
                        "max_length": 800,
                        "temperature": 0.7,
                        "do_sample": True
                    }
                }
            )
            
            if response.status_code == 200:
                generated_text = response.json()[0]['generated_text']
                questions = parse_hf_response(generated_text)
                if questions:  # Only return if we got valid questions
                    return questions
                
            elif response.status_code == 503:  # Model loading
                print(f"Model loading, retrying... (attempt {attempt + 1})")
                sleep(5)  # Wait before retrying
                continue
                
            else:
                print(f"HF API Error: {response.text}")
                return []
                
        except Exception as e:
            print(f"HF Request Failed: {str(e)}")
            return []
    
    return []  # Return empty if all retries failed

def parse_hf_response(text):
    """Parse the generated text into quiz format with randomized answer positions"""
    questions = []
    current_question = None
    
    for line in text.split('\n'):
        line = line.strip()
        
        # New question detected
        if line.startswith('Q:'):
            if current_question:  # Save previous question if exists
                questions.append(current_question)
                
            current_question = {
                "question": line[2:].strip(),
                "options": [],
                "correct_answer": "",
                "type": "multiple_choice"
            }
        
        # Answer option detected
        elif line and line[0] in 'ABCD' and current_question:
            option_text = line[3:].strip()
            
            # Check if this is the correct answer
            if "(CORRECT)" in option_text:
                clean_option = option_text.replace("(CORRECT)", "").strip()
                current_question["correct_answer"] = clean_option
                current_question["options"].append(clean_option)
            else:
                current_question["options"].append(option_text)
    
    # Add the last question if exists
    if current_question and current_question["options"]:
        questions.append(current_question)
    
    # Randomize option order for each question
    for question in questions:
        if len(question["options"]) >= 4:
            # Get all options except correct answer
            other_options = [opt for opt in question["options"] if opt != question["correct_answer"]]
            other_options = other_options[:3]  # Ensure we only have 3 incorrect options
            
            # Combine and shuffle
            all_options = [question["correct_answer"]] + other_options
            random.shuffle(all_options)
            
            # Update question with randomized options
            question["options"] = all_options
    
    return questions

# Fallback question generator
def generate_fallback_questions(text, clauses, num_questions=3):
    """Generate simple questions if HF API fails"""
    questions = []
    clause_items = list(clauses.items())
    
    for i in range(min(num_questions, len(clause_items))):
        key, clause = clause_items[i]
        clause_content = clause.split(":")[1].strip() if ":" in clause else clause
        
        questions.append({
            "question": f"What does the '{key}' clause specify?",
            "options": [
                clause_content,
                f"The {key} clause is not specified",
                f"The {key} clause allows unlimited disclosure",
                f"The {key} clause has no time limit"
            ],
            "correct_answer": clause_content,
            "type": "multiple_choice"
        })
        
        # Shuffle options
        random.shuffle(questions[-1]["options"])
    
    return questions

# Main function to use in your application
def generate_quiz_questions(text, clauses, num_questions=3):
    """Main function to generate questions, with fallback"""
    # Convert clauses to string for caching
    clauses_str = json.dumps(clauses, sort_keys=True)
    questions = generate_hf_questions(text, clauses_str, num_questions)
    
    if not questions:  # If HF API failed
        print("Using fallback question generator")
        questions = generate_fallback_questions(text, clauses, num_questions)
    
    return questions[:num_questions]  # Ensure we don't return more than requested

# Example Usage
if __name__ == "__main__":
    sample_text = """The confidentiality clause requires parties to keep information secret for 2 years after contract termination. 
    The termination clause allows either party to terminate with 30 days notice."""
    
    sample_clauses = {
        "confidentiality": "Parties must keep confidential information secret for 2 years post-termination",
        "termination": "Either party may terminate with 30 days notice"
    }
    
    questions = generate_quiz_questions(sample_text, sample_clauses)
    
    for i, q in enumerate(questions, 1):
        print(f"\nQuestion {i}: {q['question']}")
        for j, opt in enumerate(q['options']):
            prefix = "✓" if opt == q['correct_answer'] else " "
            print(f" {prefix} {'ABCD'[j]}) {opt}")


Question 1: [question text]
 ✓ A) [option 1]
   B) [option 4]
   C) [option 2]
   D) [option 3]

Question 2: How long does the confidentiality clause require parties to keep information secret for after contract termination?
   A) Until the parties mutually agree (A)
   B) Lifetime (D)
 ✓ C) 3 years
   D) 1 year (B)

Question 3: According to the termination clause, how much notice is required for either party to terminate the contract?
   A) 180 days (D)
 ✓ B) 60 days
   C) 364 days (A)
   D) 90 days (B)


In [5]:
# Initialize models
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
ner_tokenizer = BertTokenizer.from_pretrained('dbmdz/bert-large-cased-finetuned-conll03-english')
ner_model = BertForTokenClassification.from_pretrained('dbmdz/bert-large-cased-finetuned-conll03-english')
qa_tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
qa_model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of 

In [6]:
# Initialize pipelines
ner_pipe = pipeline('ner', model=ner_model, tokenizer=ner_tokenizer)
qa_pipe = pipeline('question-answering', model=qa_model, tokenizer=qa_tokenizer)

Device set to use cpu
Device set to use cpu


In [7]:
# Initialize Firebase with proper check
if not firebase_admin._apps:
    try:
        cred = credentials.Certificate("firebase_config.json")  # Replace with your credentials path
        firebase_admin.initialize_app(cred)
        db = firestore.client()
        print("Firebase initialized successfully")
    except FileNotFoundError:
        print("Error: firebase_config.json not found. Please ensure the file exists.")
    except Exception as e:
        print(f"Error initializing Firebase: {str(e)}")
else:
    db = firestore.client()  # Get the client if app already exists

Firebase initialized successfully


In [8]:
# Define constants
# Update CLAUSES to better handle license agreement sections
CLAUSES = {
    "permitted uses": r"(Permitted Uses\..*?)(?=\\n\\s*\\n|$)",
    "use restrictions": r"(Use Restrictions\..*?)(?=\\n\\s*\\n|$)",
    "data usage": r"(Customer may (?:not )?use Data.*?)(?=\\n|$)",
    "geocoding": r"(geocod.*?)(?=\\n|$)",
    "redistribution": r"(redistribut.*?)(?=\\n|$)",
    # Keep your existing clauses too
    "termination": r"(termination.*?)(?:\\n|\\.)",
    "confidentiality": r"(confidentiality.*?)(?:\\n|\\.)",
    # ... other existing clauses
}

def extract_clauses(text):
    """Enhanced clause extraction with section headers"""
    clauses = {}
    
    # First extract section headers
    for key, pattern in CLAUSES.items():
        matches = re.finditer(pattern, text, re.IGNORECASE | re.DOTALL)
        for match in matches:
            clause_text = match.group(1).strip()
            if len(clause_text) > 20:  # Minimum length threshold
                clauses[f"{key}"] = clause_text
    
    # Additional pattern for numbered clauses
    numbered_clauses = re.finditer(r"(\\d+\\.\\s+[a-z]?\\..*?)(?=\\n\\s*\\d+\\.|$)", text, re.DOTALL)
    for i, match in enumerate(numbered_clauses, 1):
        clauses[f"clause_{i}"] = match.group(1).strip()
    
    return clauses

# Update DOCUMENT_TYPES to better handle license agreements
DOCUMENT_TYPES = {
    0: "Employment Contract",
    1: "Non-Disclosure Agreement",
    2: "Service Agreement",
    3: "Purchase Agreement",
    4: "License Agreement",  # This should match your document
    5: "Lease Agreement"
}

def classify_text_with_bert(text):
    """Enhanced classification with keyword fallback"""
    # First try BERT
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
    predicted_class = torch.argmax(logits, dim=-1).item() % len(DOCUMENT_TYPES)
    
    # Keyword fallback for license agreements
    text_lower = text.lower()
    if "permitted uses" in text_lower or "use restrictions" in text_lower:
        return 4  # License Agreement
    if "confidential" in text_lower:
        return 1  # NDA
    
    return predicted_class

ACHIEVEMENTS = {
    "first_scan": {"name": "Legal Novice", "description": "Scanned your first document", "xp": 50},
    "scan_milestone_5": {"name": "Legal Apprentice", "description": "Scanned 5 documents", "xp": 100},
    "scan_milestone_10": {"name": "Legal Expert", "description": "Scanned 10 documents", "xp": 200},
    "quiz_perfect": {"name": "Perfect Score", "description": "Got all answers correct in a quiz", "xp": 150},
    "unique_clauses_5": {"name": "Clause Hunter", "description": "Discovered 5 different legal clauses", "xp": 125},
    "all_doc_types": {"name": "Document Master", "description": "Analyzed all document types", "xp": 300}
}

In [9]:
def generate_license_quiz(text, clauses):
    """Specialized quiz generator for license agreements"""
    questions = []
    
    # Question about permitted uses
    if "permitted uses" in clauses:
        questions.append({
            "question": "What are the permitted uses of the data according to this agreement?",
            "options": [
                clauses["permitted uses"],
                "Unlimited use for any purpose",
                "Only for personal non-commercial use",
                "Only for government projects"
            ],
            "correct_answer": clauses["permitted uses"],
            "type": "multiple_choice",
            "points": 15
        })
    
    # Question about restrictions
    if "use restrictions" in clauses:
        questions.append({
            "question": "Which of these is NOT a restriction mentioned in the agreement?",
            "options": [
                "Using data for academic research",
                "Using data for real-time navigation",
                "Redistributing business listing data",
                "Caching data without authorization"
            ],
            "correct_answer": "Using data for academic research",
            "type": "multiple_choice",
            "points": 15
        })
    
    # Add more specialized questions as needed
    return questions[:5]  # Return max 5 questions

In [10]:
def classify_text_with_bert(text):
    """Classify text using BERT model"""
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
    predicted_class = torch.argmax(logits, dim=-1).item()
    return predicted_class % len(DOCUMENT_TYPES)

In [11]:
def extract_entities(text):
    """Extract named entities from text"""
    entities = ner_pipe(text)
    named_entities = []
    current_entity = ""
    current_label = ""
    current_score_sum = 0
    count = 0

    for entity in entities:
        if entity['entity'].startswith('B-') or entity['entity'].startswith('I-'):
            current_score_sum += entity['score']
            count += 1
        else:
            if current_entity and count > 0:
                named_entities.append({
                    'entity': current_entity,
                    'label': current_label,
                    'score': current_score_sum / count
                })
        
            current_entity = entity['word']
            current_label = entity['entity']
            current_score_sum = entity['score']
            count = 1

    if current_entity and count > 0:
        named_entities.append({
            'entity': current_entity,
            'label': current_label,
            'score': current_score_sum / count
        })

    return [e for e in named_entities if e['score'] > 0.8]

In [12]:
def answer_question(context, question):
    """Get answer to a question from the context"""
    result = qa_pipe(question=question, context=context)
    return result['answer']

In [13]:
def generate_advanced_quiz(text, clauses):
    """Generate an enhanced quiz based on document content and extracted clauses"""
    questions = []

    for key, clause in clauses.items():
        clause_content = clause.split(":")[1].strip() if ":" in clause else clause
    
        incorrect_options = [
            f"The {key} clause allows for immediate termination without notice",
            f"The {key} clause requires written approval from all parties involved",
            f"The {key} clause limits liability to $10,000 for each occurrence"
        ]
        
        options = [clause_content] + incorrect_options[:3]
        random.shuffle(options)
    
        questions.append({
            "type": "multiple_choice",
            "question": f"What does the '{key.title()}' clause specify in this document?",
            "options": options,
            "correct_answer": clause_content,
            "difficulty": "medium",
            "points": 10
        })

    if len(clauses) > 0:
        sampled_clauses = random.sample(list(clauses.items()), min(2, len(clauses)))
        for key, clause in sampled_clauses:
            questions.append({
                "type": "true_false",
                "question": f"This document contains a {key} clause.",
                "correct_answer": "True",
                "difficulty": "easy",
                "points": 5
            })

    if len(text) > 100:
        potential_questions = [
            "Who are the parties involved in this agreement?",
            "What is the effective date of this document?",
            "What happens if one party breaches this agreement?",
            "Is there a notice period specified in the document?"
        ]
    
        sampled_questions = random.sample(potential_questions, min(2, len(potential_questions)))
        for question in sampled_questions:
            try:
                answer = answer_question(text, question)
                if len(answer) > 2: 
                    questions.append({
                        "type": "short_answer",
                        "question": question,
                        "correct_answer": answer,
                        "difficulty": "hard",
                        "points": 15
                    })
            except Exception as e:
                print(f"Error generating question: {e}")
                continue

    random.shuffle(questions)

    return questions[:5]

In [14]:
def check_and_award_achievements(user_id):
    """Check user's progress and award any earned achievements"""
    user_ref = db.collection("users").document(user_id)
    user_doc = user_ref.get()

    if not user_doc.exists:
        return [ACHIEVEMENTS["first_scan"]]

    user_data = user_doc.to_dict()
    earned_achievements = []

    scan_count = user_data.get("scan_count", 0)
    achievements = user_data.get("achievements", [])
    unique_clauses = user_data.get("unique_clauses", set())
    if isinstance(unique_clauses, list):
        unique_clauses = set(unique_clauses)
    doc_types = user_data.get("doc_types", set())
    if isinstance(doc_types, list):
        doc_types = set(doc_types)

    if scan_count == 5 and "scan_milestone_5" not in achievements:
        earned_achievements.append(ACHIEVEMENTS["scan_milestone_5"])

    if scan_count == 10 and "scan_milestone_10" not in achievements:
        earned_achievements.append(ACHIEVEMENTS["scan_milestone_10"])

    if len(unique_clauses) >= 5 and "unique_clauses_5" not in achievements:
        earned_achievements.append(ACHIEVEMENTS["unique_clauses_5"])

    if len(doc_types) == len(DOCUMENT_TYPES) and "all_doc_types" not in achievements:
        earned_achievements.append(ACHIEVEMENTS["all_doc_types"])

    return earned_achievements

In [15]:
def save_to_firebase(text, quiz, clauses, doc_type, user_id="anonymous"):
    """Save scan data and update user stats in Firebase"""
    db.collection("scans").add({
        "user_id": user_id,
        "text": text,
        "quiz": quiz,
        "clauses": list(clauses.keys()),
        "doc_type": doc_type,
        "xp": sum(q["points"] for q in quiz),
        "timestamp": datetime.utcnow().isoformat()
    })
    
    user_ref = db.collection("users").document(user_id)
    user_doc = user_ref.get()

    if user_doc.exists:
        user_data = user_doc.to_dict()
        current_xp = user_data.get("xp", 0)
        scan_count = user_data.get("scan_count", 0) + 1
    
        unique_clauses = set(user_data.get("unique_clauses", []))
        unique_clauses.update(clauses.keys())
    
        doc_types = set(user_data.get("doc_types", []))
        doc_types.add(doc_type)
    
        achievements = user_data.get("achievements", [])
    
        user_ref.set({
            "xp": current_xp + sum(q["points"] for q in quiz),
            "scan_count": scan_count,
            "unique_clauses": list(unique_clauses),
            "doc_types": list(doc_types),
            "achievements": achievements,
            "updated_at": datetime.utcnow().isoformat()
        }, merge=True)
    else:
        user_ref.set({
            "user_id": user_id,
            "xp": sum(q["points"] for q in quiz),
            "scan_count": 1,
            "unique_clauses": list(clauses.keys()),
            "doc_types": [doc_type],
            "achievements": ["first_scan"],
            "created_at": datetime.utcnow().isoformat(),
            "updated_at": datetime.utcnow().isoformat()
        })
        
    earned_achievements = check_and_award_achievements(user_id)

    if earned_achievements:
        achievement_ids = [a["name"] for a in earned_achievements]
        achievement_xp = sum(a["xp"] for a in earned_achievements)
    
        user_doc = user_ref.get().to_dict()
        current_xp = user_doc.get("xp", 0)
        achievements = user_doc.get("achievements", [])
        achievements.extend([a["name"] for a in earned_achievements])
    
        user_ref.set({
            "xp": current_xp + achievement_xp,
            "achievements": achievements
        }, merge=True)

    return earned_achievements

In [16]:
def generate_advanced_quiz(text, clauses):
    """Generate a multiple-choice quiz based on document content and extracted clauses"""
    questions = []

    for key, clause in clauses.items():
        clause_content = clause.split(":")[1].strip() if ":" in clause else clause
    
        incorrect_options = [
            f"The {key} clause allows for immediate termination without notice",
            f"The {key} clause requires written approval from all parties involved",
            f"The {key} clause limits liability to $10,000 for each occurrence"
        ]
        
        options = [clause_content] + incorrect_options[:3]
        random.shuffle(options)
    
        questions.append({
            "type": "multiple_choice",
            "question": f"What does the '{key.title()}' clause specify in this document?",
            "options": options,
            "correct_answer": clause_content
        })

    if len(clauses) > 0:
        sampled_clauses = random.sample(list(clauses.items()), min(2, len(clauses)))
        for key, clause in sampled_clauses:
            questions.append({
                "type": "true_false",
                "question": f"This document contains a {key} clause.",
                "correct_answer": "True"
            })

    random.shuffle(questions)
    return questions[:5]  # Return max 5 questions


def save_to_firebase(text, quiz, clauses, doc_type, user_id="anonymous"):
    """Save scan data and update user stats in Firebase"""
    db.collection("scans").add({
        "user_id": user_id,
        "text": text,
        "quiz": quiz,
        "clauses": list(clauses.keys()),
        "doc_type": doc_type,
        "timestamp": datetime.utcnow().isoformat()
    })
    
    # Rest of the function remains the same, just remove any XP calculations

In [17]:
@app.route('/')
def index():
    return render_template("index.html")

In [18]:
@app.route("/scan", methods=["POST"])
def scan_document():
    file = request.files["document"]
    user_id = request.form.get("user_id", "anonymous")

    image = cv2.imdecode(np.frombuffer(file.read(), np.uint8), cv2.IMREAD_COLOR)
    text = pytesseract.image_to_string(image)
    classification_result = classify_text_with_bert(text)
    doc_type = DOCUMENT_TYPES[classification_result]
    
    extracted_clauses = {}
    for key, pattern in CLAUSES.items():
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            extracted_clauses[key] = match.group(1)
            
    quiz = generate_advanced_quiz(text, extracted_clauses)
    entities = extract_entities(text)
    sample_question = "What is the main purpose of this document?"
    answer = answer_question(text, sample_question)
    
    earned_achievements = save_to_firebase(text, quiz, extracted_clauses, doc_type, user_id)
    
    return render_template(
        "results.html",
        doc_type=doc_type,
        clauses=extracted_clauses,
        quiz=quiz,
        classification=classification_result,
        entities=entities,
        answer=answer,
        achievements=earned_achievements
    )

In [19]:
@app.route("/analyze_document", methods=["POST"])
def analyze_document():
    try:
        file = request.files["document"]
        user_id = request.form.get("user_id", "anonymous")

        if file.filename.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff')):
            image = cv2.imdecode(np.frombuffer(file.read(), np.uint8), cv2.IMREAD_COLOR)
            text = pytesseract.image_to_string(image)
        else:
            text = file.read().decode('utf-8', errors='ignore')

        classification_result = classify_text_with_bert(text)
        doc_type = DOCUMENT_TYPES[classification_result]
        
        extracted_clauses = extract_clauses(text)
        
        # Use specialized quiz generator for license agreements
        if doc_type == "License Agreement":
            quiz = generate_license_quiz(text, extracted_clauses)
        else:
            quiz = generate_advanced_quiz(text, extracted_clauses)
        
        entities = extract_entities(text)
        
        earned_achievements = save_to_firebase(text, quiz, extracted_clauses, doc_type, user_id)
        
        recent_scans = db.collection("scans").where("user_id", "==", user_id).order_by(
            "timestamp", direction=firestore.Query.DESCENDING
        ).limit(5).stream()

        recent_scan_data = []
        for scan in recent_scans:
            scan_data = scan.to_dict()
            scan_data["id"] = scan.id
            recent_scan_data.append(scan_data)
            
        return render_template(
            "results.html",
            doc_type=doc_type,
            clauses=extracted_clauses,
            quiz=quiz,
            entities=entities,
            user_id=user_id,
            recent_scans=recent_scan_data,
            achievements=earned_achievements
        )
    except Exception as e:
        return f"Analysis error: {str(e)}", 500

In [20]:
@app.route("/submit_quiz", methods=["POST"])
def submit_quiz():
    scan_id = request.args.get("scan_id", "")
    user_id = request.args.get("user", "anonymous")

    if not scan_id:
        flash("Quiz not found.", "error")
        return redirect(url_for("index"))

    try:
        scan_doc = db.collection("scans").document(scan_id).get()
        if not scan_doc.exists:
            flash("Quiz not found.", "error")
            return redirect(url_for("index"))

        scan_data = scan_doc.to_dict()
        quiz = scan_data.get("quiz", [])
        
        if not quiz:
            flash("No quiz questions found.", "error")
            return redirect(url_for("index"))

        score = 0
        total_questions = len(quiz)
        answers = []

        # Create a mapping of question numbers to indices
        question_map = {i+1: idx for idx, i in enumerate(range(len(quiz)))}
        
        for q_num, idx in question_map.items():
            user_answer = request.form.get(f"q{q_num}", "").strip()
            question = quiz[idx]
            correct = False
            
            if user_answer.lower() == str(question.get("correct_answer", "")).lower():
                score += 1
                correct = True
            
            answers.append({
                "question": question.get("question", ""),
                "user_answer": user_answer,
                "correct_answer": question.get("correct_answer", ""),
                "correct": correct
            })

        # Calculate percentage
        percentage = (score / total_questions) * 100 if total_questions > 0 else 0
        
        # Save quiz results (without XP)
        db.collection("quiz_results").add({
            "user_id": user_id,
            "scan_id": scan_id,
            "score": score,
            "total_questions": total_questions,
            "percentage": percentage,
            "answers": answers,
            "timestamp": datetime.utcnow().isoformat()
        })

        return render_template(
            "quiz_results.html",
            score=score,
            total_questions=total_questions,
            percentage=percentage,
            answers=answers,
            perfect_score=(score == total_questions),
            user_id=user_id
        )

    except Exception as e:
        print(f"Error submitting quiz: {str(e)}")
        flash("An error occurred while submitting your quiz.", "error")
        return redirect(url_for("index"))

In [21]:
@app.route("/leaderboard")
def leaderboard():
    users_ref = db.collection("users").order_by("xp", direction=firestore.Query.DESCENDING).limit(10)
    users = []

    for doc in users_ref.stream():
        user_data = doc.to_dict()
        user_id = user_data.get("user_id", doc.id)
        xp = user_data.get("xp", 0)
        level = xp // 100
        scan_count = user_data.get("scan_count", 0)
        achievement_count = len(user_data.get("achievements", []))
    
        users.append({
            "user_id": user_id,
            "xp": xp,
            "level": level,
            "scan_count": scan_count,
            "achievement_count": achievement_count
        })

    current_user = request.args.get("user")
    current_user_rank = None

    if current_user:
        all_users = list(db.collection("users").order_by("xp", direction=firestore.Query.DESCENDING).stream())
    
        for i, user_doc in enumerate(all_users):
            if user_doc.id == current_user or user_doc.to_dict().get("user_id") == current_user:
                current_user_rank = i + 1
                break

    return render_template(
        "leaderboard.html",
        users=users,
        current_user=current_user,
        current_user_rank=current_user_rank
    )
    
# @app.route('/take_quiz')
# def take_quiz():
#     scan_id = request.args.get('scan_id', '')
#     user_id = request.args.get('user', '')

#     try:
#         doc_ref = db.collection('scans').document(scan_id)
#         scan_doc = doc_ref.get()
    
#         if scan_doc.exists:
#             scan_data = scan_doc.to_dict()
#             quiz_data = scan_data.get('quiz', [])
#         else:
#             flash("Scan not found", "error")
#             return redirect(url_for('index'))
        
#     except Exception as e:
#         flash(f"Error retrieving quiz: {str(e)}", "error")
#         return redirect(url_for('index'))
        
#     return render_template('quiz.html', scan_id=scan_id, user_id=user_id, quiz=quiz_data)

# @app.route("/dashboard")
# def dashboard():
#     user_id = request.args.get("user", "anonymous")
#     user_ref = db.collection("users").document(user_id)
#     user_data = user_ref.get().to_dict()
#     xp = user_data["xp"] if user_data else 0
#     level = xp // 100
#     progress = xp % 100
#     return render_template("dashboard.html", xp=xp, level=level, progress=progress)

In [22]:
@app.route('/take_quiz')
def take_quiz():
    scan_id = request.args.get('scan_id')
    user_id = request.args.get('user', 'anonymous')
    
    if not scan_id:
        flash("No document specified for quiz", "error")
        return redirect(url_for('index'))

    try:
        scan_doc = db.collection('scans').document(scan_id).get()
        if not scan_doc.exists:
            flash("Document not found", "error")
            return redirect(url_for('index'))
            
        scan_data = scan_doc.to_dict()
        quiz = scan_data.get('quiz', [])
        
        if not quiz:
            flash("No quiz available for this document", "error")
            return redirect(url_for('index'))
        
        # Prepare quiz data with question numbers
        quiz_with_numbers = []
        for i, question in enumerate(quiz, 1):
            question['number'] = i
            quiz_with_numbers.append(question)
            
        return render_template('quiz.html', 
                            scan_id=scan_id, 
                            user_id=user_id, 
                            quiz=quiz_with_numbers)
        
    except Exception as e:
        print(f"Error retrieving quiz: {str(e)}")
        flash("Error loading quiz", "error")
        return redirect(url_for('index'))

In [23]:
@app.route("/dashboard")
def dashboard():
    user_id = request.args.get("user", "anonymous")
    try:
        user_ref = db.collection("users").document(user_id)
        user_data = user_ref.get().to_dict()
        xp = user_data["xp"] if user_data else 0
        level = xp // 100
        progress = xp % 100
    except Exception as e:
        print(f"Firebase error, using mock data: {e}")
        # Mock data
        xp = 150
        level = 1
        progress = 50
    
    return render_template("dashboard.html", 
                         xp=xp, 
                         level=level, 
                         progress=progress,
                         scan_count=5,  # Mock values
                         quiz_completed=3,
                         total_clauses=8,
                         doc_types_analyzed=2,
                         achievements=[],  # Empty array
                         recent_scans=[])  # Empty array

In [None]:
# Import threading to run Flask in a separate thread
import threading

def run_flask():
    app.run(debug=False, use_reloader=False)  # Disable reloader and debug for notebook use

# Start Flask in a separate thread
flask_thread = threading.Thread(target=run_flask)
flask_thread.daemon = True  # This makes the thread exit when the notebook is closed
flask_thread.start()

print("Flask server is running in the background. Access it at http://127.0.0.1:5000")

Flask server is running in the background. Access it at http://127.0.0.1:5000


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [25/Apr/2025 10:53:30] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [25/Apr/2025 10:53:30] "GET /static/style.css HTTP/1.1" 304 -
127.0.0.1 - - [25/Apr/2025 10:53:30] "GET /favicon.ico HTTP/1.1" 404 -
  "timestamp": datetime.utcnow().isoformat()
  return query.where(field_path, op_string, value)
127.0.0.1 - - [25/Apr/2025 10:53:59] "POST /analyze_document HTTP/1.1" 200 -
127.0.0.1 - - [25/Apr/2025 10:53:59] "GET /static/style.css HTTP/1.1" 304 -
127.0.0.1 - - [25/Apr/2025 10:55:24] "POST /analyze_document HTTP/1.1" 200 -
127.0.0.1 - - [25/Apr/2025 10:55:24] "GET /static/style.css HTTP/1.1" 304 -
127.0.0.1 - - [25/Apr/2025 10:55:29] "GET /take_quiz?scan_id=VVXlo6FEKPAHR390ekgN&user=anonymous HTTP/1.1" 200 -
127.0.0.1 - - [25/Apr/2025 10:55:29] "GET /static/style.css HTTP/1.1" 304 -


In [25]:
# Firebase initialization with proper check
if not firebase_admin._apps:
    try:
        cred = credentials.Certificate("firebase_config.json")  # Ensure this path is correct
        firebase_admin.initialize_app(cred)
        db = firestore.client()
        print("Firebase initialized successfully")
    except Exception as e:
        print(f"Error initializing Firebase: {str(e)}")
        # Handle the error appropriately - maybe exit if Firebase is critical
else:
    print("Firebase already initialized")
    db = firestore.client()  # Still need to get the firestore client

Firebase already initialized
