<a href="https://colab.research.google.com/github/Jathin4/Personalized-Quiz-Developer/blob/main/Personalized_Quiz_Developer_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install runtime dependencies into the kernel's own environment (%pip, not a shell pip).
%pip install -q sentence-transformers faiss-cpu transformers torch pdfplumber fpdf numpy scikit-learn

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting pdfplumber
  Downloading pdfplumber-0.11.6-py3-none-any.whl.metadata (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-non

In [None]:
# NOTE(review): the recorded output of this cell is "No such file or directory";
# no earlier cell in this notebook creates quiz_generator.py.
!python quiz_generator.py

python3: can't open file '/content/quiz_generator.py': [Errno 2] No such file or directory


In [None]:
# spaCy and its small English model are loaded by the quiz generator cell below.
%pip install -q faiss-cpu spacy sentence-transformers
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m97.5 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pdfplumber
import spacy
import logging
import os
import sys
from typing import List, Dict, Tuple
from datetime import datetime
import json
from collections import defaultdict
from transformers import pipeline
import re

# Module-wide logging: INFO and above, prefixed with timestamp and level.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

class VectorDB:
    """FAISS-backed store that keeps texts and metadata aligned with vectors.

    Row ``i`` of ``self.index`` corresponds to ``self.texts[i]`` and
    ``self.metadata[i]``. ``add_vectors`` validates its inputs so the three
    collections cannot drift out of step.
    """

    def __init__(self, dimension: int):
        """Create an empty ``faiss.IndexFlatL2`` index for ``dimension``-sized vectors."""
        self.dimension = dimension
        self.index = faiss.IndexFlatL2(dimension)
        self.texts = []
        self.metadata = []

    def add_vectors(self, vectors: np.ndarray, texts: List[str], metadata: List[Dict] = None):
        """Append vectors together with their texts and optional metadata.

        Args:
            vectors: Array of shape ``(n, dimension)``; a single 1-D vector is
                accepted and treated as one row. Any numeric dtype is accepted
                and converted to float32.
            texts: One text per vector.
            metadata: One dict per vector; defaults to an empty dict each.

        Raises:
            ValueError: If the vector width differs from ``self.dimension`` or
                the number of vectors, texts and metadata entries differ.
        """
        if metadata is None:
            metadata = [{} for _ in texts]

        # FAISS expects a C-contiguous float32 matrix; coerce instead of failing
        # deep inside the native call.
        vectors = np.ascontiguousarray(vectors, dtype=np.float32)
        if vectors.ndim == 1:
            vectors = vectors.reshape(1, -1)

        if vectors.ndim != 2 or vectors.shape[1] != self.dimension:
            raise ValueError(
                f"Expected vectors of shape (n, {self.dimension}), got {vectors.shape}"
            )
        if not (vectors.shape[0] == len(texts) == len(metadata)):
            # A mismatch would silently misalign search results with their texts.
            raise ValueError(
                f"Got {vectors.shape[0]} vectors, {len(texts)} texts and "
                f"{len(metadata)} metadata entries; counts must match"
            )
        if vectors.shape[0] == 0:
            return

        self.index.add(vectors)
        self.texts.extend(texts)
        self.metadata.extend(metadata)

    def search(self, query_vector: np.ndarray, k: int = 5) -> List[Tuple[str, float, Dict]]:
        """Return up to ``k`` nearest entries as ``(text, distance, metadata)`` tuples.

        An empty store or a non-positive ``k`` yields an empty list.
        """
        if k <= 0 or not self.texts:
            return []

        query = np.ascontiguousarray(query_vector, dtype=np.float32).reshape(1, -1)
        # Never ask for more neighbours than exist; the index pads with -1 otherwise.
        k = min(k, len(self.texts))
        distances, indices = self.index.search(query, k)

        return [
            (self.texts[idx], float(distance), self.metadata[idx])
            for idx, distance in zip(indices[0], distances[0])
            if idx != -1
        ]

class ContentAnalyzer:
    """Pulls definition-like and fact-like sentences out of text using a spaCy pipeline."""

    # Lower-case phrases (space padded) whose presence marks a sentence as a definition.
    _DEFINITION_PATTERNS = (
        " is ", " are ", " refers to ", " means ", " defined as ",
        " consists of ", " contains ", " comprises ", " represents ",
    )
    # Minimum whitespace-separated word count for a sentence to count as a fact.
    _MIN_FACT_WORDS = 8

    def __init__(self, nlp):
        """Store the callable ``nlp`` pipeline used to parse text."""
        self.nlp = nlp

    def extract_key_statements(self, text: str) -> List[Dict]:
        """Extract key factual statements and definitions from the text.

        Each sentence is classified at most once: as a ``'definition'`` when it
        contains one of the definition phrases, otherwise as a ``'fact'`` when
        it has a subject or direct object and at least eight words.

        Returns:
            A list of dicts with keys ``'text'``, ``'type'`` and ``'keywords'``,
            in sentence order.
        """
        doc = self.nlp(text)
        statements = []

        for sent in doc.sents:
            sent_text = sent.text.strip()
            lowered = sent_text.lower()

            if any(pattern in lowered for pattern in self._DEFINITION_PATTERNS):
                statement_type = 'definition'
            elif (any(token.dep_ in ('nsubj', 'dobj') for token in sent)
                  and len(sent_text.split()) >= self._MIN_FACT_WORDS):
                statement_type = 'fact'
            else:
                continue

            statements.append({
                'text': sent_text,
                'type': statement_type,
                'keywords': self._extract_keywords(sent)
            })

        return statements

    def _extract_keywords(self, sent) -> List[str]:
        """Extract important keywords from a sentence.

        Keywords are the lower-cased texts of non-stop-word nouns and proper
        nouns longer than two characters. Duplicates are removed while keeping
        first-occurrence order, so the result is the same on every run
        (a plain ``set`` would give an arbitrary order).
        """
        keywords = [
            token.text.lower()
            for token in sent
            if token.pos_ in ('NOUN', 'PROPN')
            and not token.is_stop
            and len(token.text) > 2
        ]
        return list(dict.fromkeys(keywords))

class QuizGenerator:
    def __init__(self):
        self.embedding_model = None
        self.question_generator = None
        self.nlp = None
        self.content_analyzer = None
        self.content_db = None
        self.results_db = None
        self.current_quiz_id = None
        self.content_sections = []
        self.user_score = {"correct": 0, "total": 0}

    def initialize_models(self):
        """Load the embedding, text2text and spaCy models and create both vector stores.

        Any failure is logged and re-raised to the caller.

        NOTE(review): ``self.question_generator`` is assigned here, but no method
        of this class visible in this file reads it afterwards.
        """
        try:
            embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
            self.embedding_model = embedder
            self.question_generator = pipeline("text2text-generation", model="t5-base")

            language_model = spacy.load("en_core_web_sm")
            self.nlp = language_model
            self.content_analyzer = ContentAnalyzer(language_model)

            # Both stores are sized to the embedding model's output width.
            embedding_dim = embedder.get_sentence_embedding_dimension()
            self.content_db = VectorDB(embedding_dim)
            self.results_db = VectorDB(embedding_dim)
        except Exception as e:
            logger.error(f"Error initializing models: {e}")
            raise
        else:
            logger.info("All models and databases initialized successfully")

    def process_pdf(self, pdf_path: str) -> None:
        try:
            with pdfplumber.open(pdf_path) as pdf:
                current_section = {'title': 'Introduction', 'content': '', 'statements': []}

                for page in pdf.pages:
                    text = page.extract_text()
                    paragraphs = text.split('\n\n')

                    for para in paragraphs:
                        para = para.strip()
                        if not para:
                            continue

                        # Check if this is a new section heading
                        if len(para.split('\n')) == 1 and len(para.split()) <= 10:
                            if current_section['content']:
                                # Process previous section
                                self._process_section(current_section)
                                current_section = {'title': para, 'content': '', 'statements': []}
                        else:
                            current_section['content'] += para + '\n\n'

                # Process the last section
                if current_section['content']:
                    self._process_section(current_section)

                logger.info(f"Processed {len(self.content_sections)} sections from PDF")
        except Exception as e:
            logger.error(f"Error processing PDF: {e}")
            raise

    def _process_section(self, section: Dict) -> None:
        """Process a content section to extract key statements and create embeddings."""
        statements = self.content_analyzer.extract_key_statements(section['content'])
        section['statements'] = statements

        # Create embeddings for the section content
        embedding = self.embedding_model.encode([section['content']])
        self.content_db.add_vectors(
            embedding,
            [section['content']],
            [{'title': section['title'], 'statements': statements}]
        )

        self.content_sections.append(section)

    def generate_question_from_statement(self, statement: Dict, section_title: str) -> Dict:
        """Generate a question from a key statement."""
        text = statement['text']

        if statement['type'] == 'definition':
            # Create a "what is" question
            for keyword in statement['keywords']:
                if keyword in text.lower():
                    question_text = f"According to the section on {section_title}, which of the following best defines {keyword}?"
                    correct_answer = text
                    return self._create_question(question_text, correct_answer, statement)

        elif statement['type'] == 'fact':
            # Create a fact verification question
            question_text = f"Based on the material covered in {section_title}, which of the following statements is correct?"
            correct_answer = text
            return self._create_question(question_text, correct_answer, statement)

        return None

    def _create_question(self, question_text: str, correct_answer: str, statement: Dict) -> Dict:
        """Create a complete question with distractors."""
        wrong_options = self._generate_content_based_distractors(
            correct_answer,
            statement['keywords'],
            statement['type']
        )

        if len(wrong_options) == 3:
            options = [correct_answer] + wrong_options
            np.random.shuffle(options)

            return {
                "question": question_text,
                "options": options,
                "correct_answer": correct_answer,
                "topic": statement['keywords'],
                "type": statement['type']
            }

        return None

    def _generate_content_based_distractors(self, correct_answer: str, keywords: List[str], q_type: str) -> List[str]:
        """Generate distractors from related content."""
        distractors = set()

        # Find related statements from all sections
        for section in self.content_sections:
            for stmt in section['statements']:
                if (stmt['text'] != correct_answer and
                    stmt['type'] == q_type and
                    any(kw in stmt['keywords'] for kw in keywords)):
                    distractors.add(stmt['text'])

        # If we don't have enough distractors, modify some existing statements
        while len(distractors) < 3 and correct_answer:
            doc = self.nlp(correct_answer)
            modified = correct_answer

            # Replace or modify key terms
            for token in doc:
                if token.pos_ in ['NOUN', 'VERB', 'ADJ']:
                    antonyms = ['not', 'rarely', 'hardly', 'seldom'] if token.pos_ == 'VERB' else ['different', 'other', 'alternative']
                    modified = modified.replace(token.text, np.random.choice(antonyms) + ' ' + token.text)
                    if modified != correct_answer:
                        distractors.add(modified)
                        break

        return list(distractors)[:3]

    def generate_quiz(self, num_questions: int) -> List[Dict]:
        """Generate quiz questions from processed content."""
        questions = []
        self.current_quiz_id = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Distribute questions across sections
        questions_per_section = max(1, num_questions // len(self.content_sections))

        for section in self.content_sections:
            section_questions = []
            statements = sorted(section['statements'],
                             key=lambda x: len(x['keywords']),
                             reverse=True)  # Prioritize content-rich statements

            for statement in statements:
                if len(section_questions) >= questions_per_section:
                    break

                question = self.generate_question_from_statement(
                    statement,
                    section['title']
                )

                if question:
                    question['id'] = f"{self.current_quiz_id}_q{len(questions)+1}"
                    question['section'] = section['title']
                    section_questions.append(question)

            questions.extend(section_questions)

            if len(questions) >= num_questions:
                break

        return questions[:num_questions]

    def conduct_quiz(self, questions: List[Dict]) -> Tuple[List[Dict], str]:
        if not questions:
            logger.warning("No questions available")
            return [], ""

        results = []
        self.user_score = {"correct": 0, "total": len(questions)}

        for idx, question in enumerate(questions, 1):
            print(f"\nQuestion {idx}/{len(questions)}: {question['question']}")
            for i, option in enumerate(question['options']):
                print(f"{chr(97 + i)}) {option}")

            while True:
                response = input("\nYour answer (a/b/c/d): ").strip().lower()
                if response in ["a", "b", "c", "d"]:
                    break
                print("Invalid input. Please enter a, b, c, or d.")

            selected_answer = question['options'][ord(response) - 97]
            is_correct = selected_answer == question['correct_answer']

            if is_correct:
                self.user_score["correct"] += 1

            # Show immediate feedback
            print(f"\n{'Correct!' if is_correct else 'Incorrect.'}")
            if not is_correct:
                print(f"The correct answer was: {question['correct_answer']}")

            # Show current score
            print(f"\nCurrent Score: {self.user_score['correct']}/{idx} "
                  f"({(self.user_score['correct']/idx*100):.1f}%)")

            results.append({
                "question_id": question['id'],
                "user_answer": selected_answer,
                "is_correct": is_correct,
                "timestamp": datetime.now().isoformat()
            })

        report = self.generate_report(questions, results)
        self.store_quiz_results(questions, results, report)

        return results, report

    def generate_report(self, questions: List[Dict], results: List[Dict]) -> str:
        score_percentage = (self.user_score["correct"] / self.user_score["total"] * 100)

        report = [
            f"=== Quiz Report (ID: {self.current_quiz_id}) ===\n",
            f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            f"Total Questions: {self.user_score['total']}",
            f"Correct Answers: {self.user_score['correct']}",
            f"Final Score: {score_percentage:.1f}%\n",
            "Detailed Results:"
        ]

        # Group questions by section
        section_results = defaultdict(list)
        for q, r in zip(questions, results):
            section_results[q['section']].append((q, r))

        # Report results by section
        for section, section_qr in section_results.items():
            report.append(f"\n=== Section: {section} ===")
            section_correct = sum(1 for _, r in section_qr if r['is_correct'])
            section_total = len(section_qr)
            section_score = (section_correct / section_total * 100)
            report.append(f"Section Score: {section_correct}/{section_total} ({section_score:.1f}%)")

            for q, r in section_qr:
                report.append(f"\nQ: {q['question']}")
                report.append(f"Your Answer: {r['user_answer']}")
                report.append(f"Correct Answer: {q['correct_answer']}")
                report.append(f"Result: {'✓ Correct' if r['is_correct'] else '✗ Incorrect'}")

        # Add overall performance analysis
        report.append("\nPerformance by Question Type:")
        type_performance = defaultdict(lambda: {"correct": 0, "total": 0})
        for q, r in zip(questions, results):
            q_type = q.get('type', 'general')
            type_performance[q_type]["total"] += 1
            if r['is_correct']:
                type_performance[q_type]["correct"] += 1

        for q_type, stats in type_performance.items():
            percentage = (stats["correct"] / stats["total"] * 100) if stats["total"] > 0 else 0
            report.append(f"{q_type.capitalize()}: {stats['correct']}/{stats['total']} ({percentage:.1f}%)")

        return "\n".join(report)

    def store_quiz_results(self, questions: List[Dict], results: List[Dict], report: str):
        """Embed the report text and save it in ``results_db`` with quiz metadata.

        The stored metadata carries the quiz id, a timestamp, the questions,
        the per-question results and the final score formatted as a percentage
        string with one decimal place.
        """
        report_embedding = self.embedding_model.encode([report])

        score = self.user_score
        record = {
            'quiz_id': self.current_quiz_id,
            'timestamp': datetime.now().isoformat(),
            'questions': questions,
            'results': results,
            'final_score': f"{(score['correct']/score['total']*100):.1f}%"
        }
        self.results_db.add_vectors(report_embedding, [report], [record])

        logger.info(f"Stored quiz results with ID: {self.current_quiz_id}")

def main():
    """Run the interactive, menu-driven PDF quiz session on stdin/stdout.

    Menu options: load a PDF, generate and take a quiz (the report is also
    written to ``quiz_report_<id>.txt``), browse earlier reports from this
    session, or exit.

    Changes relative to the earlier version:
      * The report file is written as UTF-8. The report contains the
        characters ✓ and ✗, which a non-UTF-8 default encoding cannot encode.
      * A failure while processing a PDF is reported and the menu continues,
        instead of ending the whole session.
      * The PDF path must be an existing file; a directory is rejected up front.

    Raises:
        Exception: Any other unexpected error is logged and re-raised.
    """
    try:
        quiz_gen = QuizGenerator()
        quiz_gen.initialize_models()

        while True:
            print("\n=== PDF Quiz Generator ===")
            print("1. Load and process a PDF")
            print("2. Generate a new quiz")
            print("3. View previous quiz results")
            print("4. Exit")

            choice = input("\nEnter your choice (1-4): ").strip()

            if choice == '1':
                pdf_path = input("\nEnter the path to your PDF file: ").strip()
                if not os.path.isfile(pdf_path):
                    print("Error: File not found!")
                    continue

                print("\nProcessing PDF, please wait...")
                try:
                    quiz_gen.process_pdf(pdf_path)
                except Exception as e:
                    # process_pdf has already logged the details; keep the
                    # session alive so the user can try another file.
                    print(f"Error: could not process PDF: {e}")
                    continue
                print(f"Successfully processed PDF with {len(quiz_gen.content_sections)} sections")

            elif choice == '2':
                if not quiz_gen.content_sections:
                    print("Please load a PDF first!")
                    continue

                while True:
                    try:
                        num_questions = int(input("\nHow many questions would you like (5-20)? "))
                        if 5 <= num_questions <= 20:
                            break
                        print("Please enter a number between 5 and 20")
                    except ValueError:
                        print("Please enter a valid number")

                print("\nGenerating quiz questions...")
                questions = quiz_gen.generate_quiz(num_questions)

                if not questions:
                    print("Failed to generate questions. Please try again.")
                    continue

                print("\nStarting quiz...\n")
                results, report = quiz_gen.conduct_quiz(questions)

                # Save report to file; explicit UTF-8 because of the ✓/✗ marks.
                report_filename = f"quiz_report_{quiz_gen.current_quiz_id}.txt"
                with open(report_filename, 'w', encoding='utf-8') as f:
                    f.write(report)
                print(f"\nReport saved to {report_filename}")

            elif choice == '3':
                if not quiz_gen.results_db or len(quiz_gen.results_db.texts) == 0:
                    print("No quiz results available!")
                    continue

                print("\nPrevious Quiz Results:")
                for i, metadata in enumerate(quiz_gen.results_db.metadata, 1):
                    print(f"\n{i}. Quiz ID: {metadata['quiz_id']}")
                    print(f"   Date: {metadata['timestamp']}")
                    print(f"   Score: {metadata['final_score']}")

                while True:
                    try:
                        selection = int(input("\nEnter the number of the quiz to view details (0 to cancel): "))
                        if selection == 0:
                            break
                        if 1 <= selection <= len(quiz_gen.results_db.texts):
                            print("\n" + quiz_gen.results_db.texts[selection - 1])
                            break
                        print("Invalid selection")
                    except ValueError:
                        print("Please enter a valid number")

            elif choice == '4':
                print("\nThank you for using PDF Quiz Generator!")
                break

            else:
                print("Invalid choice. Please try again.")

    except Exception as e:
        logger.error(f"An error occurred: {e}")
        raise

# Entry-point guard. NOTE(review): when this file runs as a notebook cell (the
# recorded output shows the menu starting), main() begins its interactive loop
# here, so the QuizStatistics class defined below this line is not created
# until main() returns.
if __name__ == "__main__":
    main()

class QuizStatistics:
    """Class to analyze quiz results and provide statistical insights."""

    def __init__(self, results_db: "VectorDB"):
        """Keep a reference to the store whose ``metadata`` list holds one record per quiz."""
        self.results_db = results_db

    @staticmethod
    def _accuracy(tally: Dict) -> float:
        """Return the percentage of correct answers in a ``{"correct", "total"}`` tally."""
        return (tally["correct"] / tally["total"] * 100) if tally["total"] > 0 else 0

    def calculate_overall_statistics(self) -> Dict:
        """Calculate overall statistics across all quizzes.

        Each metadata record is read for its ``'questions'``, ``'results'`` and
        ``'final_score'`` (a string such as ``"80.0%"``). Questions without a
        ``'type'`` key are counted under ``'general'``, matching how the
        per-quiz report treats them, instead of raising ``KeyError``.

        Returns:
            An empty dict when there are no quizzes. Otherwise a dict with
            ``total_quizzes``, ``total_questions``, ``overall_accuracy``,
            ``average_score``, ``highest_score``, ``lowest_score``,
            ``section_performance`` and ``question_type_performance``; the last
            two map a name to ``{"accuracy": percent}``.
        """
        if not self.results_db.metadata:
            return {}

        total_questions = 0
        total_correct = 0
        scores = []
        section_performance = defaultdict(lambda: {"correct": 0, "total": 0})
        question_types = defaultdict(lambda: {"correct": 0, "total": 0})

        for metadata in self.results_db.metadata:
            for q, r in zip(metadata['questions'], metadata['results']):
                section_tally = section_performance[q['section']]
                type_tally = question_types[q.get('type', 'general')]

                total_questions += 1
                section_tally["total"] += 1
                type_tally["total"] += 1

                if r['is_correct']:
                    total_correct += 1
                    section_tally["correct"] += 1
                    type_tally["correct"] += 1

            scores.append(float(metadata['final_score'].rstrip('%')))

        return {
            "total_quizzes": len(self.results_db.metadata),
            "total_questions": total_questions,
            "overall_accuracy": (total_correct / total_questions * 100) if total_questions > 0 else 0,
            "average_score": sum(scores) / len(scores) if scores else 0,
            "highest_score": max(scores) if scores else 0,
            "lowest_score": min(scores) if scores else 0,
            "section_performance": {
                section: {"accuracy": self._accuracy(tally)}
                for section, tally in section_performance.items()
            },
            "question_type_performance": {
                q_type: {"accuracy": self._accuracy(tally)}
                for q_type, tally in question_types.items()
            }
        }

    def generate_progress_report(self) -> str:
        """Generate a detailed progress report across all quizzes.

        Returns the text ``"No quiz data available."`` when no quiz has been
        stored yet.
        """
        stats = self.calculate_overall_statistics()
        if not stats:
            return "No quiz data available."

        report = [
            "=== Overall Quiz Performance Report ===\n",
            f"Total Quizzes Taken: {stats['total_quizzes']}",
            f"Total Questions Answered: {stats['total_questions']}",
            f"Overall Accuracy: {stats['overall_accuracy']:.1f}%",
            f"Average Quiz Score: {stats['average_score']:.1f}%",
            f"Highest Score: {stats['highest_score']:.1f}%",
            f"Lowest Score: {stats['lowest_score']:.1f}%\n",
            "Performance by Section:",
        ]

        for section, perf in stats['section_performance'].items():
            report.append(f"- {section}: {perf['accuracy']:.1f}%")

        report.append("\nPerformance by Question Type:")
        for q_type, perf in stats['question_type_performance'].items():
            report.append(f"- {q_type.capitalize()}: {perf['accuracy']:.1f}%")

        return "\n".join(report)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Device set to use cpu



=== PDF Quiz Generator ===
1. Load and process a PDF
2. Generate a new quiz
3. View previous quiz results
4. Exit

Enter your choice (1-4): 1

Enter the path to your PDF file: /content/UNIT-4.pdf





Processing PDF, please wait...




Successfully processed PDF with 1 sections

=== PDF Quiz Generator ===
1. Load and process a PDF
2. Generate a new quiz
3. View previous quiz results
4. Exit

Enter your choice (1-4): 2

How many questions would you like (5-20)? 5

Generating quiz questions...

Starting quiz...


Question 1/5: According to the section on Introduction, which of the following best defines function?
a) CH 12 | P a ge

Blockchain Technology COURSE CODE-A7662 MODULE-04
}
function getValue() public view returns (uint) {
return counter;
}
function increment() public onlyOwner {
counter++;
emit ValueUpdated(counter);
}
receive() external payable {
emit ReceivedEther(msg.sender, msg.value);
}
Example of a Well-Structured Solidity Source File:
Solidity
// SPDX-License-Identifier: MIT
pragma solidity ^0.8.17;
import "openzeppelin-contracts/contracts/token/ERC20/ERC20.sol";
import "./Ownable.sol";
contract MyToken is ERC20, Ownable {
// State Variables
uint public constant INITIAL_SUPPLY = 1000 * 10**18;
// Events