In [None]:
import PyPDF2
import nltk
import random
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import wordnet
import re

# Fetch the NLTK resources used below: the sentence/word tokenizer model,
# the POS tagger and the WordNet corpus.
for _resource in ('punkt', 'averaged_perceptron_tagger', 'wordnet'):
    nltk.download(_resource)

class QuestionPaperGenerator:
    """Generate a question paper from the text of a PDF document.

    Text is extracted with PyPDF2 and split into sentences with NLTK.
    Randomly chosen sentences are then turned into fill-in-the-blank MCQs,
    descriptive prompts and two-sentence scenario questions, which
    ``format_question_paper`` renders as plain text.
    """

    # Number of answer choices presented for every MCQ.
    OPTION_COUNT = 4

    def __init__(self):
        self.content = ""      # text extracted from the PDF(s) read so far
        self.questions = []    # generated question dicts, in generation order
        self.sentences = []    # sentence-tokenized view of ``content``

    def extract_text_from_pdf(self, pdf_path):
        """Extract text content from a PDF file and tokenize it into sentences.

        Args:
            pdf_path: Path of the PDF file to read.

        Returns:
            True when the file was read and tokenized; False (after printing
            the error) when anything went wrong. On failure ``content`` and
            ``sentences`` are left exactly as they were.
        """
        try:
            with open(pdf_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                # ``or ""`` guards against a page yielding no text object.
                page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
            # Join pages with a newline so the last word of one page is not
            # fused with the first word of the next.
            pieces = ([self.content] if self.content else []) + page_texts
            content = "\n".join(pieces)
            sentences = sent_tokenize(content)
            # Commit only once everything succeeded.
            self.content = content
            self.sentences = sentences
            return True
        except Exception as e:
            print(f"Error reading PDF: {str(e)}")
            return False

    @staticmethod
    def _blank_out(sentence, word):
        """Return ``sentence`` with whole-word occurrences of ``word`` blanked."""
        pattern = r'(?<!\w)' + re.escape(word) + r'(?!\w)'
        blanked = re.sub(pattern, "_____", sentence)
        if blanked == sentence:
            # The token has no whole-word match (e.g. the tokenizer split a
            # contraction); fall back to plain substring replacement.
            blanked = sentence.replace(word, "_____")
        return blanked

    @staticmethod
    def _build_options(answer, candidates, count=4):
        """Return ``count`` distinct options with ``answer`` guaranteed present.

        ``candidates`` are consumed in order, skipping case-insensitive
        duplicates; missing slots are padded with "Option N" placeholders.
        The result is not shuffled and always has ``answer`` at index 0.
        """
        options = [answer]
        seen = {answer.casefold()}
        for candidate in candidates:
            if len(options) >= count:
                break
            key = candidate.casefold()
            if key not in seen:
                seen.add(key)
                options.append(candidate)
        while len(options) < count:
            options.append(f"Option {len(options) + 1}")
        return options

    @staticmethod
    def _marks_label(marks):
        """Return "1 mark" or "N marks" as appropriate."""
        return f"{marks} mark" if marks == 1 else f"{marks} marks"

    def generate_mcq(self, sentence):
        """Generate a fill-in-the-blank MCQ from a sentence.

        A random noun, verb or adjective of the sentence becomes the answer
        and is replaced by a blank in the question text.

        Returns:
            A dict with keys ``question``, ``options``, ``correct_answer``
            (index into ``options``) and ``answer``; or None when the
            sentence has no noun, verb or adjective.
        """
        tagged = nltk.pos_tag(word_tokenize(sentence))

        # Nouns, verbs and adjectives are the words worth blanking out.
        key_words = [word for word, tag in tagged if tag.startswith(('NN', 'VB', 'JJ'))]
        if not key_words:
            return None

        answer = random.choice(key_words)
        question = self._blank_out(sentence, answer)

        # Preferred distractors: WordNet lemma names for the answer.
        candidates = [name for syn in wordnet.synsets(answer) for name in syn.lemma_names()]
        # Fallback distractors: the sentence's other key words, random order.
        candidates.extend(random.sample(key_words, len(key_words)))

        # Building the list around the answer (instead of slicing a set)
        # guarantees the answer survives and the loop always terminates.
        options = self._build_options(answer, candidates, self.OPTION_COUNT)
        random.shuffle(options)

        return {
            'question': question,
            'options': options,
            'correct_answer': options.index(answer),
            'answer': answer
        }

    def generate_descriptive_question(self, sentence, marks):
        """Generate a descriptive question whose opener depends on the marks.

        Up to 2 marks uses a "short" opener, up to 5 a "medium" one and
        anything above a "long" one; the opener is prefixed to ``sentence``.
        """
        question_starters = {
            'short': ['Define', 'What is', 'List', 'State'],
            'medium': ['Explain', 'Describe', 'Elaborate on'],
            'long': ['Critically analyze', 'Evaluate', 'Compare and contrast']
        }

        if marks <= 2:
            tier = 'short'
        elif marks <= 5:
            tier = 'medium'
        else:
            tier = 'long'

        starter = random.choice(question_starters[tier])
        return f"{starter} {sentence}"

    def generate_scenario_based(self, context_sentences):
        """Generate a scenario-based question from the first two sentences.

        Returns:
            A dict with keys ``scenario`` and ``question``, or None when
            fewer than two sentences are supplied.
        """
        if len(context_sentences) < 2:
            return None

        scenario = " ".join(context_sentences[:2])
        questions = [
            "Based on the above scenario, analyze how would you handle this situation?",
            "What are the key factors to consider in this scenario?",
            "How would you apply the concepts learned to resolve this situation?",
            "What would be your approach to address the challenges in this scenario?"
        ]

        return {
            'scenario': scenario,
            'question': random.choice(questions)
        }

    def generate_questions(self, question_config):
        """
        Generate questions based on configuration and append them to
        ``self.questions``.

        question_config: dict with format:
        {
            'mcq': {'marks': 1, 'count': 3},
            'descriptive': {'marks': 5, 'count': 2},
            'scenario': {'marks': 10, 'count': 1}
        }
        Sections missing from the dict are skipped. Fewer questions than
        requested are produced when a sentence yields no MCQ or when there
        are not enough sentences.
        """
        # Generate MCQs
        if 'mcq' in question_config:
            spec = question_config['mcq']
            for _ in range(spec['count']):
                if not self.sentences:
                    continue
                mcq = self.generate_mcq(random.choice(self.sentences))
                if mcq:
                    mcq['type'] = 'mcq'
                    mcq['marks'] = spec['marks']
                    self.questions.append(mcq)

        # Generate Descriptive Questions
        if 'descriptive' in question_config:
            spec = question_config['descriptive']
            for _ in range(spec['count']):
                if not self.sentences:
                    continue
                question = self.generate_descriptive_question(
                    random.choice(self.sentences),
                    spec['marks']
                )
                self.questions.append({
                    'type': 'descriptive',
                    'question': question,
                    'marks': spec['marks']
                })

        # Generate Scenario-based Questions
        if 'scenario' in question_config:
            spec = question_config['scenario']
            for _ in range(spec['count']):
                if len(self.sentences) < 2:
                    continue
                scenario_q = self.generate_scenario_based(random.sample(self.sentences, 2))
                if scenario_q:
                    scenario_q['type'] = 'scenario'
                    scenario_q['marks'] = spec['marks']
                    self.questions.append(scenario_q)

    def format_question_paper(self):
        """Format the generated questions into a plain-text question paper.

        The header shows the total marks and a time allowance of 1.5 minutes
        per mark; questions are grouped into sections A (MCQ), B
        (descriptive) and C (scenario), each numbered from 1.
        """
        total_marks = sum(q['marks'] for q in self.questions)
        parts = [
            "QUESTION PAPER\n",
            "=" * 50 + "\n\n",
            f"Total Marks: {total_marks}\n",
            f"Time: {total_marks * 1.5} minutes\n\n",
        ]

        # Group questions by type
        mcqs = [q for q in self.questions if q['type'] == 'mcq']
        descriptive = [q for q in self.questions if q['type'] == 'descriptive']
        scenarios = [q for q in self.questions if q['type'] == 'scenario']

        if mcqs:
            parts.append("Section A: Multiple Choice Questions\n\n")
            for i, q in enumerate(mcqs, 1):
                parts.append(f"{i}. {q['question']} [{self._marks_label(q['marks'])}]\n")
                for j, option in enumerate(q['options']):
                    # chr(97) is 'a': options are lettered a), b), c), ...
                    parts.append(f"   {chr(97+j)}) {option}\n")
                parts.append("\n")

        if descriptive:
            parts.append("Section B: Descriptive Questions\n\n")
            for i, q in enumerate(descriptive, 1):
                parts.append(f"{i}. {q['question']} [{self._marks_label(q['marks'])}]\n\n")

        if scenarios:
            parts.append("Section C: Scenario-based Questions\n\n")
            for i, q in enumerate(scenarios, 1):
                parts.append(f"{i}. Read the following scenario:\n")
                parts.append(f"{q['scenario']}\n\n")
                parts.append(f"Question: {q['question']} [{self._marks_label(q['marks'])}]\n\n")

        return "".join(parts)

def generate_paper(pdf_path, question_config):
    """
    Build a formatted question paper from a PDF.

    Reads the PDF at ``pdf_path``, generates the questions requested in
    ``question_config`` and returns the formatted paper as a string.
    Returns None when the PDF text could not be extracted.
    """
    generator = QuestionPaperGenerator()
    if not generator.extract_text_from_pdf(pdf_path):
        return None
    generator.generate_questions(question_config)
    return generator.format_question_paper()

In [None]:
# Source PDF to build the paper from.
pdf_path = '/Users/niyatipatel/Desktop/CC/1.pdf'

# Blueprint: marks per question and number of questions for each section.
question_config = dict(
    mcq=dict(marks=1, count=3),
    descriptive=dict(marks=5, count=1),
    scenario=dict(marks=5, count=1),
)

# generate_paper returns None when the PDF cannot be read, in which case
# the literal "None" is printed.
question_paper = generate_paper(pdf_path, question_config)
print(question_paper)

# Meaningful 

In [None]:
import PyPDF2
import nltk
import random
import spacy
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import wordnet
from collections import defaultdict

# Fetch every NLTK resource this cell downloads, in the original order.
for _resource in (
    'punkt',
    'averaged_perceptron_tagger',
    'wordnet',
    'maxent_ne_chunker',
    'words',
):
    nltk.download(_resource)

class QuestionPaperGenerator:
    """Generate a question paper from the key concepts found in a PDF.

    Text is extracted with PyPDF2 and analysed with spaCy: named entities
    are stored in ``key_concepts`` under their entity label, multi-word noun
    chunks under ``'CONCEPT'``. Each stored item is a dict with the concept
    ``text`` and the sentence it came from (``context``).
    """

    # Number of answer choices presented for every MCQ.
    OPTION_COUNT = 4

    def __init__(self):
        self.content = ""        # text extracted from the PDF(s) read so far
        self.sentences = []      # NLTK sentence-tokenized view of ``content``
        self.questions = []      # generated question dicts, in generation order
        self.key_concepts = defaultdict(list)   # label -> [{'text', 'context'}]
        self.nlp = spacy.load('en_core_web_sm')

    def extract_text_from_pdf(self, pdf_path):
        """Read a PDF, tokenize its text and extract key concepts.

        Args:
            pdf_path: Path of the PDF file to read.

        Returns:
            True on success; False (after printing the error) on failure.
            ``content`` and ``sentences`` are only updated once the whole
            file has been read and tokenized.
        """
        try:
            with open(pdf_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                # ``or ""`` guards against a page yielding no text object.
                page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
            # Join pages with a newline so words at page boundaries are
            # not fused together.
            pieces = ([self.content] if self.content else []) + page_texts
            content = "\n".join(pieces)
            sentences = sent_tokenize(content)
            self.content = content
            self.sentences = sentences
            self._extract_key_concepts()
            return True
        except Exception as e:
            print(f"Error reading PDF: {str(e)}")
            return False

    def _extract_key_concepts(self):
        """Extract key concepts and their related sentences from ``content``."""
        doc = self.nlp(self.content)

        # The whole content is re-analysed, so start from a clean slate to
        # avoid duplicating concepts from an earlier extraction.
        self.key_concepts.clear()

        # Named entities, grouped by entity label.
        for ent in doc.ents:
            self.key_concepts[ent.label_].append({
                'text': ent.text,
                'context': ent.sent.text
            })

        # Multi-word noun phrases, with the sentence they occur in.
        for sent in doc.sents:
            for chunk in sent.noun_chunks:
                if len(chunk.text.split()) > 1:
                    self.key_concepts['CONCEPT'].append({
                        'text': chunk.text,
                        'context': sent.text
                    })

    @staticmethod
    def _build_options(answer, candidates, count=4):
        """Return ``count`` distinct options with ``answer`` guaranteed present.

        ``candidates`` are consumed in order, skipping case-insensitive
        duplicates; missing slots are padded with "Option N" placeholders.
        The result is not shuffled and always has ``answer`` at index 0.
        """
        options = [answer]
        seen = {answer.casefold()}
        for candidate in candidates:
            if len(options) >= count:
                break
            key = candidate.casefold()
            if key not in seen:
                seen.add(key)
                options.append(candidate)
        while len(options) < count:
            options.append(f"Option {len(options) + 1}")
        return options

    @staticmethod
    def _marks_label(marks):
        """Return "1 mark" or "N marks" as appropriate."""
        return f"{marks} mark" if marks == 1 else f"{marks} marks"

    def generate_mcq(self):
        """Generate an MCQ asking for a randomly chosen key concept.

        Returns:
            A dict with keys ``question``, ``options``, ``correct_answer``
            (index into ``options``) and ``answer``; or None when no
            concepts are available.
        """
        # Only consider categories that actually hold concepts; an empty
        # category would otherwise silently cost a question.
        populated = [label for label, items in self.key_concepts.items() if items]
        if not populated:
            return None

        concept_type = random.choice(populated)
        concept = random.choice(self.key_concepts[concept_type])
        context = concept['context']
        answer = concept['text']
        blanked = context.replace(answer, '_____')

        # Question wording depends on the concept category.
        question_templates = {
            'PERSON': [
                f"Who is known for {blanked}?",
                f"Which person {blanked}?"
            ],
            'ORG': [
                f"Which organization {blanked}?",
                f"What is the name of the organization that {blanked}?"
            ],
            'CONCEPT': [
                f"Which concept describes: {blanked}?",
                f"What term is used for {blanked}?"
            ]
        }

        template = random.choice(question_templates.get(concept_type, question_templates['CONCEPT']))
        question = template if '____' in template else blanked

        # Distractors: other distinct concepts from the same category.
        seen = {answer.casefold()}
        others = []
        for item in self.key_concepts[concept_type]:
            key = item['text'].casefold()
            if key not in seen:
                seen.add(key)
                others.append(item['text'])
        needed = self.OPTION_COUNT - 1
        candidates = random.sample(others, min(needed, len(others)))

        # Top up with WordNet lemma names only when concepts are too few.
        if len(candidates) < needed:
            for syn in wordnet.synsets(answer):
                candidates.extend(lemma.name() for lemma in syn.lemmas())

        # Building the list around the answer (instead of slicing a set)
        # guarantees the answer is always among the options.
        options = self._build_options(answer, candidates, self.OPTION_COUNT)
        random.shuffle(options)

        return {
            'question': question,
            'options': options,
            'correct_answer': options.index(answer),
            'answer': answer
        }

    def generate_descriptive_question(self, marks):
        """Generate a descriptive question about noun-phrase concepts.

        Up to 2 marks uses a "short" template, up to 5 a "medium" one and
        anything above a "long" one.

        Returns:
            The question text, or None when no noun-phrase concepts exist.
        """
        # ``.get`` avoids creating empty categories in the defaultdict.
        concept_names = list(dict.fromkeys(
            c['text'] for c in self.key_concepts.get('CONCEPT', [])
        ))
        if not concept_names:
            return None

        templates = {
            'short': [
                "Define the concept of {concept} and explain its significance.",
                "What are the key characteristics of {concept}?",
                "Briefly explain how {concept} works."
            ],
            'medium': [
                "Explain the relationship between {concept1} and {concept2}. Provide examples.",
                "Describe the process of {concept} and its applications.",
                "Compare and contrast {concept1} with {concept2}."
            ],
            'long': [
                "Critically analyze the impact of {concept} on {context}. Support your answer with examples.",
                "Evaluate the significance of {concept} in relation to {context}. Discuss potential future implications.",
                "Explain the evolution of {concept} and its current relevance in {context}."
            ]
        }

        if marks <= 2:
            template = random.choice(templates['short'])
            return template.format(concept=random.choice(concept_names))

        if marks <= 5:
            pool = templates['medium']
            if len(concept_names) < 2:
                # With a single concept only single-concept templates apply.
                pool = [t for t in pool if '{concept2}' not in t]
            template = random.choice(pool)
            picked = random.sample(concept_names, min(2, len(concept_names)))
            first, second = picked[0], picked[-1]
            # Supply every placeholder name used by the medium templates.
            return template.format(concept=first, concept1=first, concept2=second)

        template = random.choice(templates['long'])
        concept = random.choice(concept_names)
        context_pool = [c['text'] for c in self.key_concepts.get('ORG', [])] + concept_names
        # Prefer a context that differs from the concept itself.
        alternatives = [text for text in context_pool if text != concept] or context_pool
        return template.format(concept=concept, context=random.choice(alternatives))

    def generate_scenario_based(self):
        """Generate a scenario built from one or two noun-phrase concepts.

        Returns:
            A dict with keys ``scenario`` and ``question``, or None when no
            noun-phrase concepts exist.
        """
        concept_items = self.key_concepts.get('CONCEPT', [])
        if not concept_items:
            return None

        concepts = random.sample(concept_items, min(2, len(concept_items)))
        primary = concepts[0]
        secondary = concepts[1] if len(concepts) > 1 else None

        scenario = f"Consider a situation involving {primary['text']}. "
        scenario += primary['context'] + " "
        if secondary is not None:
            scenario += f"This is related to {secondary['text']}. " + secondary['context']

        related = secondary['text'] if secondary is not None else 'the given context'
        question_templates = [
            "Analyze this situation and explain how you would apply the concepts learned to address the challenges presented.",
            f"How would the principles of {primary['text']} help in resolving this scenario? Provide a detailed solution.",
            "What are the key factors to consider in this scenario and how would you address them?",
            f"Evaluate the relationship between {primary['text']} and {related} in this scenario."
        ]

        return {
            'scenario': scenario,
            'question': random.choice(question_templates)
        }

    def generate_questions(self, question_config):
        """Generate questions based on configuration.

        ``question_config`` maps ``'mcq'``, ``'descriptive'`` and
        ``'scenario'`` to dicts with ``'marks'`` and ``'count'``; missing
        sections are skipped. Results are appended to ``self.questions``.
        """
        if 'mcq' in question_config:
            spec = question_config['mcq']
            for _ in range(spec['count']):
                mcq = self.generate_mcq()
                if mcq:
                    mcq['type'] = 'mcq'
                    mcq['marks'] = spec['marks']
                    self.questions.append(mcq)

        if 'descriptive' in question_config:
            spec = question_config['descriptive']
            for _ in range(spec['count']):
                question = self.generate_descriptive_question(spec['marks'])
                if question:
                    self.questions.append({
                        'type': 'descriptive',
                        'question': question,
                        'marks': spec['marks']
                    })

        if 'scenario' in question_config:
            spec = question_config['scenario']
            for _ in range(spec['count']):
                scenario_q = self.generate_scenario_based()
                if scenario_q:
                    scenario_q['type'] = 'scenario'
                    scenario_q['marks'] = spec['marks']
                    self.questions.append(scenario_q)

    def format_question_paper(self):
        """Format the generated questions into a plain-text question paper.

        The header shows the total marks and a time allowance of 1.5 minutes
        per mark; questions are grouped into sections A (MCQ), B
        (descriptive) and C (scenario), each numbered from 1.
        """
        total_marks = sum(q['marks'] for q in self.questions)
        parts = [
            "QUESTION PAPER\n",
            "=" * 50 + "\n\n",
            f"Total Marks: {total_marks}\n",
            f"Time: {total_marks * 1.5} minutes\n\n",
        ]

        mcqs = [q for q in self.questions if q['type'] == 'mcq']
        descriptive = [q for q in self.questions if q['type'] == 'descriptive']
        scenarios = [q for q in self.questions if q['type'] == 'scenario']

        if mcqs:
            parts.append("Section A: Multiple Choice Questions\n\n")
            for i, q in enumerate(mcqs, 1):
                parts.append(f"{i}. {q['question']} [{self._marks_label(q['marks'])}]\n")
                for j, option in enumerate(q['options']):
                    # chr(97) is 'a': options are lettered a), b), c), ...
                    parts.append(f"   {chr(97+j)}) {option}\n")
                parts.append("\n")

        if descriptive:
            parts.append("Section B: Descriptive Questions\n\n")
            for i, q in enumerate(descriptive, 1):
                parts.append(f"{i}. {q['question']} [{self._marks_label(q['marks'])}]\n\n")

        if scenarios:
            parts.append("Section C: Scenario-based Questions\n\n")
            for i, q in enumerate(scenarios, 1):
                parts.append(f"{i}. Read the following scenario:\n")
                parts.append(f"{q['scenario']}\n\n")
                parts.append(f"Question: {q['question']} [{self._marks_label(q['marks'])}]\n\n")

        return "".join(parts)

def generate_paper(pdf_path, question_config):
    """Return the formatted question paper for a PDF, or None on read failure.

    Reads the PDF at ``pdf_path``, generates the questions requested in
    ``question_config`` and formats them into a single string.
    """
    generator = QuestionPaperGenerator()
    if not generator.extract_text_from_pdf(pdf_path):
        return None
    generator.generate_questions(question_config)
    return generator.format_question_paper()

In [None]:

# Example usage: build a paper from a PDF.
pdf_path = '/Users/niyatipatel/Desktop/CC/1.pdf'

# Blueprint: marks per question and number of questions for each section.
question_config = dict(
    mcq=dict(marks=1, count=3),
    descriptive=dict(marks=5, count=2),
    scenario=dict(marks=10, count=1),
)

# generate_paper returns None when the PDF cannot be read, in which case
# the literal "None" is printed.
question_paper = generate_paper(pdf_path, question_config)
print(question_paper)

# Modified with good quality

In [5]:
import PyPDF2
import nltk
import spacy
import random
from nltk.tokenize import sent_tokenize
from nltk.corpus import wordnet
from collections import defaultdict

# Download required packages
#nltk.download('punkt')
#nltk.download('wordnet')
#nltk.download('averaged_perceptron_tagger')

class QuestionPaperGenerator:
    """Generate a question paper from multi-word concepts found in a PDF.

    Text is extracted with PyPDF2 and analysed with spaCy. Every multi-word
    noun chunk becomes a concept dict stored under
    ``key_concepts['CONCEPT']`` with the keys ``text``, ``context`` (the
    sentence containing it), ``related_terms`` (WordNet lemma names of its
    tokens) and ``examples`` (other sentences mentioning one of its tokens).
    """

    # Number of answer choices presented for every MCQ.
    OPTION_COUNT = 4

    def __init__(self):
        self.content = ""        # text extracted from the PDF(s) read so far
        self.sentences = []      # NLTK sentence-tokenized view of ``content``
        self.questions = []      # generated question dicts, in generation order
        self.key_concepts = defaultdict(list)   # 'CONCEPT' -> list of concept dicts
        self.nlp = spacy.load('en_core_web_sm')

    def extract_text_from_pdf(self, pdf_path):
        """Read a PDF, tokenize its text and extract key concepts.

        Args:
            pdf_path: Path of the PDF file to read.

        Returns:
            True on success; False (after printing the error) on failure.
            ``content`` and ``sentences`` are only updated once the whole
            file has been read and tokenized.
        """
        try:
            with open(pdf_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                # ``or ""`` guards against a page yielding no text object.
                page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
            # Join pages with a newline so words at page boundaries are
            # not fused together.
            pieces = ([self.content] if self.content else []) + page_texts
            content = "\n".join(pieces)
            sentences = sent_tokenize(content)
            self.content = content
            self.sentences = sentences
            self._extract_key_concepts()
            return True
        except Exception as e:
            print(f"Error reading PDF: {str(e)}")
            return False

    def _extract_key_concepts(self):
        """Populate ``key_concepts['CONCEPT']`` from ``content``."""
        doc = self.nlp(self.content)

        # The whole content is re-analysed, so start from a clean slate to
        # avoid duplicating concepts from an earlier extraction.
        self.key_concepts.clear()

        # Sentence texts are needed for every token of every concept;
        # compute them once instead of inside the innermost loop.
        sentence_texts = [sent.text for sent in doc.sents]

        for sent in doc.sents:
            sent_text = sent.text
            for chunk in sent.noun_chunks:
                chunk_text = chunk.text
                if len(chunk_text.split()) <= 1:
                    continue  # only multi-word chunks count as concepts

                concept = {
                    'text': chunk_text,
                    'context': sent_text,
                    'related_terms': [],
                    'examples': []
                }

                # Iterate the chunk's own tokens rather than re-parsing
                # the chunk text with the pipeline.
                for token in chunk:
                    word = token.text

                    # Related terms: WordNet lemma names of the token.
                    for syn in wordnet.synsets(word):
                        concept['related_terms'].extend(syn.lemma_names())

                    # Examples: other sentences containing the token text.
                    concept['examples'].extend(
                        other for other in sentence_texts
                        if word in other and other != sent_text
                    )

                self.key_concepts['CONCEPT'].append(concept)

    @staticmethod
    def _marks_label(marks):
        """Return "1 mark" or "N marks" as appropriate."""
        return f"{marks} mark" if marks == 1 else f"{marks} marks"

    def generate_mcq(self):
        """Generate an MCQ about a randomly chosen concept.

        One of three question styles is picked at random: "definition" and
        "characteristic" use the concept's context sentence as the correct
        option, "application" uses one of its example sentences (falling
        back to the context sentence).

        Returns:
            A dict with keys ``question``, ``options``, ``correct_answer``
            (index of the correct option) and ``answer`` (its text); or
            None when no concepts are available. Options are unique; there
            are four of them unless the document is too small to supply
            enough filler sentences.
        """
        concept_pool = self.key_concepts['CONCEPT']
        if not concept_pool:
            return None

        concept = random.choice(concept_pool)
        context = concept['context']
        # Distinct example sentences, in first-seen order.
        examples = [ex for ex in dict.fromkeys(concept['examples']) if ex != context]

        mcq_kind = random.choice(('definition', 'characteristic', 'application'))
        if mcq_kind == 'definition':
            template = "What is the best definition of {}?"
            correct = context
            distractors = examples[:3]
            random.shuffle(distractors)
        elif mcq_kind == 'characteristic':
            template = "Which of the following best describes {}?"
            correct = context
            distractors = list(dict.fromkeys(concept['related_terms']))[:3]
        else:
            template = "In the context of {}, which statement is correct?"
            correct = random.choice(examples) if examples else context
            # Use the already tokenized sentences instead of re-parsing the
            # whole document, and never request more than are available.
            others = [s for s in dict.fromkeys(self.sentences) if s != correct]
            distractors = random.sample(others, min(3, len(others)))

        question = template.format(concept['text'])

        options = [correct]
        for candidate in distractors:
            if candidate not in options:
                options.append(candidate)
        options = options[:self.OPTION_COUNT]

        # Pad with generic statements and spare sentences. The filler list
        # is finite, so this cannot loop forever.
        if len(options) < self.OPTION_COUNT:
            spare = [s for s in dict.fromkeys(self.sentences) if s not in options]
            fillers = [
                f"None of the above statements about {concept['text']} are correct",
                f"All of the above statements about {concept['text']} are correct",
            ]
            fillers.extend(random.sample(spare, min(2, len(spare))))
            random.shuffle(fillers)
            for filler in fillers:
                if len(options) >= self.OPTION_COUNT:
                    break
                if filler not in options:
                    options.append(filler)

        random.shuffle(options)
        # Look up the option that is actually correct for this question
        # style; it is not always the context sentence.
        correct_answer = options.index(correct)

        return {
            'question': question,
            'options': options,
            'correct_answer': correct_answer,
            'answer': correct
        }

    def generate_descriptive_question(self, marks):
        """Generate a descriptive question about one or two concepts.

        Up to 2 marks uses a "short" template, up to 5 a "medium" one and
        anything above a "long" one. When only one concept exists, a
        generic phrase fills the second placeholder.

        Returns:
            The question text, or None when no concepts are available.
        """
        concept_pool = self.key_concepts['CONCEPT']
        if not concept_pool:
            return None

        templates = {
            'short': [
                "Define {} and give an example.",
                "What are the key features of {}?",
                "Explain the significance of {} in brief.",
                "How does {} contribute to the field?",
                "Write short notes on {}."
            ],
            'medium': [
                "Explain the relationship between {} and {} with examples.",
                "Describe the process of {} and its applications.",
                "What are the advantages and disadvantages of {}?",
                "How does {} impact {}? Explain with examples.",
                "Analyze the role of {} in {}."
            ],
            'long': [
                "Critically evaluate the importance of {} in relation to {}. Support your answer with examples.",
                "Compare and contrast {} with {}. Provide detailed analysis.",
                "Discuss the evolution of {} and its current relevance in {}.",
                "'{}' has revolutionized '{}'. Justify this statement with examples.",
                "Analyze the challenges and opportunities associated with {} in the context of {}."
            ]
        }

        concepts = random.sample(concept_pool, min(2, len(concept_pool)))
        first = concepts[0]['text']
        second = concepts[1]['text'] if len(concepts) > 1 else None

        if marks <= 2:
            return random.choice(templates['short']).format(first)
        if marks <= 5:
            # str.format ignores surplus positional arguments, so templates
            # with a single placeholder are fine here.
            return random.choice(templates['medium']).format(
                first, second if second is not None else "your field")
        return random.choice(templates['long']).format(
            first, second if second is not None else "modern applications")

    def generate_scenario_based(self):
        """Generate a scenario combining one or two concepts.

        Returns:
            A dict with keys ``scenario`` and ``question``, or None when no
            concepts are available.
        """
        concept_pool = self.key_concepts['CONCEPT']
        if not concept_pool:
            return None

        concepts = random.sample(concept_pool, min(2, len(concept_pool)))
        primary = concepts[0]
        secondary = concepts[1] if len(concepts) > 1 else None

        scenario_templates = [
            "In a recent project, a team was working with {} when they encountered challenges related to {}. ",
            "A company implementing {} found that it significantly affected their {}. ",
            "While developing a new system using {}, researchers discovered an interesting connection with {}. "
        ]

        question_templates = [
            "Analyze this situation and propose a solution using relevant concepts.",
            "What are the key challenges in this scenario and how would you address them?",
            "How would you apply theoretical concepts to resolve this situation?",
            "Evaluate the scenario and suggest improvements.",
            "What alternative approaches could be used in this situation?"
        ]

        scenario = random.choice(scenario_templates).format(
            primary['text'],
            secondary['text'] if secondary is not None else "related systems"
        )
        # Append the source sentences so the scenario carries real content.
        scenario += primary['context'] + " "
        if secondary is not None:
            scenario += secondary['context']

        return {
            'scenario': scenario,
            'question': random.choice(question_templates)
        }

    def generate_questions(self, question_config):
        """Generate questions based on configuration.

        ``question_config`` maps ``'mcq'``, ``'descriptive'`` and
        ``'scenario'`` to dicts with ``'marks'`` and ``'count'``; missing
        sections are skipped. Results are appended to ``self.questions``.
        """
        if 'mcq' in question_config:
            spec = question_config['mcq']
            for _ in range(spec['count']):
                mcq = self.generate_mcq()
                if mcq:
                    mcq['type'] = 'mcq'
                    mcq['marks'] = spec['marks']
                    self.questions.append(mcq)

        if 'descriptive' in question_config:
            spec = question_config['descriptive']
            for _ in range(spec['count']):
                question = self.generate_descriptive_question(spec['marks'])
                if question:
                    self.questions.append({
                        'type': 'descriptive',
                        'question': question,
                        'marks': spec['marks']
                    })

        if 'scenario' in question_config:
            spec = question_config['scenario']
            for _ in range(spec['count']):
                scenario_q = self.generate_scenario_based()
                if scenario_q:
                    scenario_q['type'] = 'scenario'
                    scenario_q['marks'] = spec['marks']
                    self.questions.append(scenario_q)

    def format_question_paper(self):
        """Format the generated questions into a plain-text question paper.

        The header shows the total marks and a time allowance of 1.5 minutes
        per mark; questions are grouped into sections A (MCQ), B
        (descriptive) and C (scenario), each numbered from 1.
        """
        total_marks = sum(q['marks'] for q in self.questions)
        parts = [
            "QUESTION PAPER\n",
            "=" * 50 + "\n\n",
            f"Total Marks: {total_marks}\n",
            f"Time: {total_marks * 1.5} minutes\n\n",
        ]

        mcqs = [q for q in self.questions if q['type'] == 'mcq']
        descriptive = [q for q in self.questions if q['type'] == 'descriptive']
        scenarios = [q for q in self.questions if q['type'] == 'scenario']

        if mcqs:
            parts.append("Section A: Multiple Choice Questions\n\n")
            for i, q in enumerate(mcqs, 1):
                parts.append(f"{i}. {q['question']} [{self._marks_label(q['marks'])}]\n")
                for j, option in enumerate(q['options']):
                    # chr(97) is 'a': options are lettered a), b), c), ...
                    parts.append(f"   {chr(97+j)}) {option}\n")
                parts.append("\n")

        if descriptive:
            parts.append("Section B: Descriptive Questions\n\n")
            for i, q in enumerate(descriptive, 1):
                parts.append(f"{i}. {q['question']} [{self._marks_label(q['marks'])}]\n\n")

        if scenarios:
            parts.append("Section C: Scenario-based Questions\n\n")
            for i, q in enumerate(scenarios, 1):
                parts.append(f"{i}. Read the following scenario:\n")
                parts.append(f"{q['scenario']}\n\n")
                parts.append(f"Question: {q['question']} [{self._marks_label(q['marks'])}]\n\n")

        return "".join(parts)

def generate_paper(pdf_path, question_config):
    """Return the formatted question paper for a PDF, or None on read failure.

    Reads the PDF at ``pdf_path``, generates the questions requested in
    ``question_config`` and formats them into a single string.
    """
    generator = QuestionPaperGenerator()
    if not generator.extract_text_from_pdf(pdf_path):
        return None
    generator.generate_questions(question_config)
    return generator.format_question_paper()


In [None]:
# Source PDF to build the paper from.
pdf_path = '/Users/niyatipatel/Desktop/CC/1.pdf'

# Blueprint: marks per question and number of questions for each section.
question_config = dict(
    mcq=dict(marks=1, count=3),
    descriptive=dict(marks=5, count=2),
    scenario=dict(marks=10, count=1),
)

# generate_paper returns None when the PDF cannot be read, in which case
# the literal "None" is printed.
question_paper = generate_paper(pdf_path, question_config)
print(question_paper)

# PDF

In [7]:
import PyPDF2
import nltk
import spacy
import random
from nltk.tokenize import sent_tokenize
from nltk.corpus import wordnet
from collections import defaultdict
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, ListFlowable, ListItem
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_JUSTIFY, TA_LEFT, TA_CENTER
from datetime import datetime

# Download required packages
# nltk.download('punkt')
# nltk.download('wordnet')
# nltk.download('averaged_perceptron_tagger')

class QuestionPaperGenerator:
    def __init__(self):
        """Start with empty text/question state and load the spaCy pipeline."""
        # Raw text and its sentence split; both are filled by extract_text_from_pdf.
        self.content, self.sentences = "", []
        # Question dicts appended by generate_questions.
        self.questions = list()
        # Concept records grouped by label; an unseen label yields an empty list.
        self.key_concepts = defaultdict(list)
        self.nlp = spacy.load('en_core_web_sm')
    
    
        
    def extract_text_from_pdf(self, pdf_path):
        """Read the PDF at ``pdf_path`` and prepare it for question generation.

        The text of every page is appended to ``self.content``; the content is
        then split into ``self.sentences`` and scanned for key concepts.

        Returns:
            True on success; False if any step raised (the error is printed).
        """
        try:
            with open(pdf_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                # A page without extractable text contributes "" instead of
                # breaking the join below.
                page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
            # Join pages with a newline so the last word of one page is not
            # fused with the first word of the next.
            extracted = "\n".join(page_texts)
            self.content = f"{self.content}\n{extracted}" if self.content else extracted
            self.sentences = sent_tokenize(self.content)
            self._extract_key_concepts()
            return True
        except Exception as e:
            # Deliberate best-effort boundary: callers only check the boolean.
            print(f"Error reading PDF: {str(e)}")
            return False

    def _extract_key_concepts(self):
        """Record every multi-word noun chunk of the content as a 'CONCEPT'.

        Each record holds the chunk text, the sentence it came from
        ('context'), WordNet lemma names for each of its tokens
        ('related_terms') and every other sentence whose text contains one of
        its tokens ('examples').
        """
        parsed = self.nlp(self.content)

        for sentence in parsed.sents:
            home_text = sentence.text
            # Single-word chunks are ignored.
            phrases = [chunk.text for chunk in sentence.noun_chunks
                       if len(chunk.text.split()) > 1]

            for phrase in phrases:
                related = []
                examples = []
                for token in self.nlp(phrase):
                    word = token.text
                    # Synonyms: all lemma names of all synsets of the token.
                    for synset in wordnet.synsets(word):
                        related += synset.lemma_names()
                    # Examples: other sentences mentioning the token (substring match).
                    for other in parsed.sents:
                        if word in other.text and other.text != home_text:
                            examples.append(other.text)

                self.key_concepts['CONCEPT'].append({
                    'text': phrase,
                    'context': home_text,
                    'related_terms': related,
                    'examples': examples,
                })
    

    def generate_mcq(self):
        if not self.key_concepts['CONCEPT']:
            return None
        
        concept = random.choice(self.key_concepts['CONCEPT'])
        
        # MCQ templates with appropriate distractors
        mcq_types = [
            {
                'template': "What is the best definition of {}?",
                'option_type': 'definition',
                'generate_options': lambda c: [
                    c['context'],  # Correct answer
                    *random.sample([ex for ex in c['examples'] if ex != c['context']][:3], 
                                 min(3, len([ex for ex in c['examples'] if ex != c['context']])))
                ]
            },
            {
                'template': "Which of the following best describes {}?",
                'option_type': 'characteristic',
                'generate_options': lambda c: [
                    c['context'],
                    *[self.nlp(term).text for term in c['related_terms'][:3]]
                ]
            },
            {
                'template': "In the context of {}, which statement is correct?",
                'option_type': 'application',
                'generate_options': lambda c: [
                    random.choice(c['examples']) if c['examples'] else c['context'],
                    *[s.text for s in random.sample(list(self.nlp(self.content).sents), 3)]
                ]
            }
        ]
        
        mcq_type = random.choice(mcq_types)
        question = mcq_type['template'].format(concept['text'])
        
        # Generate meaningful options
        options = mcq_type['generate_options'](concept)
        
        # Ensure we have 4 unique and meaningful options
        while len(options) < 4:
            new_option = random.choice([
                f"None of the above statements about {concept['text']} are correct",
                f"All of the above statements about {concept['text']} are correct",
                random.choice(self.sentences)
            ])
            if new_option not in options:
                options.append(new_option)
        
        options = options[:4]  # Limit to 4 options
        correct_answer = 0  # First option is always correct in our generation
        random.shuffle(options)  # Shuffle options
        correct_answer = options.index(concept['context'])  # Update correct answer after shuffle
        
        return {
            'question': question,
            'options': options,
            'correct_answer': correct_answer
        }

    def generate_descriptive_question(self, marks):
        if not self.key_concepts['CONCEPT']:
            return None
            
        # Enhanced question templates based on marks
        templates = {
            'short': [
                "Define {} and give an example.",
                "What are the key features of {}?",
                "Explain the significance of {} in brief.",
                "How does {} contribute to the field?",
                "Write short notes on {}."
            ],
            'medium': [
                "Explain the relationship between {} and {} with examples.",
                "Describe the process of {} and its applications.",
                "What are the advantages and disadvantages of {}?",
                "How does {} impact {}? Explain with examples.",
                "Analyze the role of {} in {}."
            ],
            'long': [
                "Critically evaluate the importance of {} in relation to {}. Support your answer with examples.",
                "Compare and contrast {} with {}. Provide detailed analysis.",
                "Discuss the evolution of {} and its current relevance in {}.",
                "'{}' has revolutionized '{}'. Justify this statement with examples.",
                "Analyze the challenges and opportunities associated with {} in the context of {}."
            ]
        }
        
        concepts = random.sample(self.key_concepts['CONCEPT'], min(2, len(self.key_concepts['CONCEPT'])))
        
        if marks <= 2:
            template = random.choice(templates['short'])
            question = template.format(concepts[0]['text'])
        elif marks <= 5:
            template = random.choice(templates['medium'])
            question = template.format(concepts[0]['text'], 
                                    concepts[1]['text'] if len(concepts) > 1 else "your field")
        else:
            template = random.choice(templates['long'])
            question = template.format(concepts[0]['text'], 
                                    concepts[1]['text'] if len(concepts) > 1 else "modern applications")
        
        return question

    def generate_scenario_based(self):
        if not self.key_concepts['CONCEPT']:
            return None
        
        # Create a meaningful scenario combining multiple concepts
        concepts = random.sample(self.key_concepts['CONCEPT'], 
                               min(2, len(self.key_concepts['CONCEPT'])))
        
        scenario_templates = [
            "In a recent project, a team was working with {} when they encountered challenges related to {}. ",
            "A company implementing {} found that it significantly affected their {}. ",
            "While developing a new system using {}, researchers discovered an interesting connection with {}. "
        ]
        
        question_templates = [
            "Analyze this situation and propose a solution using relevant concepts.",
            "What are the key challenges in this scenario and how would you address them?",
            "How would you apply theoretical concepts to resolve this situation?",
            "Evaluate the scenario and suggest improvements.",
            "What alternative approaches could be used in this situation?"
        ]
        
        scenario = random.choice(scenario_templates).format(
            concepts[0]['text'],
            concepts[1]['text'] if len(concepts) > 1 else "related systems"
        )
        scenario += concepts[0]['context'] + " "
        if len(concepts) > 1:
            scenario += concepts[1]['context']
        
        return {
            'scenario': scenario,
            'question': random.choice(question_templates)
        }

    def generate_questions(self, question_config):
        if 'mcq' in question_config:
            for _ in range(question_config['mcq']['count']):
                mcq = self.generate_mcq()
                if mcq:
                    mcq['type'] = 'mcq'
                    mcq['marks'] = question_config['mcq']['marks']
                    self.questions.append(mcq)
        
        if 'descriptive' in question_config:
            for _ in range(question_config['descriptive']['count']):
                question = self.generate_descriptive_question(
                    question_config['descriptive']['marks']
                )
                if question:
                    self.questions.append({
                        'type': 'descriptive',
                        'question': question,
                        'marks': question_config['descriptive']['marks']
                    })
        
        if 'scenario' in question_config:
            for _ in range(question_config['scenario']['count']):
                scenario_q = self.generate_scenario_based()
                if scenario_q:
                    scenario_q['type'] = 'scenario'
                    scenario_q['marks'] = question_config['scenario']['marks']
                    self.questions.append(scenario_q)

    def format_question_paper(self):
        formatted_paper = "QUESTION PAPER\n"
        formatted_paper += "=" * 50 + "\n\n"
        
        total_marks = sum(q['marks'] for q in self.questions)
        formatted_paper += f"Total Marks: {total_marks}\n"
        formatted_paper += f"Time: {total_marks * 1.5} minutes\n\n"
        
        mcqs = [q for q in self.questions if q['type'] == 'mcq']
        descriptive = [q for q in self.questions if q['type'] == 'descriptive']
        scenarios = [q for q in self.questions if q['type'] == 'scenario']
        
        if mcqs:
            formatted_paper += "Section A: Multiple Choice Questions\n\n"
            for i, q in enumerate(mcqs, 1):
                formatted_paper += f"{i}. {q['question']} [{q['marks']} mark]\n"
                for j, option in enumerate(q['options']):
                    formatted_paper += f"   {chr(97+j)}) {option}\n"
                formatted_paper += "\n"
        
        if descriptive:
            formatted_paper += "Section B: Descriptive Questions\n\n"
            for i, q in enumerate(descriptive, 1):
                formatted_paper += f"{i}. {q['question']} [{q['marks']} marks]\n\n"
        
        if scenarios:
            formatted_paper += "Section C: Scenario-based Questions\n\n"
            for i, q in enumerate(scenarios, 1):
                formatted_paper += f"{i}. Read the following scenario:\n"
                formatted_paper += f"{q['scenario']}\n\n"
                formatted_paper += f"Question: {q['question']} [{q['marks']} marks]\n\n"
        
        return formatted_paper
    
    def export_to_pdf(self, output_path):
        """Render the generated questions into a PDF written to ``output_path``.

        The PDF has a title, the total marks, a time allowance of 1.5 minutes
        per mark and one section for each question type that has at least one
        question.

        Question text originates from an arbitrary source document, and
        ReportLab's ``Paragraph`` parses its text as XML-like markup, so every
        dynamic string is escaped first; otherwise a stray ``<`` or ``&`` can
        be misread as markup or rejected by the paragraph parser.
        """
        from xml.sax.saxutils import escape  # stdlib; local so the method is self-contained

        def safe(value):
            """Return ``value`` as text that Paragraph will display literally."""
            return escape(str(value))

        doc = SimpleDocTemplate(
            output_path,
            pagesize=letter,
            rightMargin=72,
            leftMargin=72,
            topMargin=72,
            bottomMargin=72
        )

        # Styles
        styles = getSampleStyleSheet()
        styles.add(ParagraphStyle(
            name='CustomTitle',
            parent=styles['Heading1'],
            fontSize=16,
            spaceAfter=30,
            alignment=TA_CENTER
        ))
        styles.add(ParagraphStyle(
            name='QuestionStyle',
            parent=styles['Normal'],
            fontSize=12,
            spaceAfter=12,
            leftIndent=20
        ))

        # Header
        total_marks = sum(q['marks'] for q in self.questions)
        content = [
            Paragraph("QUESTION PAPER", styles['CustomTitle']),
            Paragraph(f"Total Marks: {total_marks}", styles['Normal']),
            Paragraph(f"Time: {total_marks * 1.5} minutes", styles['Normal']),
            Spacer(1, 20),
        ]

        # MCQs
        mcqs = [q for q in self.questions if q['type'] == 'mcq']
        if mcqs:
            content.append(Paragraph("Section A: Multiple Choice Questions", styles['Heading2']))
            content.append(Spacer(1, 12))

            for i, q in enumerate(mcqs, 1):
                content.append(Paragraph(
                    f"{i}. {safe(q['question'])} [{q['marks']} mark]",
                    styles['QuestionStyle']
                ))
                options = [
                    ListItem(Paragraph(f"{chr(97+j)}) {safe(option)}", styles['Normal']))
                    for j, option in enumerate(q['options'])
                ]
                content.append(ListFlowable(options, bulletType='bullet', leftIndent=50))
                content.append(Spacer(1, 12))

        # Descriptive Questions
        descriptive = [q for q in self.questions if q['type'] == 'descriptive']
        if descriptive:
            content.append(Paragraph("Section B: Descriptive Questions", styles['Heading2']))
            content.append(Spacer(1, 12))

            for i, q in enumerate(descriptive, 1):
                content.append(Paragraph(
                    f"{i}. {safe(q['question'])} [{q['marks']} marks]",
                    styles['QuestionStyle']
                ))
                content.append(Spacer(1, 12))

        # Scenario Questions
        scenarios = [q for q in self.questions if q['type'] == 'scenario']
        if scenarios:
            content.append(Paragraph("Section C: Scenario-based Questions", styles['Heading2']))
            content.append(Spacer(1, 12))

            for i, q in enumerate(scenarios, 1):
                content.append(Paragraph(f"{i}. Read the following scenario:", styles['QuestionStyle']))
                content.append(Paragraph(safe(q['scenario']), styles['Normal']))
                content.append(Spacer(1, 6))
                content.append(Paragraph(
                    f"Question: {safe(q['question'])} [{q['marks']} marks]",
                    styles['QuestionStyle']
                ))
                content.append(Spacer(1, 12))

        # Build PDF
        doc.build(content)


def generate_paper(pdf_path, question_config):
    """Build a plain-text question paper from the PDF at ``pdf_path``.

    ``question_config`` is passed unchanged to
    ``QuestionPaperGenerator.generate_questions``.

    Returns:
        The formatted paper as a string, or ``None`` when text extraction
        reports failure.
    """
    paper_builder = QuestionPaperGenerator()
    if not paper_builder.extract_text_from_pdf(pdf_path):
        return None
    paper_builder.generate_questions(question_config)
    return paper_builder.format_question_paper()


# Add this function to handle PDF generation
def generate_paper_with_pdf(pdf_path, question_config, output_pdf_path):
    """Generate questions from ``pdf_path`` and export them to ``output_pdf_path``.

    Returns:
        True once the PDF has been exported; False when the source PDF could
        not be read (nothing is exported in that case).
    """
    builder = QuestionPaperGenerator()
    readable = builder.extract_text_from_pdf(pdf_path)
    if not readable:
        return False
    builder.generate_questions(question_config)
    builder.export_to_pdf(output_pdf_path)
    return True

In [None]:

# Generate question paper in PDF
# Source PDF (machine-specific absolute path) and destination of the generated paper.
pdf_path = '/Users/niyatipatel/Desktop/CC/1.pdf'
output_pdf = 'question_paper.pdf'
# Per question type: marks awarded for each question and how many to request.
question_config = {
    'mcq': {'marks': 1, 'count': 3},
    'descriptive': {'marks': 5, 'count': 2},
    'scenario': {'marks': 10, 'count': 1}
}

# generate_paper_with_pdf returns True only when the source PDF was read and the paper exported.
success = generate_paper_with_pdf(pdf_path, question_config, output_pdf)
if success:
    print(f"Question paper has been generated at: {output_pdf}")

# Complete question code

In [None]:
import PyPDF2
import nltk
import spacy
import random
from nltk.corpus import wordnet
from nltk.tokenize import sent_tokenize, word_tokenize
import re
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, ListFlowable, ListItem
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_JUSTIFY, TA_LEFT, TA_CENTER
from collections import defaultdict
from datetime import datetime

# Download required NLTK data (these calls run each time this cell is executed)
nltk.download('punkt')  # tokenizer models — presumably for sent_tokenize / word_tokenize
nltk.download('wordnet')  # WordNet corpus, queried through nltk.corpus.wordnet
nltk.download('averaged_perceptron_tagger')  # tagger model — presumably for nltk.pos_tag
# NOTE(review): no named-entity chunking call is visible in this cell; confirm
# the next two resources are still needed.
nltk.download('maxent_ne_chunker')
nltk.download('words')

class QuestionPaperGenerator:
    def __init__(self):
        """Start with empty text/question state and load the spaCy pipeline."""
        # Raw text and its sentence split; both are filled by extract_text_from_pdf.
        self.content, self.sentences = "", []
        # Question dicts appended by generate_questions.
        self.questions = list()
        # Concept records grouped by label; an unseen label yields an empty list.
        self.key_concepts = defaultdict(list)
        self.nlp = spacy.load('en_core_web_sm')
        # term -> record with 'definition', 'examples', 'related_terms', 'context'.
        self.key_terms = dict()
        
    def extract_text_from_pdf(self, pdf_path):
        """Read the PDF at ``pdf_path`` and prepare every derived structure.

        Page text is appended to ``self.content``; the content is then split
        into ``self.sentences``, parsed into ``self.doc`` and scanned for key
        concepts and key terms.

        Returns:
            True on success; False if any step raised (the error is printed).
        """
        try:
            with open(pdf_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                # A page without extractable text contributes "" instead of
                # breaking the join below.
                page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
            # Join pages with a newline so the last word of one page is not
            # fused with the first word of the next.
            extracted = "\n".join(page_texts)
            self.content = f"{self.content}\n{extracted}" if self.content else extracted
            self.sentences = sent_tokenize(self.content)
            self.doc = self.nlp(self.content)
            self._extract_key_concepts()
            self._extract_key_terms()
            return True
        except Exception as e:
            # Deliberate best-effort boundary: callers only check the boolean.
            print(f"Error reading PDF: {str(e)}")
            return False

    def _extract_key_terms(self):
        """Extract key terms for MCQs"""
        for sent in self.doc.sents:
            definition_patterns = [
                r'(?P<term>[A-Z][^.]*?) (?:is|are|refers to|means) (?P<definition>[^.]*\.)',
                r'(?P<term>[A-Z][^.]*?): (?P<definition>[^.]*\.)',
                r'(?P<definition>[^.]*?) is called (?P<term>[^.]*\.)',
            ]
            
            for pattern in definition_patterns:
                matches = re.finditer(pattern, sent.text)
                for match in matches:
                    term = match.group('term').strip()
                    definition = match.group('definition').strip()
                    
                    if term not in self.key_terms:
                        self.key_terms[term] = {
                            'definition': definition,
                            'examples': [],
                            'related_terms': [],
                            'context': []
                        }
            
            for term in self.key_terms:
                if term.lower() in sent.text.lower():
                    self.key_terms[term]['context'].append(sent.text)
                    for chunk in sent.noun_chunks:
                        if chunk.text.lower() != term.lower():
                            self.key_terms[term]['related_terms'].append(chunk.text)

    def _extract_key_concepts(self):
        """Extract key concepts for descriptive and scenario questions"""
        doc = self.nlp(self.content)
        
        for ent in doc.ents:
            self.key_concepts[ent.label_].append({
                'text': ent.text,
                'context': ent.sent.text
            })
        
        for sent in doc.sents:
            for chunk in sent.noun_chunks:
                if len(chunk.text.split()) > 1:
                    self.key_concepts['CONCEPT'].append({
                        'text': chunk.text,
                        'context': sent.text
                    })

    def generate_mcq_distractors(self, correct_answer, term_info):
        """Return up to three distractor strings for an MCQ.

        Candidates come from the term's related terms, WordNet lemmas and
        hypernym lemmas of ``correct_answer``, and a random pick of document
        entities.  Candidates equal to ``correct_answer`` (ignoring case),
        shorter than three characters or purely numeric are dropped.  When
        fewer than three remain, noun/verb/adjective words from the term's
        definition are added.
        """
        target = correct_answer.lower()
        pool = set()

        # Related terms recorded for this key term.
        pool.update(term_info['related_terms'][:2])

        # WordNet: two lemmas per synset plus one lemma per hypernym.
        for synset in wordnet.synsets(correct_answer):
            pool.update([lemma.name() for lemma in synset.lemmas()][:2])
            for parent in synset.hypernyms():
                pool.update([lemma.name() for lemma in parent.lemmas()][:1])

        # Entities of selected labels found in the document.
        entity_names = [ent.text for ent in self.doc.ents
                        if ent.label_ in ['ORG', 'PRODUCT', 'TECH']]
        if entity_names:
            pool.update(random.sample(entity_names, min(2, len(entity_names))))

        # Discard unusable candidates.
        usable = [name for name in pool
                  if not (name.lower() == target
                          or len(name) <= 2
                          or name.isnumeric())]

        shortfall = 3 - len(usable)
        if shortfall > 0:
            tagged = nltk.pos_tag(word_tokenize(term_info['definition']))
            fallback = [word for word, tag in tagged
                        if tag.startswith(('NN', 'VB', 'JJ'))
                        and word.lower() != target]
            if fallback:
                usable.extend(random.sample(fallback, min(shortfall, len(fallback))))

        return list(set(usable))[:3]

    def generate_mcq(self):
        """Generate a single MCQ"""
        if not self.key_terms:
            return None
            
        term, term_info = random.choice(list(self.key_terms.items()))
        
        templates = [
            f"What is {term}?",
            f"Which of the following best defines {term}?",
            f"What is the correct description of {term}?",
            f"Which statement correctly explains {term}?",
            f"The term '{term}' refers to:",
        ]
        
        question = random.choice(templates)
        correct_answer = term_info['definition']
        
        distractors = self.generate_mcq_distractors(term, term_info)
        options = [correct_answer] + distractors
        random.shuffle(options)
        
        return {
            'question': question,
            'options': options,
            'correct_answer': options.index(correct_answer)
        }

    def generate_descriptive_question(self, marks):
        """Generate descriptive questions"""
        templates = {
            'short': [
                "Define {} and give an example.",
                "What are the key features of {}?",
                "Explain the significance of {} in brief.",
                "How does {} contribute to the field?",
                "Write short notes on {}."
            ],
            'medium': [
                "Explain the relationship between {} and {} with examples.",
                "Describe the process of {} and its applications.",
                "What are the advantages and disadvantages of {}?",
                "How does {} impact {}? Explain with examples.",
                "Analyze the role of {} in {}."
            ],
            'long': [
                "Critically evaluate the importance of {} in relation to {}. Support your answer with examples.",
                "Compare and contrast {} with {}. Provide detailed analysis.",
                "Discuss the evolution of {} and its current relevance in {}.",
                "'{}' has revolutionized '{}'. Justify this statement with examples.",
                "Analyze the challenges and opportunities associated with {} in the context of {}."
            ]
        }
        
        concepts = random.sample(self.key_concepts['CONCEPT'], min(2, len(self.key_concepts['CONCEPT'])))
        
        if marks <= 2:
            template = random.choice(templates['short'])
            question = template.format(concepts[0]['text'])
        elif marks <= 5:
            template = random.choice(templates['medium'])
            question = template.format(concepts[0]['text'], 
                                    concepts[1]['text'] if len(concepts) > 1 else "your field")
        else:
            template = random.choice(templates['long'])
            question = template.format(concepts[0]['text'], 
                                    concepts[1]['text'] if len(concepts) > 1 else "modern applications")
        
        return question

    def generate_scenario_based(self):
        """Generate scenario-based questions"""
        if not self.key_concepts['CONCEPT']:
            return None
        
        concepts = random.sample(self.key_concepts['CONCEPT'], 
                               min(2, len(self.key_concepts['CONCEPT'])))
        
        scenario_templates = [
            "In a recent project, a team was working with {} when they encountered challenges related to {}. ",
            "A company implementing {} found that it significantly affected their {}. ",
            "While developing a new system using {}, researchers discovered an interesting connection with {}. "
        ]
        
        question_templates = [
            "Analyze this situation and propose a solution using relevant concepts.",
            "What are the key challenges in this scenario and how would you address them?",
            "How would you apply theoretical concepts to resolve this situation?",
            "Evaluate the scenario and suggest improvements.",
            "What alternative approaches could be used in this situation?"
        ]
        
        scenario = random.choice(scenario_templates).format(
            concepts[0]['text'],
            concepts[1]['text'] if len(concepts) > 1 else "related systems"
        )
        scenario += concepts[0]['context'] + " "
        if len(concepts) > 1:
            scenario += concepts[1]['context']
        
        return {
            'scenario': scenario,
            'question': random.choice(question_templates)
        }

    def generate_questions(self, question_config):
        """Generate all types of questions"""
        if 'mcq' in question_config:
            for _ in range(question_config['mcq']['count']):
                mcq = self.generate_mcq()
                if mcq:
                    mcq['type'] = 'mcq'
                    mcq['marks'] = question_config['mcq']['marks']
                    self.questions.append(mcq)
        
        if 'descriptive' in question_config:
            for _ in range(question_config['descriptive']['count']):
                question = self.generate_descriptive_question(
                    question_config['descriptive']['marks']
                )
                if question:
                    self.questions.append({
                        'type': 'descriptive',
                        'question': question,
                        'marks': question_config['descriptive']['marks']
                    })
        
        if 'scenario' in question_config:
            for _ in range(question_config['scenario']['count']):
                scenario_q = self.generate_scenario_based()
                if scenario_q:
                    scenario_q['type'] = 'scenario'
                    scenario_q['marks'] = question_config['scenario']['marks']
                    self.questions.append(scenario_q)

    def export_to_pdf(self, output_path):
        """Export question paper to PDF at ``output_path``.

        The PDF has a title, today's date, the total marks, a time allowance
        of 1.5 minutes per mark and one section for each question type that
        has at least one question.

        Question text originates from an arbitrary source document, and
        ReportLab's ``Paragraph`` parses its text as XML-like markup, so every
        dynamic string is escaped first; otherwise a stray ``<`` or ``&`` can
        be misread as markup or rejected by the paragraph parser.
        """
        from xml.sax.saxutils import escape  # stdlib; local so the method is self-contained

        def safe(value):
            """Return ``value`` as text that Paragraph will display literally."""
            return escape(str(value))

        doc = SimpleDocTemplate(
            output_path,
            pagesize=letter,
            rightMargin=72,
            leftMargin=72,
            topMargin=72,
            bottomMargin=72
        )

        styles = getSampleStyleSheet()
        styles.add(ParagraphStyle(
            name='CustomTitle',
            parent=styles['Heading1'],
            fontSize=16,
            spaceAfter=30,
            alignment=TA_CENTER
        ))
        styles.add(ParagraphStyle(
            name='QuestionStyle',
            parent=styles['Normal'],
            fontSize=12,
            spaceAfter=12,
            leftIndent=20
        ))

        # Header
        total_marks = sum(q['marks'] for q in self.questions)
        content = [
            Paragraph("QUESTION PAPER", styles['CustomTitle']),
            Paragraph(f"Date: {datetime.now().strftime('%B %d, %Y')}", styles['Normal']),
            Paragraph(f"Total Marks: {total_marks}", styles['Normal']),
            Paragraph(f"Time: {total_marks * 1.5} minutes", styles['Normal']),
            Spacer(1, 20),
        ]

        # MCQs
        mcqs = [q for q in self.questions if q['type'] == 'mcq']
        if mcqs:
            content.append(Paragraph("Section A: Multiple Choice Questions", styles['Heading2']))
            content.append(Spacer(1, 12))

            for i, q in enumerate(mcqs, 1):
                content.append(Paragraph(
                    f"{i}. {safe(q['question'])} [{q['marks']} mark]",
                    styles['QuestionStyle']
                ))
                options = [
                    ListItem(Paragraph(f"{chr(97+j)}) {safe(option)}", styles['Normal']))
                    for j, option in enumerate(q['options'])
                ]
                content.append(ListFlowable(options, bulletType='bullet', leftIndent=50))
                content.append(Spacer(1, 12))

        # Descriptive Questions
        descriptive = [q for q in self.questions if q['type'] == 'descriptive']
        if descriptive:
            content.append(Paragraph("Section B: Descriptive Questions", styles['Heading2']))
            content.append(Spacer(1, 12))

            for i, q in enumerate(descriptive, 1):
                content.append(Paragraph(
                    f"{i}. {safe(q['question'])} [{q['marks']} marks]",
                    styles['QuestionStyle']
                ))
                content.append(Spacer(1, 12))

        # Scenario Questions
        scenarios = [q for q in self.questions if q['type'] == 'scenario']
        if scenarios:
            content.append(Paragraph("Section C: Scenario-based Questions", styles['Heading2']))
            content.append(Spacer(1, 12))

            for i, q in enumerate(scenarios, 1):
                content.append(Paragraph(f"{i}. Read the following scenario:", styles['QuestionStyle']))
                content.append(Paragraph(safe(q['scenario']), styles['Normal']))
                content.append(Spacer(1, 6))
                content.append(Paragraph(
                    f"Question: {safe(q['question'])} [{q['marks']} marks]",
                    styles['QuestionStyle']
                ))
                content.append(Spacer(1, 12))

        doc.build(content)

def generate_question_paper(pdf_path, question_config, output_pdf_path):
    """Generate questions from ``pdf_path`` and export them to ``output_pdf_path``.

    Returns:
        True once the PDF has been exported; False when the source PDF could
        not be read (nothing is exported in that case).
    """
    builder = QuestionPaperGenerator()
    readable = builder.extract_text_from_pdf(pdf_path)
    if not readable:
        return False
    builder.generate_questions(question_config)
    builder.export_to_pdf(output_pdf_path)
    return True

In [None]:

# Generate question paper
# Source PDF (machine-specific absolute path) and destination of the generated paper.
pdf_path = '/Users/niyatipatel/Desktop/MAD/Flutter 1 and 2.pdf'
output_pdf = 'question_paper.pdf'

# Per question type: marks awarded for each question and how many to request.
question_config = {
    'mcq': {'marks': 1, 'count': 20},
    'descriptive': {'marks': 5, 'count': 5},
    'scenario': {'marks': 10, 'count': 2}
}

# generate_question_paper returns True only when the source PDF was read and the paper exported.
success = generate_question_paper(pdf_path, question_config, output_pdf)
if success:
    print(f"Question paper has been generated at: {output_pdf}")

# With Answers

In [1]:
import PyPDF2
import nltk
import spacy
import random
from nltk.corpus import wordnet
from nltk.tokenize import sent_tokenize, word_tokenize
import re
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, ListFlowable, ListItem
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_JUSTIFY, TA_LEFT, TA_CENTER
from collections import defaultdict
from datetime import datetime

# Previous QuestionPaperGenerator class remains the same until the export_to_pdf method

class QuestionPaperGenerator:
    """Generate MCQ, descriptive and scenario questions from the text of a PDF.

    Workflow: ``extract_text_from_pdf`` reads the document and fills
    ``key_terms`` / ``key_concepts``; ``generate_questions`` appends question
    dicts to ``questions``; ``export_to_pdf`` renders them with ReportLab.
    """

    # Definition-style sentence patterns. Each exposes the named groups
    # ``term`` and ``definition``. Compiled once instead of per sentence.
    _DEFINITION_PATTERNS = (
        re.compile(r'(?P<term>[A-Z][^.]*?) (?:is|are|refers to|means) (?P<definition>[^.]*\.)'),
        re.compile(r'(?P<term>[A-Z][^.]*?): (?P<definition>[^.]*\.)'),
        re.compile(r'(?P<definition>[^.]*?) is called (?P<term>[^.]*\.)'),
    )

    def __init__(self):
        """Create an empty generator and load the spaCy ``en_core_web_sm`` model."""
        self.content = ""
        self.sentences = []
        self.questions = []
        self.key_concepts = defaultdict(list)
        self.nlp = spacy.load('en_core_web_sm')
        self.key_terms = {}
        # Parsed spaCy document; set by extract_text_from_pdf().
        self.doc = None

    @staticmethod
    def _markup_safe(text):
        """Escape ``&``, ``<`` and ``>`` so Paragraph markup parsing sees literal text."""
        from xml.sax.saxutils import escape
        return escape(str(text))

    def extract_text_from_pdf(self, pdf_path):
        """Read every page of *pdf_path*, then extract key concepts and terms.

        Returns:
            True on success; False (after printing the error) if anything
            raised while reading or analysing the document.
        """
        try:
            with open(pdf_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                for page in pdf_reader.pages:
                    self.content += page.extract_text()
            self.sentences = sent_tokenize(self.content)
            self.doc = self.nlp(self.content)
            self._extract_key_concepts()
            self._extract_key_terms()
            return True
        except Exception as e:
            # Best-effort boundary: callers only need a success flag.
            print(f"Error reading PDF: {str(e)}")
            return False

    def _extract_key_terms(self):
        """Extract key terms for MCQs.

        For every sentence of ``self.doc``, definition patterns add new
        entries to ``self.key_terms``; then every known term found in the
        sentence gets that sentence as context and the sentence's other noun
        chunks as related terms.
        """
        for sent in self.doc.sents:
            sentence_text = sent.text

            for pattern in self._DEFINITION_PATTERNS:
                for match in pattern.finditer(sentence_text):
                    # The "is called" pattern captures the closing period as
                    # part of the term; strip it so the term reads cleanly.
                    term = match.group('term').strip().rstrip('.').strip()
                    definition = match.group('definition').strip()
                    if not term:
                        continue

                    if term not in self.key_terms:
                        self.key_terms[term] = {
                            'definition': definition,
                            'examples': [],
                            'related_terms': [],
                            'context': []
                        }

            sentence_lower = sentence_text.lower()
            for term, info in self.key_terms.items():
                term_lower = term.lower()
                if term_lower in sentence_lower:
                    info['context'].append(sentence_text)
                    info['related_terms'].extend(
                        chunk.text for chunk in sent.noun_chunks
                        if chunk.text.lower() != term_lower
                    )

    def _extract_key_concepts(self):
        """Extract key concepts for descriptive and scenario questions.

        Named entities are stored under their entity label; multi-word noun
        chunks are stored under ``'CONCEPT'``. Each entry keeps the text and
        the sentence it came from.
        """
        # Reuse the document parsed by extract_text_from_pdf() rather than
        # running the pipeline over the full text a second time.
        doc = getattr(self, 'doc', None)
        if doc is None:
            doc = self.nlp(self.content)

        for ent in doc.ents:
            self.key_concepts[ent.label_].append({
                'text': ent.text,
                'context': ent.sent.text
            })

        for sent in doc.sents:
            for chunk in sent.noun_chunks:
                if len(chunk.text.split()) > 1:
                    self.key_concepts['CONCEPT'].append({
                        'text': chunk.text,
                        'context': sent.text
                    })

    def generate_mcq_distractors(self, correct_answer, term_info):
        """Generate up to three distractor strings for an MCQ.

        Args:
            correct_answer: Text the distractors must differ from
                (case-insensitively).
            term_info: ``key_terms`` entry; ``'related_terms'`` and
                ``'definition'`` are read.

        Returns:
            A list of at most three unique distractors.
        """
        answer_lower = correct_answer.lower()
        candidates = set()

        # Use related terms
        candidates.update(term_info['related_terms'][:2])

        # Use WordNet: first two lemmas of each synset plus one per hypernym
        for syn in wordnet.synsets(correct_answer):
            candidates.update([lemma.name() for lemma in syn.lemmas()][:2])
            for hypernym in syn.hypernyms():
                candidates.update([lemma.name() for lemma in hypernym.lemmas()][:1])

        # Use similar terms from document
        doc_terms = [ent.text for ent in self.doc.ents if ent.label_ in ['ORG', 'PRODUCT', 'TECH']]
        if doc_terms:
            candidates.update(random.sample(doc_terms, min(2, len(doc_terms))))

        # Clean distractors
        distractors = [
            d for d in candidates
            if d.lower() != answer_lower and len(d) > 2 and not d.isnumeric()
        ]

        # Top up from content words of the definition when short of three.
        if len(distractors) < 3:
            words = word_tokenize(term_info['definition'])
            key_words = [
                word for word, pos in nltk.pos_tag(words)
                if pos.startswith(('NN', 'VB', 'JJ')) and word.lower() != answer_lower
            ]
            if key_words:
                distractors.extend(
                    random.sample(key_words, min(3 - len(distractors), len(key_words)))
                )

        return list(set(distractors))[:3]

    def generate_mcq(self):
        """Generate a single MCQ about a randomly chosen key term.

        Returns:
            Dict with ``'question'``, shuffled ``'options'`` and the index of
            the definition in ``'correct_answer'``; None if no key terms exist.
        """
        if not self.key_terms:
            return None

        term, term_info = random.choice(list(self.key_terms.items()))

        templates = [
            f"What is {term}?",
            f"Which of the following best defines {term}?",
            f"What is the correct description of {term}?",
            f"Which statement correctly explains {term}?",
            f"The term '{term}' refers to:",
        ]

        question = random.choice(templates)
        correct_answer = term_info['definition']

        distractors = self.generate_mcq_distractors(term, term_info)
        options = [correct_answer] + distractors
        random.shuffle(options)

        return {
            'question': question,
            'options': options,
            'correct_answer': options.index(correct_answer)
        }

    def generate_descriptive_question(self, marks):
        """Generate a descriptive question sized by *marks*.

        ``marks <= 2`` uses a short template, ``marks <= 5`` a medium one,
        anything larger a long one.

        Returns:
            The question text, or None when no ``'CONCEPT'`` entries exist.
        """
        templates = {
            'short': [
                "Define {} and give an example.",
                "What are the key features of {}?",
                "Explain the significance of {} in brief.",
                "How does {} contribute to the field?",
                "Write short notes on {}."
            ],
            'medium': [
                "Explain the relationship between {} and {} with examples.",
                "Describe the process of {} and its applications.",
                "What are the advantages and disadvantages of {}?",
                "How does {} impact {}? Explain with examples.",
                "Analyze the role of {} in {}."
            ],
            'long': [
                "Critically evaluate the importance of {} in relation to {}. Support your answer with examples.",
                "Compare and contrast {} with {}. Provide detailed analysis.",
                "Discuss the evolution of {} and its current relevance in {}.",
                "'{}' has revolutionized '{}'. Justify this statement with examples.",
                "Analyze the challenges and opportunities associated with {} in the context of {}."
            ]
        }

        concept_pool = self.key_concepts['CONCEPT']
        # Without any concept there is nothing to ask about; the caller
        # already skips falsy results, so signal that instead of raising.
        if not concept_pool:
            return None

        concepts = random.sample(concept_pool, min(2, len(concept_pool)))

        if marks <= 2:
            template = random.choice(templates['short'])
            question = template.format(concepts[0]['text'])
        elif marks <= 5:
            template = random.choice(templates['medium'])
            question = template.format(concepts[0]['text'],
                                       concepts[1]['text'] if len(concepts) > 1 else "your field")
        else:
            template = random.choice(templates['long'])
            question = template.format(concepts[0]['text'],
                                       concepts[1]['text'] if len(concepts) > 1 else "modern applications")

        return question

    def generate_scenario_based(self):
        """Generate a scenario-based question from one or two random concepts.

        Returns:
            Dict with ``'scenario'`` and ``'question'``, or None when no
            ``'CONCEPT'`` entries exist.
        """
        if not self.key_concepts['CONCEPT']:
            return None

        concepts = random.sample(self.key_concepts['CONCEPT'],
                                 min(2, len(self.key_concepts['CONCEPT'])))

        scenario_templates = [
            "In a recent project, a team was working with {} when they encountered challenges related to {}. ",
            "A company implementing {} found that it significantly affected their {}. ",
            "While developing a new system using {}, researchers discovered an interesting connection with {}. "
        ]

        question_templates = [
            "Analyze this situation and propose a solution using relevant concepts.",
            "What are the key challenges in this scenario and how would you address them?",
            "How would you apply theoretical concepts to resolve this situation?",
            "Evaluate the scenario and suggest improvements.",
            "What alternative approaches could be used in this situation?"
        ]

        scenario = random.choice(scenario_templates).format(
            concepts[0]['text'],
            concepts[1]['text'] if len(concepts) > 1 else "related systems"
        )
        # Append the source sentences so the scenario carries document context.
        scenario += concepts[0]['context'] + " "
        if len(concepts) > 1:
            scenario += concepts[1]['context']

        return {
            'scenario': scenario,
            'question': random.choice(question_templates)
        }

    def generate_questions(self, question_config):
        """Generate all configured question types and append them to ``self.questions``.

        Args:
            question_config: Dict optionally containing ``'mcq'``,
                ``'descriptive'`` and ``'scenario'`` entries, each a dict with
                ``'count'`` and ``'marks'``.
        """
        if 'mcq' in question_config:
            for _ in range(question_config['mcq']['count']):
                mcq = self.generate_mcq()
                if mcq:
                    mcq['type'] = 'mcq'
                    mcq['marks'] = question_config['mcq']['marks']
                    self.questions.append(mcq)

        if 'descriptive' in question_config:
            for _ in range(question_config['descriptive']['count']):
                question = self.generate_descriptive_question(
                    question_config['descriptive']['marks']
                )
                if question:
                    self.questions.append({
                        'type': 'descriptive',
                        'question': question,
                        'marks': question_config['descriptive']['marks']
                    })

        if 'scenario' in question_config:
            for _ in range(question_config['scenario']['count']):
                scenario_q = self.generate_scenario_based()
                if scenario_q:
                    scenario_q['type'] = 'scenario'
                    scenario_q['marks'] = question_config['scenario']['marks']
                    self.questions.append(scenario_q)

    def export_to_pdf(self, output_path):
        """Export the generated questions to a PDF at *output_path*.

        The paper has a header (date, total marks, time) followed by one
        section per question type that has at least one question.
        """
        doc = SimpleDocTemplate(
            output_path,
            pagesize=letter,
            rightMargin=72,
            leftMargin=72,
            topMargin=72,
            bottomMargin=72
        )

        styles = getSampleStyleSheet()
        styles.add(ParagraphStyle(
            name='CustomTitle',
            parent=styles['Heading1'],
            fontSize=16,
            spaceAfter=30,
            alignment=TA_CENTER
        ))
        styles.add(ParagraphStyle(
            name='QuestionStyle',
            parent=styles['Normal'],
            fontSize=12,
            spaceAfter=12,
            leftIndent=20
        ))

        # Question text comes from the source PDF; escape it so characters
        # such as '<' and '&' are not interpreted as Paragraph markup.
        safe = self._markup_safe
        total_marks = sum(q['marks'] for q in self.questions)

        content = []

        # Header
        content.append(Paragraph("QUESTION PAPER", styles['CustomTitle']))
        content.append(Paragraph(f"Date: {datetime.now().strftime('%B %d, %Y')}", styles['Normal']))
        content.append(Paragraph(f"Total Marks: {total_marks}", styles['Normal']))
        content.append(Paragraph(f"Time: {total_marks * 1.5} minutes", styles['Normal']))
        content.append(Spacer(1, 20))

        # MCQs
        mcqs = [q for q in self.questions if q['type'] == 'mcq']
        if mcqs:
            content.append(Paragraph("Section A: Multiple Choice Questions", styles['Heading2']))
            content.append(Spacer(1, 12))

            for i, q in enumerate(mcqs, 1):
                content.append(Paragraph(
                    f"{i}. {safe(q['question'])} [{q['marks']} mark]",
                    styles['QuestionStyle']
                ))
                options = [
                    ListItem(Paragraph(f"{chr(97+j)}) {safe(option)}", styles['Normal']))
                    for j, option in enumerate(q['options'])
                ]
                content.append(ListFlowable(options, bulletType='bullet', leftIndent=50))
                content.append(Spacer(1, 12))

        # Descriptive Questions
        descriptive = [q for q in self.questions if q['type'] == 'descriptive']
        if descriptive:
            content.append(Paragraph("Section B: Descriptive Questions", styles['Heading2']))
            content.append(Spacer(1, 12))

            for i, q in enumerate(descriptive, 1):
                content.append(Paragraph(
                    f"{i}. {safe(q['question'])} [{q['marks']} marks]",
                    styles['QuestionStyle']
                ))
                content.append(Spacer(1, 12))

        # Scenario Questions
        scenarios = [q for q in self.questions if q['type'] == 'scenario']
        if scenarios:
            content.append(Paragraph("Section C: Scenario-based Questions", styles['Heading2']))
            content.append(Spacer(1, 12))

            for i, q in enumerate(scenarios, 1):
                content.append(Paragraph(f"{i}. Read the following scenario:", styles['QuestionStyle']))
                content.append(Paragraph(safe(q['scenario']), styles['Normal']))
                content.append(Spacer(1, 6))
                content.append(Paragraph(
                    f"Question: {safe(q['question'])} [{q['marks']} marks]",
                    styles['QuestionStyle']
                ))
                content.append(Spacer(1, 12))

        doc.build(content)



class EnhancedQuestionPaperGenerator(QuestionPaperGenerator):
    """Question paper generator that also attaches an answer to every question.

    Each generated question dict carries an ``'answer'`` entry, and
    ``export_answer_key`` writes the answers to a separate PDF.
    """

    def __init__(self):
        """Initialise the base generator and an (initially empty) answers list."""
        super().__init__()
        self.answers = []

    @staticmethod
    def _to_markup(text):
        """Convert plain text to Paragraph markup.

        Escapes ``&``, ``<`` and ``>`` and turns newlines into ``<br/>``,
        because Paragraph collapses raw newlines into ordinary whitespace.
        """
        from xml.sax.saxutils import escape
        return escape(str(text)).replace("\n", "<br/>")

    def generate_mcq(self):
        """Generate an MCQ and add an ``'answer'`` naming the correct option letter."""
        mcq = super().generate_mcq()
        if mcq:
            correct_option = chr(97 + mcq['correct_answer'])
            explanation = f"The correct answer is option {correct_option}. {mcq['options'][mcq['correct_answer']]}"
            mcq['answer'] = explanation
        return mcq

    def generate_descriptive_question(self, marks):
        """Generate a descriptive question together with a context-based answer.

        Returns:
            Dict with ``'question'`` (text) and ``'answer'``, or None when the
            base class produced no question. ``generate_questions`` flattens
            this dict into the stored question entry.
        """
        question = super().generate_descriptive_question(marks)
        if question:
            question_lower = question.lower()
            # Generate answer from context: concepts sharing a word with the question
            relevant_concepts = [
                concept for concept in self.key_concepts['CONCEPT']
                if any(term in question_lower for term in concept['text'].lower().split())
            ]

            answer = "Answer:\n"
            if relevant_concepts:
                for concept in relevant_concepts[:2]:
                    answer += f"• {concept['context']}\n"
                    related_terms = [term for term in self.key_terms
                                     if term.lower() in concept['context'].lower()]
                    for term in related_terms:
                        answer += f"• {term}: {self.key_terms[term]['definition']}\n"
            return {'question': question, 'answer': answer}
        return None

    def generate_scenario_based(self):
        """Generate a scenario question and add a ``'answer'`` solution outline."""
        scenario = super().generate_scenario_based()
        if scenario:
            # Generate solution approach from concepts named in the scenario text
            scenario_lower = scenario['scenario'].lower()
            concepts_mentioned = [
                concept for concept in self.key_concepts['CONCEPT']
                if concept['text'].lower() in scenario_lower
            ]

            answer = "Suggested Solution:\n"
            if concepts_mentioned:
                answer += "1. Analysis of the Situation:\n"
                for concept in concepts_mentioned:
                    answer += f"   • {concept['context']}\n"

                answer += "\n2. Proposed Solutions:\n"
                related_terms = []
                for concept in concepts_mentioned:
                    context_lower = concept['context'].lower()
                    for term in self.key_terms:
                        # Keep each term once so the three suggestions differ.
                        if term.lower() in context_lower and term not in related_terms:
                            related_terms.append(term)

                for term in related_terms[:3]:
                    answer += f"   • Apply {term}: {self.key_terms[term]['definition']}\n"

            scenario['answer'] = answer
        return scenario

    def generate_questions(self, question_config):
        """Generate all questions and make sure each entry has text and an answer.

        The base implementation stores whatever ``generate_descriptive_question``
        returns under ``'question'``. This class returns a dict there, so it
        is flattened here: the text goes back under ``'question'`` and the
        generated answer under ``'answer'``. Entries still lacking an answer
        get a placeholder.
        """
        super().generate_questions(question_config)
        for question in self.questions:
            nested = question.get('question')
            if isinstance(nested, dict):
                question['question'] = nested.get('question', '')
                if 'answer' in nested:
                    question.setdefault('answer', nested['answer'])
            if 'answer' not in question:
                question['answer'] = "See detailed solution in answer key."

    def export_answer_key(self, output_path):
        """Export answer key to PDF at *output_path*, one section per question type."""
        doc = SimpleDocTemplate(
            output_path,
            pagesize=letter,
            rightMargin=72,
            leftMargin=72,
            topMargin=72,
            bottomMargin=72
        )

        styles = getSampleStyleSheet()
        styles.add(ParagraphStyle(
            name='CustomTitle',
            parent=styles['Heading1'],
            fontSize=16,
            spaceAfter=30,
            alignment=TA_CENTER
        ))
        styles.add(ParagraphStyle(
            name='AnswerStyle',
            parent=styles['Normal'],
            fontSize=12,
            spaceAfter=12,
            leftIndent=20
        ))

        markup = self._to_markup
        content = []

        # Header
        content.append(Paragraph("ANSWER KEY", styles['CustomTitle']))
        content.append(Paragraph(f"Date: {datetime.now().strftime('%B %d, %Y')}", styles['Normal']))
        content.append(Spacer(1, 20))

        # MCQ Answers
        mcqs = [q for q in self.questions if q['type'] == 'mcq']
        if mcqs:
            content.append(Paragraph("Section A: Multiple Choice Questions", styles['Heading2']))
            content.append(Spacer(1, 12))

            for i, q in enumerate(mcqs, 1):
                content.append(Paragraph(
                    markup(f"{i}. {q['question']}\n{q['answer']}"),
                    styles['AnswerStyle']
                ))
                content.append(Spacer(1, 12))

        # Descriptive Answers
        descriptive = [q for q in self.questions if q['type'] == 'descriptive']
        if descriptive:
            content.append(Paragraph("Section B: Descriptive Questions", styles['Heading2']))
            content.append(Spacer(1, 12))

            for i, q in enumerate(descriptive, 1):
                content.append(Paragraph(
                    markup(f"{i}. {q['question']}\n\n{q['answer']}"),
                    styles['AnswerStyle']
                ))
                content.append(Spacer(1, 12))

        # Scenario Answers
        scenarios = [q for q in self.questions if q['type'] == 'scenario']
        if scenarios:
            content.append(Paragraph("Section C: Scenario-based Questions", styles['Heading2']))
            content.append(Spacer(1, 12))

            for i, q in enumerate(scenarios, 1):
                content.append(Paragraph(
                    markup(f"{i}. Scenario: {q['scenario']}\n\nQuestion: {q['question']}\n\n{q['answer']}"),
                    styles['AnswerStyle']
                ))
                content.append(Spacer(1, 12))

        doc.build(content)

def generate_question_paper_with_answers(pdf_path, question_config, output_pdf_path, answer_key_path):
    """Read a PDF and write both a question paper and its answer key.

    Args:
        pdf_path: Path of the source PDF handed to ``extract_text_from_pdf``.
        question_config: Configuration passed unchanged to
            ``generate_questions``.
        output_pdf_path: Destination of the question paper PDF.
        answer_key_path: Destination of the answer key PDF.

    Returns:
        True when extraction succeeded and both PDFs were exported, False
        when ``extract_text_from_pdf`` reported failure.
    """
    generator = EnhancedQuestionPaperGenerator()

    # Bail out early when the source document could not be read.
    if not generator.extract_text_from_pdf(pdf_path):
        return False

    generator.generate_questions(question_config)
    generator.export_to_pdf(output_pdf_path)
    generator.export_answer_key(answer_key_path)
    return True

In [None]:
# NOTE: a dict literal keeps only the last value for a repeated key. This cell
# previously listed 'descriptive' twice ({'count': 2, 'marks': 5} and then
# {'count': 1, 'marks': 3}); only the second ever took effect, so it is the
# one kept here. The config holds a single 'descriptive' entry, so a mix of
# mark values cannot be expressed this way.
config = {
    'mcq': {'count': 5, 'marks': 1},
    'descriptive': {'count': 1, 'marks': 3},
    'scenario': {'count': 1, 'marks': 10}
}

generate_question_paper_with_answers(
    '/Users/niyatipatel/Desktop/CC/1.pdf',
    config,
    'question_paper.pdf',
    'answer_key.pdf'
)

# Fixed

In [1]:
import PyPDF2
import nltk
import spacy
import random
from nltk.corpus import wordnet
from nltk.tokenize import sent_tokenize, word_tokenize
import re
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, ListFlowable, ListItem
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_JUSTIFY, TA_LEFT, TA_CENTER
from collections import defaultdict
from datetime import datetime

# Previous imports and base QuestionPaperGenerator class remain the same

class QuestionPaperGenerator:
    """Generate MCQ, descriptive and scenario questions from the text of a PDF.

    Workflow: ``extract_text_from_pdf`` reads the document and fills
    ``key_terms`` / ``key_concepts``; ``generate_questions`` appends question
    dicts to ``questions``; ``export_to_pdf`` renders them with ReportLab.
    """

    # Definition-style sentence patterns. Each exposes the named groups
    # ``term`` and ``definition``. Compiled once instead of per sentence.
    _DEFINITION_PATTERNS = (
        re.compile(r'(?P<term>[A-Z][^.]*?) (?:is|are|refers to|means) (?P<definition>[^.]*\.)'),
        re.compile(r'(?P<term>[A-Z][^.]*?): (?P<definition>[^.]*\.)'),
        re.compile(r'(?P<definition>[^.]*?) is called (?P<term>[^.]*\.)'),
    )

    def __init__(self):
        """Create an empty generator and load the spaCy ``en_core_web_sm`` model."""
        self.content = ""
        self.sentences = []
        self.questions = []
        self.key_concepts = defaultdict(list)
        self.nlp = spacy.load('en_core_web_sm')
        self.key_terms = {}
        # Parsed spaCy document; set by extract_text_from_pdf().
        self.doc = None

    @staticmethod
    def _markup_safe(text):
        """Escape ``&``, ``<`` and ``>`` so Paragraph markup parsing sees literal text."""
        from xml.sax.saxutils import escape
        return escape(str(text))

    def extract_text_from_pdf(self, pdf_path):
        """Read every page of *pdf_path*, then extract key concepts and terms.

        Returns:
            True on success; False (after printing the error) if anything
            raised while reading or analysing the document.
        """
        try:
            with open(pdf_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                for page in pdf_reader.pages:
                    self.content += page.extract_text()
            self.sentences = sent_tokenize(self.content)
            self.doc = self.nlp(self.content)
            self._extract_key_concepts()
            self._extract_key_terms()
            return True
        except Exception as e:
            # Best-effort boundary: callers only need a success flag.
            print(f"Error reading PDF: {str(e)}")
            return False

    def _extract_key_terms(self):
        """Extract key terms for MCQs.

        For every sentence of ``self.doc``, definition patterns add new
        entries to ``self.key_terms``; then every known term found in the
        sentence gets that sentence as context and the sentence's other noun
        chunks as related terms.
        """
        for sent in self.doc.sents:
            sentence_text = sent.text

            for pattern in self._DEFINITION_PATTERNS:
                for match in pattern.finditer(sentence_text):
                    # The "is called" pattern captures the closing period as
                    # part of the term; strip it so the term reads cleanly.
                    term = match.group('term').strip().rstrip('.').strip()
                    definition = match.group('definition').strip()
                    if not term:
                        continue

                    if term not in self.key_terms:
                        self.key_terms[term] = {
                            'definition': definition,
                            'examples': [],
                            'related_terms': [],
                            'context': []
                        }

            sentence_lower = sentence_text.lower()
            for term, info in self.key_terms.items():
                term_lower = term.lower()
                if term_lower in sentence_lower:
                    info['context'].append(sentence_text)
                    info['related_terms'].extend(
                        chunk.text for chunk in sent.noun_chunks
                        if chunk.text.lower() != term_lower
                    )

    def _extract_key_concepts(self):
        """Extract key concepts for descriptive and scenario questions.

        Named entities are stored under their entity label; multi-word noun
        chunks are stored under ``'CONCEPT'``. Each entry keeps the text and
        the sentence it came from.
        """
        # Reuse the document parsed by extract_text_from_pdf() rather than
        # running the pipeline over the full text a second time.
        doc = getattr(self, 'doc', None)
        if doc is None:
            doc = self.nlp(self.content)

        for ent in doc.ents:
            self.key_concepts[ent.label_].append({
                'text': ent.text,
                'context': ent.sent.text
            })

        for sent in doc.sents:
            for chunk in sent.noun_chunks:
                if len(chunk.text.split()) > 1:
                    self.key_concepts['CONCEPT'].append({
                        'text': chunk.text,
                        'context': sent.text
                    })

    def generate_mcq_distractors(self, correct_answer, term_info):
        """Generate up to three distractor strings for an MCQ.

        Args:
            correct_answer: Text the distractors must differ from
                (case-insensitively).
            term_info: ``key_terms`` entry; ``'related_terms'`` and
                ``'definition'`` are read.

        Returns:
            A list of at most three unique distractors.
        """
        answer_lower = correct_answer.lower()
        candidates = set()

        # Use related terms
        candidates.update(term_info['related_terms'][:2])

        # Use WordNet: first two lemmas of each synset plus one per hypernym
        for syn in wordnet.synsets(correct_answer):
            candidates.update([lemma.name() for lemma in syn.lemmas()][:2])
            for hypernym in syn.hypernyms():
                candidates.update([lemma.name() for lemma in hypernym.lemmas()][:1])

        # Use similar terms from document
        doc_terms = [ent.text for ent in self.doc.ents if ent.label_ in ['ORG', 'PRODUCT', 'TECH']]
        if doc_terms:
            candidates.update(random.sample(doc_terms, min(2, len(doc_terms))))

        # Clean distractors
        distractors = [
            d for d in candidates
            if d.lower() != answer_lower and len(d) > 2 and not d.isnumeric()
        ]

        # Top up from content words of the definition when short of three.
        if len(distractors) < 3:
            words = word_tokenize(term_info['definition'])
            key_words = [
                word for word, pos in nltk.pos_tag(words)
                if pos.startswith(('NN', 'VB', 'JJ')) and word.lower() != answer_lower
            ]
            if key_words:
                distractors.extend(
                    random.sample(key_words, min(3 - len(distractors), len(key_words)))
                )

        return list(set(distractors))[:3]

    def generate_mcq(self):
        """Generate a single MCQ about a randomly chosen key term.

        Returns:
            Dict with ``'question'``, shuffled ``'options'`` and the index of
            the definition in ``'correct_answer'``; None if no key terms exist.
        """
        if not self.key_terms:
            return None

        term, term_info = random.choice(list(self.key_terms.items()))

        templates = [
            f"What is {term}?",
            f"Which of the following best defines {term}?",
            f"What is the correct description of {term}?",
            f"Which statement correctly explains {term}?",
            f"The term '{term}' refers to:",
        ]

        question = random.choice(templates)
        correct_answer = term_info['definition']

        distractors = self.generate_mcq_distractors(term, term_info)
        options = [correct_answer] + distractors
        random.shuffle(options)

        return {
            'question': question,
            'options': options,
            'correct_answer': options.index(correct_answer)
        }

    def generate_descriptive_question(self, marks):
        """Generate a descriptive question sized by *marks*.

        ``marks <= 2`` uses a short template, ``marks <= 5`` a medium one,
        anything larger a long one.

        Returns:
            The question text, or None when no ``'CONCEPT'`` entries exist.
        """
        templates = {
            'short': [
                "Define {} and give an example.",
                "What are the key features of {}?",
                "Explain the significance of {} in brief.",
                "How does {} contribute to the field?",
                "Write short notes on {}."
            ],
            'medium': [
                "Explain the relationship between {} and {} with examples.",
                "Describe the process of {} and its applications.",
                "What are the advantages and disadvantages of {}?",
                "How does {} impact {}? Explain with examples.",
                "Analyze the role of {} in {}."
            ],
            'long': [
                "Critically evaluate the importance of {} in relation to {}. Support your answer with examples.",
                "Compare and contrast {} with {}. Provide detailed analysis.",
                "Discuss the evolution of {} and its current relevance in {}.",
                "'{}' has revolutionized '{}'. Justify this statement with examples.",
                "Analyze the challenges and opportunities associated with {} in the context of {}."
            ]
        }

        concept_pool = self.key_concepts['CONCEPT']
        # Without any concept there is nothing to ask about; the caller
        # already skips falsy results, so signal that instead of raising.
        if not concept_pool:
            return None

        concepts = random.sample(concept_pool, min(2, len(concept_pool)))

        if marks <= 2:
            template = random.choice(templates['short'])
            question = template.format(concepts[0]['text'])
        elif marks <= 5:
            template = random.choice(templates['medium'])
            question = template.format(concepts[0]['text'],
                                       concepts[1]['text'] if len(concepts) > 1 else "your field")
        else:
            template = random.choice(templates['long'])
            question = template.format(concepts[0]['text'],
                                       concepts[1]['text'] if len(concepts) > 1 else "modern applications")

        return question

    def generate_scenario_based(self):
        """Generate a scenario-based question from one or two random concepts.

        Returns:
            Dict with ``'scenario'`` and ``'question'``, or None when no
            ``'CONCEPT'`` entries exist.
        """
        if not self.key_concepts['CONCEPT']:
            return None

        concepts = random.sample(self.key_concepts['CONCEPT'],
                                 min(2, len(self.key_concepts['CONCEPT'])))

        scenario_templates = [
            "In a recent project, a team was working with {} when they encountered challenges related to {}. ",
            "A company implementing {} found that it significantly affected their {}. ",
            "While developing a new system using {}, researchers discovered an interesting connection with {}. "
        ]

        question_templates = [
            "Analyze this situation and propose a solution using relevant concepts.",
            "What are the key challenges in this scenario and how would you address them?",
            "How would you apply theoretical concepts to resolve this situation?",
            "Evaluate the scenario and suggest improvements.",
            "What alternative approaches could be used in this situation?"
        ]

        scenario = random.choice(scenario_templates).format(
            concepts[0]['text'],
            concepts[1]['text'] if len(concepts) > 1 else "related systems"
        )
        # Append the source sentences so the scenario carries document context.
        scenario += concepts[0]['context'] + " "
        if len(concepts) > 1:
            scenario += concepts[1]['context']

        return {
            'scenario': scenario,
            'question': random.choice(question_templates)
        }

    def generate_questions(self, question_config):
        """Generate all configured question types and append them to ``self.questions``.

        Args:
            question_config: Dict optionally containing ``'mcq'``,
                ``'descriptive'`` and ``'scenario'`` entries, each a dict with
                ``'count'`` and ``'marks'``.
        """
        if 'mcq' in question_config:
            for _ in range(question_config['mcq']['count']):
                mcq = self.generate_mcq()
                if mcq:
                    mcq['type'] = 'mcq'
                    mcq['marks'] = question_config['mcq']['marks']
                    self.questions.append(mcq)

        if 'descriptive' in question_config:
            for _ in range(question_config['descriptive']['count']):
                question = self.generate_descriptive_question(
                    question_config['descriptive']['marks']
                )
                if question:
                    self.questions.append({
                        'type': 'descriptive',
                        'question': question,
                        'marks': question_config['descriptive']['marks']
                    })

        if 'scenario' in question_config:
            for _ in range(question_config['scenario']['count']):
                scenario_q = self.generate_scenario_based()
                if scenario_q:
                    scenario_q['type'] = 'scenario'
                    scenario_q['marks'] = question_config['scenario']['marks']
                    self.questions.append(scenario_q)

    def export_to_pdf(self, output_path):
        """Export the generated questions to a PDF at *output_path*.

        The paper has a header (date, total marks, time) followed by one
        section per question type that has at least one question.
        """
        doc = SimpleDocTemplate(
            output_path,
            pagesize=letter,
            rightMargin=72,
            leftMargin=72,
            topMargin=72,
            bottomMargin=72
        )

        styles = getSampleStyleSheet()
        styles.add(ParagraphStyle(
            name='CustomTitle',
            parent=styles['Heading1'],
            fontSize=16,
            spaceAfter=30,
            alignment=TA_CENTER
        ))
        styles.add(ParagraphStyle(
            name='QuestionStyle',
            parent=styles['Normal'],
            fontSize=12,
            spaceAfter=12,
            leftIndent=20
        ))

        # Question text comes from the source PDF; escape it so characters
        # such as '<' and '&' are not interpreted as Paragraph markup.
        safe = self._markup_safe
        total_marks = sum(q['marks'] for q in self.questions)

        content = []

        # Header
        content.append(Paragraph("QUESTION PAPER", styles['CustomTitle']))
        content.append(Paragraph(f"Date: {datetime.now().strftime('%B %d, %Y')}", styles['Normal']))
        content.append(Paragraph(f"Total Marks: {total_marks}", styles['Normal']))
        content.append(Paragraph(f"Time: {total_marks * 1.5} minutes", styles['Normal']))
        content.append(Spacer(1, 20))

        # MCQs
        mcqs = [q for q in self.questions if q['type'] == 'mcq']
        if mcqs:
            content.append(Paragraph("Section A: Multiple Choice Questions", styles['Heading2']))
            content.append(Spacer(1, 12))

            for i, q in enumerate(mcqs, 1):
                content.append(Paragraph(
                    f"{i}. {safe(q['question'])} [{q['marks']} mark]",
                    styles['QuestionStyle']
                ))
                options = [
                    ListItem(Paragraph(f"{chr(97+j)}) {safe(option)}", styles['Normal']))
                    for j, option in enumerate(q['options'])
                ]
                content.append(ListFlowable(options, bulletType='bullet', leftIndent=50))
                content.append(Spacer(1, 12))

        # Descriptive Questions
        descriptive = [q for q in self.questions if q['type'] == 'descriptive']
        if descriptive:
            content.append(Paragraph("Section B: Descriptive Questions", styles['Heading2']))
            content.append(Spacer(1, 12))

            for i, q in enumerate(descriptive, 1):
                content.append(Paragraph(
                    f"{i}. {safe(q['question'])} [{q['marks']} marks]",
                    styles['QuestionStyle']
                ))
                content.append(Spacer(1, 12))

        # Scenario Questions
        scenarios = [q for q in self.questions if q['type'] == 'scenario']
        if scenarios:
            content.append(Paragraph("Section C: Scenario-based Questions", styles['Heading2']))
            content.append(Spacer(1, 12))

            for i, q in enumerate(scenarios, 1):
                content.append(Paragraph(f"{i}. Read the following scenario:", styles['QuestionStyle']))
                content.append(Paragraph(safe(q['scenario']), styles['Normal']))
                content.append(Spacer(1, 6))
                content.append(Paragraph(
                    f"Question: {safe(q['question'])} [{q['marks']} marks]",
                    styles['QuestionStyle']
                ))
                content.append(Spacer(1, 12))

        doc.build(content)


class EnhancedQuestionPaperGenerator(QuestionPaperGenerator):
    def __init__(self):
        """Initialise the parent generator state and add an empty ``answers`` list."""
        super().__init__()
        # Answers themselves are stored on each question dict under 'answer';
        # this list is only created here.
        self.answers = list()

    def generate_mcq(self):
        """Build an MCQ through the parent class and attach an answer explanation.

        Returns:
            The parent's MCQ dict with an added ``'answer'`` string naming the
            correct option letter and its text, or the parent's falsy result
            unchanged when no MCQ could be generated.
        """
        item = super().generate_mcq()
        if not item:
            return item

        index = item['correct_answer']
        option_letter = chr(ord('a') + index)  # 0 -> 'a', 1 -> 'b', ...
        item['answer'] = (
            f"The correct answer is option {option_letter}. {item['options'][index]}"
        )
        return item

    def generate_descriptive_question(self, marks):
        """Generate a descriptive question together with a model answer.

        The answer is assembled from the context of at most two concepts that
        share a word with the question, each followed by bullet definitions of
        the key terms occurring in that context.

        Args:
            marks: Marks for the question; forwarded to the parent generator.

        Returns:
            ``{'question': ..., 'complete_answer': ...}`` or ``None`` when the
            parent produced no question.
        """
        question = super().generate_descriptive_question(marks)
        if not question:
            return None

        matching = [
            concept
            for concept in self.key_concepts['CONCEPT']
            if any(word in question.lower() for word in concept['text'].lower().split())
        ]

        pieces = []
        for concept in matching[:2]:
            context = concept['context']
            pieces.append(f"{context}\n\n")
            for term in self.key_terms:
                if term.lower() in context.lower():
                    pieces.append(f"• {term}: {self.key_terms[term]['definition']}\n")
        answer = "".join(pieces)

        # The paper shows only 'question'; 'complete_answer' feeds the answer key.
        return {
            'question': question,
            'complete_answer': answer or "See detailed solution in answer key.",
        }

    def generate_scenario_based(self):
        """Generate a scenario question and attach a suggested solution.

        Returns:
            The parent's scenario dict with an added ``'answer'`` string, or
            the parent's falsy result unchanged when no scenario was produced.
        """
        scenario = super().generate_scenario_based()
        if not scenario:
            return scenario

        # Concepts whose text occurs verbatim (case-insensitively) in the scenario.
        mentioned = [
            concept
            for concept in self.key_concepts['CONCEPT']
            if concept['text'].lower() in scenario['scenario'].lower()
        ]

        lines = ["Suggested Solution:\n\n"]
        if mentioned:
            lines.append("1. Analysis of the Situation:\n")
            lines.extend(f"   • {concept['context']}\n" for concept in mentioned)

            lines.append("\n2. Proposed Solutions:\n")
            applicable = [
                term
                for concept in mentioned
                for term in self.key_terms
                if term.lower() in concept['context'].lower()
            ]
            lines.extend(
                f"   • Apply {term}: {self.key_terms[term]['definition']}\n"
                for term in applicable[:3]
            )

        scenario['answer'] = "".join(lines)
        return scenario

    def generate_questions(self, question_config):
        """Generate all types of questions

        Args:
            question_config: Mapping with optional keys ``'mcq'``,
                ``'descriptive'`` and ``'scenario'``; each value is a dict
                with ``'count'`` and ``'marks'``.

        Generated questions are appended to ``self.questions``; attempts that
        yield nothing are skipped.
        """
        if 'mcq' in question_config:
            mcq_cfg = question_config['mcq']
            for _ in range(mcq_cfg['count']):
                item = self.generate_mcq()
                if not item:
                    continue
                item['type'] = 'mcq'
                item['marks'] = mcq_cfg['marks']
                self.questions.append(item)

        if 'descriptive' in question_config:
            desc_cfg = question_config['descriptive']
            for _ in range(desc_cfg['count']):
                data = self.generate_descriptive_question(desc_cfg['marks'])
                if not data:
                    continue
                self.questions.append({
                    'type': 'descriptive',
                    'question': data['question'],
                    'answer': data['complete_answer'],
                    'marks': desc_cfg['marks'],
                })

        if 'scenario' in question_config:
            scenario_cfg = question_config['scenario']
            for _ in range(scenario_cfg['count']):
                item = self.generate_scenario_based()
                if not item:
                    continue
                item['type'] = 'scenario'
                item['marks'] = scenario_cfg['marks']
                self.questions.append(item)

    def export_answer_key(self, output_path):
        """Export answer key to PDF with improved formatting

        Writes one section per question type present in ``self.questions``
        (MCQ, descriptive, scenario); every question is followed by the text
        stored under its ``'answer'`` key.

        Question and answer text comes from extracted PDF content, so it is
        XML-escaped before being handed to ``Paragraph``, which treats its
        input as markup. Without escaping, a stray ``<`` or ``&`` in the
        source document would be parsed as a tag or entity.

        Args:
            output_path: Destination path of the generated PDF.
        """
        # Function-scope import so the method does not depend on the cell's
        # import block being updated.
        from xml.sax.saxutils import escape

        def multiline(text):
            """Escape *text* and turn its newlines into Paragraph line breaks."""
            return escape(text).replace('\n', '<br/>')

        def of_type(kind):
            """Return the stored questions whose 'type' equals *kind*."""
            return [q for q in self.questions if q['type'] == kind]

        doc = SimpleDocTemplate(
            output_path,
            pagesize=letter,
            rightMargin=72,
            leftMargin=72,
            topMargin=72,
            bottomMargin=72
        )

        styles = getSampleStyleSheet()

        # Enhanced styles for better formatting
        for style in (
            ParagraphStyle(
                name='CustomTitle',
                parent=styles['Heading1'],
                fontSize=16,
                spaceAfter=30,
                alignment=TA_CENTER
            ),
            ParagraphStyle(
                name='SectionHeader',
                parent=styles['Heading2'],
                fontSize=14,
                spaceAfter=20,
                spaceBefore=20,
                textColor=colors.HexColor('#2E5090')
            ),
            ParagraphStyle(
                name='QuestionHeader',
                parent=styles['Normal'],
                fontSize=12,
                spaceBefore=15,
                spaceAfter=10,
                textColor=colors.HexColor('#000000'),
                fontName='Helvetica-Bold'
            ),
            ParagraphStyle(
                name='AnswerText',
                parent=styles['Normal'],
                fontSize=11,
                spaceAfter=15,
                leftIndent=20,
                leading=16
            ),
        ):
            styles.add(style)

        # Header
        content = [
            Paragraph("ANSWER KEY", styles['CustomTitle']),
            Paragraph(f"Date: {datetime.now().strftime('%B %d, %Y')}", styles['Normal']),
            Spacer(1, 20),
        ]

        # MCQ Answers
        mcqs = of_type('mcq')
        if mcqs:
            content.append(Paragraph("Section A: Multiple Choice Questions", styles['SectionHeader']))
            for i, q in enumerate(mcqs, 1):
                content.append(Paragraph(
                    f"Question {i}: {escape(q['question'])}",
                    styles['QuestionHeader']
                ))
                content.append(Paragraph(escape(q['answer']), styles['AnswerText']))
                content.append(Spacer(1, 10))

        # Descriptive Answers
        descriptive = of_type('descriptive')
        if descriptive:
            content.append(Paragraph("Section B: Descriptive Questions", styles['SectionHeader']))
            for i, q in enumerate(descriptive, 1):
                content.append(Paragraph(
                    f"Question {i}: {escape(q['question'])}",
                    styles['QuestionHeader']
                ))
                content.append(Paragraph(multiline(q['answer']), styles['AnswerText']))
                content.append(Spacer(1, 15))

        # Scenario Answers
        scenarios = of_type('scenario')
        if scenarios:
            content.append(Paragraph("Section C: Scenario-based Questions", styles['SectionHeader']))
            for i, q in enumerate(scenarios, 1):
                content.append(Paragraph(f"Question {i}:", styles['QuestionHeader']))
                content.append(Paragraph(
                    f"Scenario: {escape(q['scenario'])}",
                    styles['QuestionHeader']
                ))
                content.append(Paragraph(
                    f"Question: {escape(q['question'])}",
                    styles['QuestionHeader']
                ))
                content.append(Paragraph(multiline(q['answer']), styles['AnswerText']))
                content.append(Spacer(1, 15))

        doc.build(content)

def generate_question_paper_with_answers(pdf_path, question_config, output_pdf_path, answer_key_path):
    """Generate a question paper PDF and its answer key PDF from a source PDF.

    Args:
        pdf_path: Path of the PDF to read the study material from.
        question_config: Question counts/marks, passed to ``generate_questions``.
        output_pdf_path: Where the question paper is written.
        answer_key_path: Where the answer key is written.

    Returns:
        True when both PDFs were produced, False when the source PDF could
        not be read (nothing is written in that case).
    """
    generator = EnhancedQuestionPaperGenerator()
    if not generator.extract_text_from_pdf(pdf_path):
        return False

    generator.generate_questions(question_config)
    generator.export_to_pdf(output_pdf_path)
    generator.export_answer_key(answer_key_path)
    return True

In [2]:
# Question mix for this run: for each question type, how many questions to
# generate ('count') and how many marks each one carries ('marks').
config = dict(
    mcq=dict(count=5, marks=1),
    descriptive=dict(count=3, marks=5),
    scenario=dict(count=2, marks=10),
)

# Read the source PDF and write the paper and its answer key to the working directory.
generate_question_paper_with_answers(
    pdf_path='/Users/niyatipatel/Desktop/CC/1.pdf',
    question_config=config,
    output_pdf_path='question_paper.pdf',
    answer_key_path='answer_key.pdf',
)

True

# Updated

In [3]:
import PyPDF2
import nltk
import spacy
import random
from nltk.corpus import wordnet
from nltk.tokenize import sent_tokenize, word_tokenize
import re
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, ListFlowable, ListItem
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_JUSTIFY, TA_LEFT, TA_CENTER
from collections import defaultdict
from datetime import datetime


class QuestionPaperGenerator:
    def __init__(self):
        """Create empty document state and load the spaCy ``en_core_web_sm`` pipeline."""
        # Raw text of the source document and its sentence split.
        self.content = ""
        self.sentences = []
        # Generated question dicts, in generation order.
        self.questions = []
        # label -> list of {'text', 'context'} entries; 'CONCEPT' holds noun chunks.
        self.key_concepts = defaultdict(list)
        # term -> {'definition', 'examples', 'related_terms', 'context'}.
        self.key_terms = dict()
        self.nlp = spacy.load('en_core_web_sm')
        
    def extract_text_from_pdf(self, pdf_path):
        try:
            with open(pdf_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                for page in pdf_reader.pages:
                    self.content += page.extract_text()
            self.sentences = sent_tokenize(self.content)
            self.doc = self.nlp(self.content)
            self._extract_key_concepts()
            self._extract_key_terms()
            return True
        except Exception as e:
            print(f"Error reading PDF: {str(e)}")
            return False

    def _extract_key_terms(self):
        """Extract key terms for MCQs"""
        for sent in self.doc.sents:
            definition_patterns = [
                r'(?P<term>[A-Z][^.]*?) (?:is|are|refers to|means) (?P<definition>[^.]*\.)',
                r'(?P<term>[A-Z][^.]*?): (?P<definition>[^.]*\.)',
                r'(?P<definition>[^.]*?) is called (?P<term>[^.]*\.)',
            ]
            
            for pattern in definition_patterns:
                matches = re.finditer(pattern, sent.text)
                for match in matches:
                    term = match.group('term').strip()
                    definition = match.group('definition').strip()
                    
                    if term not in self.key_terms:
                        self.key_terms[term] = {
                            'definition': definition,
                            'examples': [],
                            'related_terms': [],
                            'context': []
                        }
            
            for term in self.key_terms:
                if term.lower() in sent.text.lower():
                    self.key_terms[term]['context'].append(sent.text)
                    for chunk in sent.noun_chunks:
                        if chunk.text.lower() != term.lower():
                            self.key_terms[term]['related_terms'].append(chunk.text)

    def _extract_key_concepts(self):
        """Extract key concepts for descriptive and scenario questions"""
        doc = self.nlp(self.content)
        
        for ent in doc.ents:
            self.key_concepts[ent.label_].append({
                'text': ent.text,
                'context': ent.sent.text
            })
        
        for sent in doc.sents:
            for chunk in sent.noun_chunks:
                if len(chunk.text.split()) > 1:
                    self.key_concepts['CONCEPT'].append({
                        'text': chunk.text,
                        'context': sent.text
                    })

    def generate_mcq_distractors(self, correct_answer, term_info):
        """Collect up to three distractor strings for an MCQ.

        Candidates come from the term's related terms, WordNet lemmas and
        hypernym lemmas of *correct_answer*, and a random pick of ORG /
        PRODUCT / TECH entities from the document. If fewer than three
        survive filtering, content words from the term's definition top the
        list up.

        Args:
            correct_answer: String the distractors must differ from
                (compared case-insensitively).
            term_info: The term's entry from ``self.key_terms``.

        Returns:
            A list of at most three distinct strings.
        """
        pool = set(term_info['related_terms'][:2])

        # WordNet: two lemma names per synset plus one per hypernym.
        for synset in wordnet.synsets(correct_answer):
            pool.update([lemma.name() for lemma in synset.lemmas()][:2])
            for hypernym in synset.hypernyms():
                pool.update([lemma.name() for lemma in hypernym.lemmas()][:1])

        # Similar terms from the document itself.
        entity_names = [
            ent.text for ent in self.doc.ents
            if ent.label_ in ['ORG', 'PRODUCT', 'TECH']
        ]
        if entity_names:
            pool.update(random.sample(entity_names, min(2, len(entity_names))))

        # Drop the answer itself, very short strings and pure numbers.
        kept = []
        for candidate in pool:
            if candidate.lower() == correct_answer.lower():
                continue
            if len(candidate) <= 2 or candidate.isnumeric():
                continue
            kept.append(candidate)

        if len(kept) < 3:
            tagged = nltk.pos_tag(word_tokenize(term_info['definition']))
            fallback = [
                word for word, tag in tagged
                if tag.startswith(('NN', 'VB', 'JJ'))
                and word.lower() != correct_answer.lower()
            ]
            if fallback:
                needed = min(3 - len(kept), len(fallback))
                kept.extend(random.sample(fallback, needed))

        return list(set(kept))[:3]

    def generate_mcq(self):
        """Generate a single MCQ"""
        if not self.key_terms:
            return None
            
        term, term_info = random.choice(list(self.key_terms.items()))
        
        templates = [
            f"What is {term}?",
            f"Which of the following best defines {term}?",
            f"What is the correct description of {term}?",
            f"Which statement correctly explains {term}?",
            f"The term '{term}' refers to:",
        ]
        
        question = random.choice(templates)
        correct_answer = term_info['definition']
        
        distractors = self.generate_mcq_distractors(term, term_info)
        options = [correct_answer] + distractors
        random.shuffle(options)
        
        return {
            'question': question,
            'options': options,
            'correct_answer': options.index(correct_answer)
        }

    def generate_descriptive_question(self, marks):
        """Generate descriptive questions"""
        templates = {
            'short': [
                "Define {} and give an example.",
                "What are the key features of {}?",
                "Explain the significance of {} in brief.",
                "How does {} contribute to the field?",
                "Write short notes on {}."
            ],
            'medium': [
                "Explain the relationship between {} and {} with examples.",
                "Describe the process of {} and its applications.",
                "What are the advantages and disadvantages of {}?",
                "How does {} impact {}? Explain with examples.",
                "Analyze the role of {} in {}."
            ],
            'long': [
                "Critically evaluate the importance of {} in relation to {}. Support your answer with examples.",
                "Compare and contrast {} with {}. Provide detailed analysis.",
                "Discuss the evolution of {} and its current relevance in {}.",
                "'{}' has revolutionized '{}'. Justify this statement with examples.",
                "Analyze the challenges and opportunities associated with {} in the context of {}."
            ]
        }
        
        concepts = random.sample(self.key_concepts['CONCEPT'], min(2, len(self.key_concepts['CONCEPT'])))
        
        if marks <= 2:
            template = random.choice(templates['short'])
            question = template.format(concepts[0]['text'])
        elif marks <= 5:
            template = random.choice(templates['medium'])
            question = template.format(concepts[0]['text'], 
                                    concepts[1]['text'] if len(concepts) > 1 else "your field")
        else:
            template = random.choice(templates['long'])
            question = template.format(concepts[0]['text'], 
                                    concepts[1]['text'] if len(concepts) > 1 else "modern applications")
        
        return question

    def generate_scenario_based(self):
        """Generate scenario-based questions"""
        if not self.key_concepts['CONCEPT']:
            return None
        
        concepts = random.sample(self.key_concepts['CONCEPT'], 
                               min(2, len(self.key_concepts['CONCEPT'])))
        
        scenario_templates = [
            "In a recent project, a team was working with {} when they encountered challenges related to {}. ",
            "A company implementing {} found that it significantly affected their {}. ",
            "While developing a new system using {}, researchers discovered an interesting connection with {}. "
        ]
        
        question_templates = [
            "Analyze this situation and propose a solution using relevant concepts.",
            "What are the key challenges in this scenario and how would you address them?",
            "How would you apply theoretical concepts to resolve this situation?",
            "Evaluate the scenario and suggest improvements.",
            "What alternative approaches could be used in this situation?"
        ]
        
        scenario = random.choice(scenario_templates).format(
            concepts[0]['text'],
            concepts[1]['text'] if len(concepts) > 1 else "related systems"
        )
        scenario += concepts[0]['context'] + " "
        if len(concepts) > 1:
            scenario += concepts[1]['context']
        
        return {
            'scenario': scenario,
            'question': random.choice(question_templates)
        }

    def generate_questions(self, question_config):
        """Generate all types of questions"""
        if 'mcq' in question_config:
            for _ in range(question_config['mcq']['count']):
                mcq = self.generate_mcq()
                if mcq:
                    mcq['type'] = 'mcq'
                    mcq['marks'] = question_config['mcq']['marks']
                    self.questions.append(mcq)
        
        if 'descriptive' in question_config:
            for _ in range(question_config['descriptive']['count']):
                question = self.generate_descriptive_question(
                    question_config['descriptive']['marks']
                )
                if question:
                    self.questions.append({
                        'type': 'descriptive',
                        'question': question,
                        'marks': question_config['descriptive']['marks']
                    })
        
        if 'scenario' in question_config:
            for _ in range(question_config['scenario']['count']):
                scenario_q = self.generate_scenario_based()
                if scenario_q:
                    scenario_q['type'] = 'scenario'
                    scenario_q['marks'] = question_config['scenario']['marks']
                    self.questions.append(scenario_q)
    
    

class EnhancedQuestionPaperGenerator(QuestionPaperGenerator):
    def __init__(self):
        """Run the base-class setup and create an empty ``answers`` list."""
        super().__init__()
        # Per-question answers are stored on the question dicts themselves.
        self.answers = list()

    def generate_mcq(self):
        """Generate an MCQ and add the answer-key explanation under ``'answer'``.

        Returns:
            The base-class MCQ dict extended with ``'answer'``, or the base
            class's falsy result when no MCQ could be generated.
        """
        mcq = super().generate_mcq()
        if not mcq:
            return mcq

        position = mcq['correct_answer']
        option_label = chr(ord('a') + position)  # option letters start at 'a'
        mcq['answer'] = (
            f"The correct answer is option {option_label}. {mcq['options'][position]}"
        )
        return mcq

    def generate_descriptive_question(self, marks):
        """Generate a descriptive question together with a model answer.

        The answer consists of the source sentences of the concepts named in
        the question, each followed by bullet definitions of the key terms
        occurring in that sentence. The number of sentences scales with
        *marks* (``marks // 2``, but at least one).

        Args:
            marks: Marks the question is worth.

        Returns:
            ``{'question': ..., 'answer': ...}``, or ``None`` when the base
            class produced no question.
        """
        question = super().generate_descriptive_question(marks)
        if not question:
            return None

        question_lower = question.lower()
        # The base class inserts concept texts verbatim into the question, so
        # match the whole phrase. Testing each word as a substring would let
        # "a" or "the" match nearly every question. dict.fromkeys keeps one
        # entry per sentence in first-seen order, because several noun chunks
        # of one sentence share the same context.
        relevant_contexts = list(dict.fromkeys(
            concept['context']
            for concept in self.key_concepts['CONCEPT']
            if concept['text'].lower() in question_lower
        ))

        # marks // 2 is 0 for a 1-mark question; still give it one context.
        context_count = min(max(1, marks // 2), len(relevant_contexts))

        parts = []
        for context in relevant_contexts[:context_count]:
            parts.append(f"{context}\n\n")
            context_lower = context.lower()
            related_terms = [term for term in self.key_terms
                             if term.lower() in context_lower]
            for term in related_terms[:marks]:
                parts.append(f"• {term}: {self.key_terms[term]['definition']}\n")
        answer = "".join(parts)

        # Return both question and answer in the structure
        return {
            'question': question,
            'answer': answer if answer else "See detailed solution in answer key."
        }

    def generate_scenario_based(self):
        """Generate a scenario question and attach a suggested solution.

        The solution lists the source sentences of every concept mentioned in
        the scenario ("Analysis") and up to three key terms found in those
        sentences ("Proposed Solutions"). Sentences and terms are listed once
        each even when several concepts share them.

        Returns:
            The base-class scenario dict extended with ``'answer'``, or
            ``None`` when the base class produced no scenario.
        """
        scenario = super().generate_scenario_based()
        if not scenario:
            return None

        scenario_lower = scenario['scenario'].lower()
        # Several noun chunks usually come from the same sentence; keep each
        # context sentence once, in first-seen order.
        contexts = list(dict.fromkeys(
            concept['context']
            for concept in self.key_concepts['CONCEPT']
            if concept['text'].lower() in scenario_lower
        ))

        if not contexts:
            # Use the fallback text rather than emitting a bare header.
            scenario['answer'] = "See detailed solution in answer key."
            return scenario

        related_terms = list(dict.fromkeys(
            term
            for context in contexts
            for term in self.key_terms
            if term.lower() in context.lower()
        ))

        lines = ["Suggested Solution:\n\n", "1. Analysis of the Situation:\n"]
        lines.extend(f"   • {context}\n" for context in contexts)
        lines.append("\n2. Proposed Solutions:\n")
        lines.extend(
            f"   • Apply {term}: {self.key_terms[term]['definition']}\n"
            for term in related_terms[:3]
        )

        scenario['answer'] = "".join(lines)
        return scenario

    def generate_questions(self, question_config):
        """Generate questions with multiple mark categories

        Args:
            question_config: Mapping with optional keys ``'mcq'``,
                ``'descriptive'`` and ``'scenario'``. Each value is either a
                list of ``{'count': int, 'marks': int}`` categories or a
                single such dict (the form the base class accepts).

        Generated questions are appended to ``self.questions``; attempts that
        yield nothing are skipped.
        """
        def categories(kind):
            """Return the mark categories configured for *kind* (empty if absent)."""
            if kind not in question_config:
                return []
            spec = question_config[kind]
            # A bare dict would otherwise be iterated key by key ('count',
            # 'marks') and fail with TypeError on mark_category['count'].
            return [spec] if isinstance(spec, dict) else spec

        # Handle MCQs
        for mark_category in categories('mcq'):
            for _ in range(mark_category['count']):
                mcq = self.generate_mcq()
                if mcq:
                    mcq['type'] = 'mcq'
                    mcq['marks'] = mark_category['marks']
                    self.questions.append(mcq)

        # Handle descriptive questions
        for mark_category in categories('descriptive'):
            for _ in range(mark_category['count']):
                question_data = self.generate_descriptive_question(mark_category['marks'])
                if question_data:
                    self.questions.append({
                        'type': 'descriptive',
                        'question': question_data['question'],
                        'answer': question_data['answer'],
                        'marks': mark_category['marks']
                    })

        # Handle scenario questions
        for mark_category in categories('scenario'):
            for _ in range(mark_category['count']):
                scenario_q = self.generate_scenario_based()
                if scenario_q:
                    scenario_q['type'] = 'scenario'
                    scenario_q['marks'] = mark_category['marks']
                    # Add default answer if missing
                    scenario_q.setdefault('answer', "See detailed solution in answer key.")
                    self.questions.append(scenario_q)

    # PDF export methods (export_to_pdf and export_answer_key) follow.

    def export_to_pdf(self, output_path):
        """Export question paper to PDF with mark categories

        The paper has a header (title, date, total marks, time allowance of
        1.5 minutes per mark) followed by one section per question type
        present in ``self.questions``. Within a section, questions are grouped
        by marks in ascending order and numbered continuously.

        Question text comes from extracted PDF content, so it is XML-escaped
        before being handed to ``Paragraph``, which treats its input as
        markup. Without escaping, a stray ``<`` or ``&`` in the source
        document would be parsed as a tag or entity.

        Args:
            output_path: Destination path of the generated PDF.
        """
        # Function-scope import so the method does not depend on the cell's
        # import block being updated.
        from xml.sax.saxutils import escape

        doc = SimpleDocTemplate(
            output_path,
            pagesize=letter,
            rightMargin=72,
            leftMargin=72,
            topMargin=72,
            bottomMargin=72
        )

        styles = getSampleStyleSheet()

        # Custom styles used below
        for style in (
            ParagraphStyle(
                name='CustomTitle',
                parent=styles['Heading1'],
                fontSize=16,
                spaceAfter=30,
                alignment=TA_CENTER
            ),
            ParagraphStyle(
                name='QuestionStyle',
                parent=styles['Normal'],
                fontSize=12,
                spaceAfter=12,
                leftIndent=20,
                leading=14
            ),
            ParagraphStyle(
                name='SectionHeader',
                parent=styles['Heading2'],
                fontSize=14,
                spaceAfter=20,
                spaceBefore=20,
                textColor=colors.HexColor('#2E5090')
            ),
            ParagraphStyle(
                name='MarkCategory',
                parent=styles['Normal'],
                fontSize=12,
                spaceBefore=10,
                spaceAfter=6,
                textColor=colors.HexColor('#444444'),
                fontName='Helvetica-Bold'
            ),
        ):
            styles.add(style)

        # Header
        content = [
            Paragraph("QUESTION PAPER", styles['CustomTitle']),
            Paragraph(f"Date: {datetime.now().strftime('%B %d, %Y')}", styles['Normal']),
        ]
        total_marks = sum(q['marks'] for q in self.questions)
        content.append(Paragraph(f"Total Marks: {total_marks}", styles['Normal']))
        content.append(Paragraph(f"Time: {total_marks * 1.5} minutes", styles['Normal']))
        content.append(Spacer(1, 20))

        # Group questions by type, then by marks
        grouped = defaultdict(lambda: defaultdict(list))
        for q in self.questions:
            grouped[q['type']][q['marks']].append(q)

        def marks_label(marks):
            """Return '[1 mark]' / '[N marks]' with correct pluralisation."""
            return f"[{marks} mark{'s' if marks > 1 else ''}]"

        def render_mcq(number, marks, q):
            """Append the question line and its lettered option list."""
            content.append(Paragraph(
                f"{number}. {escape(q['question'])} {marks_label(marks)}",
                styles['QuestionStyle']
            ))
            options = [
                ListItem(Paragraph(f"{chr(97 + j)}) {escape(option)}", styles['Normal']))
                for j, option in enumerate(q['options'])
            ]
            content.append(ListFlowable(options, bulletType='bullet', leftIndent=50))

        def render_descriptive(number, marks, q):
            """Append the question line."""
            content.append(Paragraph(
                f"{number}. {escape(q['question'])} {marks_label(marks)}",
                styles['QuestionStyle']
            ))

        def render_scenario(number, marks, q):
            """Append the scenario text followed by its question."""
            content.append(Paragraph(f"{number}. Read the following scenario:", styles['QuestionStyle']))
            content.append(Paragraph(escape(q['scenario']), styles['Normal']))
            content.append(Spacer(1, 6))
            content.append(Paragraph(
                f"Question: {escape(q['question'])} {marks_label(marks)}",
                styles['QuestionStyle']
            ))

        def add_section(kind, title, render):
            """Append one section: header, then questions grouped by ascending marks."""
            if kind not in grouped:
                return
            content.append(Paragraph(title, styles['SectionHeader']))
            content.append(Spacer(1, 12))
            number = 1  # numbering restarts in every section
            for marks, questions in sorted(grouped[kind].items()):
                content.append(Paragraph(f"{marks} Mark Questions:", styles['MarkCategory']))
                content.append(Spacer(1, 6))
                for q in questions:
                    render(number, marks, q)
                    content.append(Spacer(1, 12))
                    number += 1

        add_section('mcq', "Section A: Multiple Choice Questions", render_mcq)
        add_section('descriptive', "Section B: Descriptive Questions", render_descriptive)
        add_section('scenario', "Section C: Scenario-based Questions", render_scenario)

        doc.build(content)
        
    def export_answer_key(self, output_path):
        """Export the answer key for ``self.questions`` to a PDF.

        Questions are grouped by ``q['type']`` and then by ``q['marks']``.
        Up to three sections are written in a fixed order -- MCQ (A),
        descriptive (B) and scenario (C) -- and a section is omitted when no
        question of that type exists. Numbering restarts at 1 in every
        section and runs across its mark categories, which are listed in
        ascending order of marks.

        Question, scenario and answer text is XML-escaped before it is handed
        to ``Paragraph``: ReportLab parses paragraph text as inline markup, so
        a stray ``<`` or ``&`` in text extracted from the source PDF could
        otherwise break the build or be interpreted as a tag. Newlines in
        descriptive and scenario answers are turned into ``<br/>`` breaks
        after escaping; MCQ answers are emitted without that conversion.

        Args:
            output_path: Destination passed to ``SimpleDocTemplate``.
        """
        # Function-scope import keeps this method self-contained.
        from xml.sax.saxutils import escape

        doc = SimpleDocTemplate(
            output_path,
            pagesize=letter,
            rightMargin=72,
            leftMargin=72,
            topMargin=72,
            bottomMargin=72,
        )

        styles = getSampleStyleSheet()
        normal = styles['Normal']

        # Styles used only by the answer key.
        for style in (
            ParagraphStyle(
                name='CustomTitle',
                parent=styles['Heading1'],
                fontSize=16,
                spaceAfter=30,
                alignment=TA_CENTER,
            ),
            ParagraphStyle(
                name='SectionHeader',
                parent=styles['Heading2'],
                fontSize=14,
                spaceAfter=20,
                spaceBefore=20,
                textColor=colors.HexColor('#2E5090'),
            ),
            ParagraphStyle(
                name='QuestionHeader',
                parent=normal,
                fontSize=12,
                spaceBefore=15,
                spaceAfter=10,
                textColor=colors.HexColor('#000000'),
                fontName='Helvetica-Bold',
            ),
            ParagraphStyle(
                name='AnswerText',
                parent=normal,
                fontSize=11,
                spaceAfter=15,
                leftIndent=20,
                leading=16,
            ),
            ParagraphStyle(
                name='MarkCategory',
                parent=normal,
                fontSize=12,
                spaceBefore=10,
                spaceAfter=6,
                textColor=colors.HexColor('#444444'),
                fontName='Helvetica-Bold',
            ),
        ):
            styles.add(style)

        def markup(text, keep_breaks=False):
            """Return *text* as safe Paragraph markup."""
            safe = escape(str(text))
            # Convert newlines only after escaping so the <br/> tags survive.
            return safe.replace('\n', '<br/>') if keep_breaks else safe

        # Header
        content = [
            Paragraph("ANSWER KEY", styles['CustomTitle']),
            Paragraph(f"Date: {datetime.now().strftime('%B %d, %Y')}", normal),
            Spacer(1, 20),
        ]

        # Group questions by type, then by marks. Inner lists are created
        # only on append, so a group that exists is never empty.
        grouped_questions = defaultdict(lambda: defaultdict(list))
        for q in self.questions:
            grouped_questions[q['type']][q['marks']].append(q)

        # (type key, section title, gap after each answer,
        #  convert newlines in the answer, print the scenario text)
        sections = (
            ('mcq', "Section A: Multiple Choice Questions", 10, False, False),
            ('descriptive', "Section B: Descriptive Questions", 15, True, False),
            ('scenario', "Section C: Scenario-based Questions", 15, True, True),
        )

        for q_type, title, gap, keep_breaks, has_scenario in sections:
            # Membership test does not create the key on the defaultdict.
            if q_type not in grouped_questions:
                continue

            content.append(Paragraph(title, styles['SectionHeader']))

            question_num = 1
            for marks, questions in sorted(grouped_questions[q_type].items()):
                content.append(Paragraph(f"{marks} Mark Questions:", styles['MarkCategory']))
                content.append(Spacer(1, 6))

                for q in questions:
                    if has_scenario:
                        # Scenario questions show number, scenario and
                        # question as three separate header paragraphs.
                        content.append(Paragraph(
                            f"Question {question_num}:",
                            styles['QuestionHeader']
                        ))
                        content.append(Paragraph(
                            f"Scenario: {markup(q['scenario'])}",
                            styles['QuestionHeader']
                        ))
                        content.append(Paragraph(
                            f"Question: {markup(q['question'])}",
                            styles['QuestionHeader']
                        ))
                    else:
                        content.append(Paragraph(
                            f"Question {question_num}: {markup(q['question'])}",
                            styles['QuestionHeader']
                        ))

                    content.append(Paragraph(
                        markup(q['answer'], keep_breaks),
                        styles['AnswerText']
                    ))
                    content.append(Spacer(1, gap))
                    question_num += 1

        doc.build(content)

# Rest of the code remains the same...

def generate_question_paper_with_answers(pdf_path, question_config, output_pdf_path, answer_key_path):
    """Build a question paper and its answer key from a source PDF.

    Text is extracted from ``pdf_path``; questions are then generated from
    ``question_config`` and written to ``output_pdf_path``, with the matching
    answers written to ``answer_key_path``.

    Returns:
        True once both PDFs have been exported, or False when the text
        extraction step reports failure (nothing is exported in that case).
    """
    paper = EnhancedQuestionPaperGenerator()

    # Without source text there is nothing to build questions from.
    if not paper.extract_text_from_pdf(pdf_path):
        return False

    paper.generate_questions(question_config)
    paper.export_to_pdf(output_pdf_path)
    paper.export_answer_key(answer_key_path)
    return True

In [5]:
# Question mix for the generated paper, keyed by question type. Each entry
# pairs a 'marks' value with a 'count' (presumably the number of questions of
# that weight to generate -- confirm against generate_questions).
config = {
    'mcq': [{'marks': 1, 'count': 5}],
    'descriptive': [
        {'marks': marks, 'count': count}
        for marks, count in ((3, 2), (5, 1), (10, 1), (15, 1))
    ],
    'scenario': [{'marks': 5, 'count': 1}],
}

# Left as the final expression so the notebook displays the returned status.
generate_question_paper_with_answers(
    pdf_path='/Users/niyatipatel/Desktop/CC/1.pdf',
    question_config=config,
    output_pdf_path='question_paper.pdf',
    answer_key_path='answer_key.pdf',
)

True