In [None]:
import PyPDF2
import nltk
import random
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import wordnet
import re

# Download required NLTK data
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

class QuestionPaperGenerator:
    def __init__(self):
        self.content = ""
        self.questions = []
        self.sentences = []
    
    def extract_text_from_pdf(self, pdf_path):
        """Extract text content from PDF file"""
        try:
            with open(pdf_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                for page in pdf_reader.pages:
                    self.content += page.extract_text()
            self.sentences = sent_tokenize(self.content)
            return True
        except Exception as e:
            print(f"Error reading PDF: {str(e)}")
            return False

    def generate_mcq(self, sentence):
        """Generate an MCQ from a sentence"""
        words = word_tokenize(sentence)
        tagged = nltk.pos_tag(words)
        
        # Find nouns and important words to create options
        key_words = [word for word, tag in tagged if tag.startswith(('NN', 'VB', 'JJ'))]
        
        if not key_words:
            return None
        
        answer = random.choice(key_words)
        # Create question by replacing the answer with a blank
        question = sentence.replace(answer, "_____")
        
        # Generate options
        options = [answer]
        # Try to find synonyms using WordNet
        synsets = wordnet.synsets(answer)
        if synsets:
            for syn in synsets:
                options.extend(syn.lemma_names())
        
        # If we don't have enough options, add some random words
        while len(set(options)) < 4 and key_words:
            options.append(random.choice(key_words))
        
        # Take unique options and limit to 4
        options = list(set(options))[:4]
        # Add random wrong options if needed
        while len(options) < 4:
            options.append(f"Option {len(options) + 1}")
        
        random.shuffle(options)
        correct_index = options.index(answer)
        
        return {
            'question': question,
            'options': options,
            'correct_answer': correct_index,
            'answer': answer
        }

    def generate_descriptive_question(self, sentence, marks):
        """Generate a descriptive question based on marks"""
        question_starters = {
            'short': ['Define', 'What is', 'List', 'State'],
            'medium': ['Explain', 'Describe', 'Elaborate on'],
            'long': ['Critically analyze', 'Evaluate', 'Compare and contrast']
        }
        
        if marks <= 2:
            starter = random.choice(question_starters['short'])
        elif marks <= 5:
            starter = random.choice(question_starters['medium'])
        else:
            starter = random.choice(question_starters['long'])
            
        return f"{starter} {sentence}"

    def generate_scenario_based(self, context_sentences):
        """Generate a scenario-based question"""
        if len(context_sentences) < 2:
            return None
            
        scenario = " ".join(context_sentences[:2])
        questions = [
            f"Based on the above scenario, analyze how would you handle this situation?",
            f"What are the key factors to consider in this scenario?",
            f"How would you apply the concepts learned to resolve this situation?",
            f"What would be your approach to address the challenges in this scenario?"
        ]
        
        return {
            'scenario': scenario,
            'question': random.choice(questions)
        }

    def generate_questions(self, question_config):
        """
        Generate questions based on configuration
        question_config: dict with format:
        {
            'mcq': {'marks': 1, 'count': 3},
            'descriptive': {'marks': 5, 'count': 2},
            'scenario': {'marks': 10, 'count': 1}
        }
        """
        # Generate MCQs
        if 'mcq' in question_config:
            mcq_count = question_config['mcq']['count']
            for _ in range(mcq_count):
                if self.sentences:
                    sentence = random.choice(self.sentences)
                    mcq = self.generate_mcq(sentence)
                    if mcq:
                        mcq['type'] = 'mcq'
                        mcq['marks'] = question_config['mcq']['marks']
                        self.questions.append(mcq)
        
        # Generate Descriptive Questions
        if 'descriptive' in question_config:
            desc_count = question_config['descriptive']['count']
            for _ in range(desc_count):
                if self.sentences:
                    sentence = random.choice(self.sentences)
                    question = self.generate_descriptive_question(
                        sentence,
                        question_config['descriptive']['marks']
                    )
                    self.questions.append({
                        'type': 'descriptive',
                        'question': question,
                        'marks': question_config['descriptive']['marks']
                    })
        
        # Generate Scenario-based Questions
        if 'scenario' in question_config:
            scenario_count = question_config['scenario']['count']
            for _ in range(scenario_count):
                if len(self.sentences) >= 2:
                    context_sentences = random.sample(self.sentences, 2)
                    scenario_q = self.generate_scenario_based(context_sentences)
                    if scenario_q:
                        scenario_q['type'] = 'scenario'
                        scenario_q['marks'] = question_config['scenario']['marks']
                        self.questions.append(scenario_q)

    def format_question_paper(self):
        """Format the generated questions into a proper question paper"""
        formatted_paper = "QUESTION PAPER\n"
        formatted_paper += "=" * 50 + "\n\n"
        
        total_marks = sum(q['marks'] for q in self.questions)
        formatted_paper += f"Total Marks: {total_marks}\n"
        formatted_paper += f"Time: {total_marks * 1.5} minutes\n\n"
        
        # Group questions by type
        mcqs = [q for q in self.questions if q['type'] == 'mcq']
        descriptive = [q for q in self.questions if q['type'] == 'descriptive']
        scenarios = [q for q in self.questions if q['type'] == 'scenario']
        
        # Format MCQs
        if mcqs:
            formatted_paper += "Section A: Multiple Choice Questions\n\n"
            for i, q in enumerate(mcqs, 1):
                formatted_paper += f"{i}. {q['question']} [{q['marks']} mark]\n"
                for j, option in enumerate(q['options']):
                    formatted_paper += f"   {chr(97+j)}) {option}\n"
                formatted_paper += "\n"
        
        # Format Descriptive Questions
        if descriptive:
            formatted_paper += "Section B: Descriptive Questions\n\n"
            for i, q in enumerate(descriptive, 1):
                formatted_paper += f"{i}. {q['question']} [{q['marks']} marks]\n\n"
        
        # Format Scenario-based Questions
        if scenarios:
            formatted_paper += "Section C: Scenario-based Questions\n\n"
            for i, q in enumerate(scenarios, 1):
                formatted_paper += f"{i}. Read the following scenario:\n"
                formatted_paper += f"{q['scenario']}\n\n"
                formatted_paper += f"Question: {q['question']} [{q['marks']} marks]\n\n"
        
        return formatted_paper

def generate_paper(pdf_path, question_config):
    """
    Main function to generate question paper
    """
    generator = QuestionPaperGenerator()
    if generator.extract_text_from_pdf(pdf_path):
        generator.generate_questions(question_config)
        return generator.format_question_paper()
    return None

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/niyatipatel/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/niyatipatel/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/niyatipatel/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [2]:
pdf_path = '/Users/niyatipatel/Desktop/CC/1.pdf'
question_config = {
    'mcq': {'marks': 1, 'count': 3},
    'descriptive': {'marks': 5, 'count': 2},
    'scenario': {'marks': 5, 'count': 2}
}

question_paper = generate_paper(pdf_path, question_config)
print(question_paper)

QUESTION PAPER

Total Marks: 23
Time: 34.5 minutes

Section A: Multiple Choice Questions

1. Due to its ﬂexibility, it is a desirable choice for 
businesses that _____ the advantages of a private cloud deployment without the initial capital outlay 
and ongoing maintenance expenses involved with an on-premise implementation. [1 mark]
   a) desire
   b) hope
   c) trust
   d) want

2. The business _____ 
not need to build its own IT infrastructure or purchase hardware or equipment. [1 mark]
   a) coif
   b) does
   c) practice
   d) doe

3. SaaS, PaaS, and IaaS are the three _____ cloud computing service model categories. [1 mark]
   a) master
   b) independent
   c) main
   d) briny

Section B: Descriptive Questions

1. Elaborate on The consumer does not manage or control the underlying cloud 
infrastructure including network, servers, operating systems, or storage, but has control over the deployed 
applications and possibly conﬁguration settings for the application-hosting environment