In [10]:
import PyPDF2
import nltk
import random
from nltk.tokenize import sent_tokenize
import re

# Download required NLTK data
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

class QuestionPaperGenerator:
    def __init__(self):
        self.content = ""
        self.questions = []
    
    def extract_text_from_pdf(self, pdf_path):
        """Extract text content from PDF file"""
        try:
            with open(pdf_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                for page in pdf_reader.pages:
                    self.content += page.extract_text()
            return True
        except Exception as e:
            print(f"Error reading PDF: {str(e)}")
            return False
    
    def generate_questions(self, num_questions, marks_distribution):
        """Generate questions based on the content"""
        sentences = sent_tokenize(self.content)
        
        # Filter sentences that are more likely to be convertible to questions
        potential_questions = []
        for sent in sentences:
            # Look for sentences with key patterns that might make good questions
            if any(keyword in sent.lower() for keyword in ['is', 'are', 'was', 'were', 'define', 'explain', 'describe', 'what', 'why', 'how']):
                potential_questions.append(sent)
        
        # Generate different types of questions based on marks distribution
        for marks, count in marks_distribution.items():
            for _ in range(count):
                if potential_questions:
                    sentence = random.choice(potential_questions)
                    potential_questions.remove(sentence)  # Avoid repetition
                    
                    # Generate different types of questions based on marks
                    if marks <= 2:  # Short questions
                        question = self._generate_short_question(sentence)
                    elif marks <= 5:  # Medium questions
                        question = self._generate_medium_question(sentence)
                    else:  # Long questions
                        question = self._generate_long_question(sentence)
                    
                    self.questions.append({
                        'question': question,
                        'marks': marks,
                        'original_text': sentence
                    })
    
    def _generate_short_question(self, sentence):
        """Generate a short question (1-2 marks)"""
        # Convert statements to questions
        if sentence.lower().startswith(('the', 'a', 'an')):
            question = f"Define or explain: {sentence}"
        else:
            question = f"What is meant by {sentence.strip('.')}?"
        return question
    
    def _generate_medium_question(self, sentence):
        """Generate a medium question (3-5 marks)"""
        # Create explanatory questions
        return f"Explain in detail: {sentence}"
    
    def _generate_long_question(self, sentence):
        """Generate a long question (>5 marks)"""
        # Create analytical questions
        return f"Critically analyze and explain in detail: {sentence}"
    
    def format_question_paper(self):
        """Format the generated questions into a proper question paper"""
        formatted_paper = "QUESTION PAPER\n"
        formatted_paper += "=" * 50 + "\n\n"
        
        total_marks = sum(q['marks'] for q in self.questions)
        formatted_paper += f"Total Marks: {total_marks}\n"
        formatted_paper += f"Time: {total_marks * 1.5} minutes\n\n"
        
        for i, q in enumerate(self.questions, 1):
            formatted_paper += f"Q{i}. {q['question']} [{q['marks']} marks]\n\n"
        
        return formatted_paper

def generate_paper(pdf_path, question_counts):
    """
    Main function to generate question paper
    question_counts: dict with marks as key and number of questions as value
    Example: {2: 5, 5: 3, 10: 2} means:
        - 5 questions of 2 marks each
        - 3 questions of 5 marks each
        - 2 questions of 10 marks each
    """
    generator = QuestionPaperGenerator()
    if generator.extract_text_from_pdf(pdf_path):
        generator.generate_questions(sum(question_counts.values()), question_counts)
        return generator.format_question_paper()
    return None

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/niyatipatel/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/niyatipatel/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [13]:
# Example usage
pdf_path = '/Users/niyatipatel/Desktop/CC/1.pdf'
question_distribution = {
    2: 5,  # 5 questions of 2 marks each
    5: 3,  # 3 questions of 5 marks each
    10: 2  # 2 questions of 10 marks each
}

question_paper = generate_paper(pdf_path, question_distribution)
print(question_paper)

QUESTION PAPER

Total Marks: 45
Time: 67.5 minutes

Q1. What is meant by With the help of this root, all the related resources like memory space, 
processor speed, and hard drive space are utilized in the best possible manner? [2 marks]

Q2. What is meant by It is suitable for organizations that require a separate cloud for their personal use and data security is 
the ﬁrst priority? [2 marks]

Q3. What is meant by To make it 
simple, take the example of an annual day function, you will have two options either to create a venue or 
to rent a venue but the function is the same? [2 marks]

Q4. What is meant by SaaS tools may become incompatible with other tools and hardware already used in your business? [2 marks]

Q5. Define or explain: All three of these things describe frontend cloud architecture components. [2 marks]

Q6. Explain in detail: Hybrid enables eﬃcient workload distribution based on speciﬁc needs and performance requirements. [5 marks]

Q7. Explain in detail: Second Root: D