In [2]:
!pip install pypdf

Collecting pypdf
  Downloading pypdf-5.1.0-py3-none-any.whl.metadata (7.2 kB)
Downloading pypdf-5.1.0-py3-none-any.whl (297 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 298.0/298.0 kB 14.0 MB/s eta 0:00:00
Installing collected packages: pypdf
Successfully installed pypdf-5.1.0


In [5]:
import json
import os
import re
import sqlite3
from abc import ABC, abstractmethod

from pypdf import PdfReader

class QuestionInterface(ABC):
    """Abstract base for question objects that can write themselves to a database."""

    @abstractmethod
    def store_question(self, db_connection):
        """Persist this question using ``db_connection``.

        Subclasses must override this method; the base implementation does
        nothing and returns ``None``.
        """
        return None

class SubjectiveQuestion(QuestionInterface):
    """A question stored with ``question_type`` set to ``'Subjective'``.

    Args:
        text: The question text.
        chapter: Chapter label stored alongside the question.
        subject: Subject label stored alongside the question. Defaults to
            ``'Chemistry'``, the value that was previously hard-coded.
    """

    def __init__(self, text, chapter, subject='Chemistry'):
        self.text = text
        self.chapter = chapter
        self.subject = subject

    def store_question(self, db_connection):
        """Insert this question as one row of the ``questions`` table.

        The insert is not committed here; committing is left to the caller
        that owns ``db_connection``.
        """
        cursor = db_connection.cursor()
        try:
            cursor.execute('''
                INSERT INTO questions
                (subject, question_text, question_type, chapter)
                VALUES (?, ?, ?, ?)
            ''', (self.subject, self.text, 'Subjective', self.chapter))
        finally:
            # Release the cursor even when the insert raises.
            cursor.close()

class MultipleChoiceQuestion(QuestionInterface):
    """A question stored with ``question_type`` set to ``'Multiple Choice'``.

    Args:
        text: The question text.
        options: The answer options; must be JSON-serialisable (e.g. a list
            of strings).
        chapter: Chapter label stored alongside the question.
        subject: Subject label stored alongside the question. Defaults to
            ``'Chemistry'``, the value that was previously hard-coded.
    """

    def __init__(self, text, options, chapter, subject='Chemistry'):
        self.text = text
        self.options = options
        self.chapter = chapter
        self.subject = subject

    def store_question(self, db_connection):
        """Insert this question as one row of the ``questions`` table.

        ``options`` is written as a JSON array so that any consumer can parse
        it back; ``str(list)`` would store a Python repr instead.
        ``ensure_ascii=False`` keeps non-ASCII characters readable in the
        stored text. The insert is not committed here; committing is left to
        the caller that owns ``db_connection``.
        """
        options_json = json.dumps(self.options, ensure_ascii=False)
        cursor = db_connection.cursor()
        try:
            cursor.execute('''
                INSERT INTO questions
                (subject, question_text, question_type, options, chapter)
                VALUES (?, ?, ?, ?, ?)
            ''', (self.subject, self.text, 'Multiple Choice', options_json, self.chapter))
        finally:
            # Release the cursor even when the insert raises.
            cursor.close()

class PDFQuestionExtractor:
    """Find "Question:" lines in a PDF and store them in a SQLite database.

    Args:
        pdf_path: Path of the PDF passed to ``PdfReader``.
        db_path: Path of the SQLite file passed to ``sqlite3.connect``.
    """

    # One match per "Question:" line. ``[^\n]*`` stops at the next newline OR
    # at the end of the text, so a question on the last line of a page is kept
    # even when the extracted text has no trailing newline.
    _QUESTION_PATTERN = re.compile(r'Question:[^\n]*')

    def __init__(self, pdf_path, db_path):
        self.pdf_path = pdf_path
        self.db_path = db_path

    def create_database(self):
        """Open the database and ensure the ``questions`` table exists.

        Returns:
            An open ``sqlite3.Connection``, or ``None`` when SQLite raised an
            error. In that case the error is printed and a connection that
            was already opened is closed instead of being leaked.
        """
        conn = None
        try:
            conn = sqlite3.connect(self.db_path)
            conn.execute('''
                CREATE TABLE IF NOT EXISTS questions (
                    id INTEGER PRIMARY KEY,
                    subject TEXT,
                    question_text TEXT,
                    question_type TEXT,
                    options TEXT,
                    chapter TEXT
                )
            ''')
            conn.commit()
            return conn
        except sqlite3.Error as e:
            print(f"Database error: {e}")
            if conn is not None:
                conn.close()
            return None

    @classmethod
    def _find_questions(cls, text):
        """Return every "Question:" line in ``text``; ``[]`` for None or empty text."""
        if not text:
            return []
        return cls._QUESTION_PATTERN.findall(text)

    @staticmethod
    def _build_question(question):
        """Wrap a question string in the question object used to store it."""
        # Placeholder classification carried over unchanged: text containing
        # '?' is treated as subjective, anything else as multiple choice with
        # dummy options. Customize for the real document layout.
        if '?' in question:
            return SubjectiveQuestion(question, 'General Chemistry')
        return MultipleChoiceQuestion(question, ['A', 'B', 'C', 'D'], 'General Chemistry')

    def extract_questions(self):
        """Read the PDF page by page and store every question that is found.

        All inserts are committed once, after the last page. If anything
        raises while processing, the error is printed and the connection is
        closed without committing. Returns ``None`` in every case.
        """
        db_connection = self.create_database()
        if db_connection is None:
            return

        try:
            reader = PdfReader(self.pdf_path)
            for page in reader.pages:
                for question in self._find_questions(page.extract_text()):
                    self._build_question(question).store_question(db_connection)

            db_connection.commit()
        except Exception as e:
            print(f"Error processing PDF: {e}")
        finally:
            db_connection.close()

# Run the extraction: read the chemistry PDF and write its questions to the SQLite file.
extractor = PDFQuestionExtractor(
    pdf_path='/content/Chemistry Questions.pdf',
    db_path='chemistry_questions.db',
)
extractor.extract_questions()