In [None]:
pip install PyPDF2

In [5]:
import os
import re
from PyPDF2 import PdfReader
import json

# ✅ Folder holding the source PDFs of each questionnaire category
FOLDER_PATHS = {
    "Depression": "./Depression & Anxiety",
    "Personality & Behaviour": "./Personality & Behaviour Analysis",
    "Stress & Coping": "./Stress & Coping Mechanisms",
    "Trauma & PTSD": "./Trauma & PTSD",
}


def _numbered_scale(labels, start=0):
    """Map consecutive integer scores (as strings) to their answer labels."""
    return {str(value): label for value, label in enumerate(labels, start)}


# ✅ Answer scales per category (example): score value (as a string) -> label
SCORING_TEMPLATE = {
    "Depression": _numbered_scale(
        ["Not at all", "Several days", "More than half the days", "Nearly every day"]
    ),
    "Personality & Behaviour": _numbered_scale(
        ["Strongly disagree", "Disagree", "Neutral", "Agree", "Strongly agree"],
        start=1,
    ),
    "Stress & Coping": _numbered_scale(["Never", "Rarely", "Sometimes", "Often"]),
    "Trauma & PTSD": _numbered_scale(["No", "Yes"]),
}

# ✅ Interpretation bands per category: level -> (low, high) total-score range
THRESHOLDS = {
    "Depression": {
        "Minimal": (0, 4), "Mild": (5, 9), "Moderate": (10, 14), "Severe": (15, 21),
    },
    "Personality & Behaviour": {
        "Introverted": (0, 10), "Balanced": (11, 20), "Extroverted": (21, 30),
    },
    "Stress & Coping": {
        "Low": (0, 6), "Moderate": (7, 12), "High": (13, 18),
    },
    "Trauma & PTSD": {
        "No trauma": (0, 1), "Possible trauma": (2, 3), "Severe trauma": (4, 5),
    },
}

# ✅ Extract text from PDF
def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF at *file_path* as one string.

    Pages are joined with a newline. Without a separator the last line of one
    page runs straight into the first line of the next (e.g. an option line
    fused with the following question number), which breaks the line-oriented
    regexes applied to this text afterwards.

    Args:
        file_path: Path of the PDF file, passed to ``PdfReader``.

    Returns:
        The concatenated page text; a page for which ``extract_text()``
        returns a falsy value contributes an empty string.
    """
    reader = PdfReader(file_path)
    return "\n".join(page.extract_text() or "" for page in reader.pages)

# ✅ Extract questions and options using regex
# Patterns are compiled once at import time instead of on every call.
#
# A question is a numbered line ("1." or "Q. 1") ending in "?" or ".",
# followed by one or more lettered option lines ("a. ...").  The whole run of
# option lines is captured as ONE group: a repeated capturing group only keeps
# its final repetition, which would drop every option except the last.
_QUESTION_PATTERN = re.compile(
    r"((?:Q\.\s*\d+|\d+\.)\s*.*?[\?\.])\s*((?:[a-z]\.\s*.*(?:\n|$))+)",
    re.MULTILINE | re.IGNORECASE,
)
# One option inside the captured run: a letter label, a dot, then the text.
_OPTION_PATTERN = re.compile(r"([a-z])\.\s*(.*)", re.IGNORECASE)
# Any remaining line ending in "?" or "." is treated as an option-less question.
_STANDALONE_QUESTION_PATTERN = re.compile(r"^(.*?[\?\.])\s*$", re.MULTILINE)


def extract_questions_with_options(text):
    """Parse questionnaire text into questions and their lettered options.

    Two passes are made over *text*:

    1. Numbered questions that are directly followed by lettered option lines
       are collected together with all of those options.
    2. Those matches are removed, and every remaining line that ends in "?"
       or "." is recorded as a question with no options.

    Args:
        text: Plain text of a questionnaire (for example, extracted from a PDF).

    Returns:
        A list of ``{"question": str, "options": dict}`` entries, questions
        with options first, then standalone ones. ``options`` maps the option
        label (e.g. ``"a"``) to its text and is empty for standalone
        questions. Empty or ``None`` input gives an empty list.
    """
    if not text:
        return []

    questions_with_options = []

    for match in _QUESTION_PATTERN.finditer(text):
        options = {
            option.group(1).strip(): option.group(2).strip()
            for option in _OPTION_PATTERN.finditer(match.group(2).strip())
        }
        questions_with_options.append(
            {"question": match.group(1).strip(), "options": options}
        )

    # Drop what was already matched so those lines are not counted twice.
    remaining_text = _QUESTION_PATTERN.sub('', text)
    for match in _STANDALONE_QUESTION_PATTERN.finditer(remaining_text):
        question_text = match.group(1).strip()
        if question_text:  # Avoid empty matches
            questions_with_options.append({"question": question_text, "options": {}})

    return questions_with_options

# ✅ Extract questions with options from all PDFs in a folder
def extract_questions_from_folder(folder_path):
    """Collect the questions found in every PDF directly inside *folder_path*.

    Files are visited in sorted name order so the result is the same on every
    run (``os.listdir`` returns entries in arbitrary order), and the ``.pdf``
    extension is matched case-insensitively so files such as ``SCAN.PDF`` are
    not silently skipped.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A flat list of the question entries produced by
        ``extract_questions_with_options`` for all PDFs in the folder.

    Raises:
        OSError: If *folder_path* cannot be listed. Failures on an individual
            file are reported on stdout and that file is skipped.
    """
    all_questions_with_options = []
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.lower().endswith('.pdf'):
            continue
        file_path = os.path.join(folder_path, file_name)
        try:
            print(f"📄 Processing {file_path}...")
            text = extract_text_from_pdf(file_path)
            all_questions_with_options.extend(extract_questions_with_options(text))
        except Exception as e:
            # Deliberately broad: one unreadable or malformed PDF must not
            # abort the whole folder, so report it and move on.
            print(f"❌ Error reading {file_path}: {e}")
    return all_questions_with_options

# ✅ Total the numeric answers and label the total with the category's band
def calculate_score(category, answers):
    """Sum *answers* and classify the total using ``THRESHOLDS[category]``.

    Args:
        category: Category name used as the key into ``THRESHOLDS``.
        answers: Iterable of numeric answer values.

    Returns:
        ``(score, level)`` where ``level`` is the first band whose inclusive
        ``(low, high)`` range contains the score, ``"Unknown"`` if no band
        contains it, or ``"Category thresholds not defined"`` if the category
        has no entry in ``THRESHOLDS``.
    """
    total = sum(answers)

    if category not in THRESHOLDS:
        return total, "Category thresholds not defined"

    bands = THRESHOLDS[category]
    matched_level = next(
        (name for name, (lower, upper) in bands.items() if lower <= total <= upper),
        "Unknown",
    )
    return total, matched_level

# ✅ Build the questionnaire: category -> questions extracted from its folder
generated_questionnaire = {}

for category, path in FOLDER_PATHS.items():
    if not os.path.exists(path):
        print(f"❌ Folder not found: {path}")
        continue
    print(f"\n🔎 Extracting questions for {category}...")
    found_questions = extract_questions_from_folder(path)
    if not found_questions:
        print(f"⚠️ No questions found in {category}")
        continue
    generated_questionnaire[category] = found_questions

# ✅ Print every category with its numbered questions and any options
print("\n📝 Generated Questionnaire with Options:")
for category, questions in generated_questionnaire.items():
    print(f"\n--- {category} ---")
    for number, entry in enumerate(questions, start=1):
        print(f"{number}. Question: {entry['question']}")
        choices = entry['options']
        if choices:
            print("   Options:")
            for label, option_text in choices.items():
                print(f"     {label}. {option_text}")
        print("-" * 20)

# ✅ Mock user answers: a placeholder answer of 1 for up to the first five
#    questions of each category (real answers would come from user input)
mock_answers = {
    category: [1] * min(len(questions), 5)
    for category, questions in generated_questionnaire.items()
}

# ✅ Score each category; categories without thresholds are marked N/A
final_scores = {}
for category, answers in mock_answers.items():
    if category not in THRESHOLDS:
        final_scores[category] = {"score": "N/A", "level": "N/A (Thresholds not defined)"}
        continue
    score, level = calculate_score(category, answers)
    final_scores[category] = {"score": score, "level": level}

# ✅ Write the questionnaire and the scoring results to their JSON files
for output_name, payload in (
    ('generated_questionnaire_with_options.json', generated_questionnaire),
    ('scoring_results.json', final_scores),
):
    with open(output_name, 'w', encoding='utf-8') as output_file:
        json.dump(payload, output_file, indent=4)

print("\n✅ Generated questionnaire with options saved to 'generated_questionnaire_with_options.json'")
print("✅ Scoring results saved to 'scoring_results.json'")


🔎 Extracting questions for Depression...
📄 Processing ./Depression & Anxiety\Beck Anxiety Inventory (BAI) – Measures anxiety levels..pdf...
📄 Processing ./Depression & Anxiety\Beck-Depression-Inventory-BDI(self reported test for depression symptoms).pdf...
📄 Processing ./Depression & Anxiety\GAD-7_Anxiety-updated_0(Assesses anxiety levels.).pdf...
📄 Processing ./Depression & Anxiety\HAMILTON-DEPRESSION(Used for clinical depression assessment.).pdf...
📄 Processing ./Depression & Anxiety\PHQ9 id date 08.03(to measure severity of dpession).pdf...

🔎 Extracting questions for Personality & Behaviour...
📄 Processing ./Personality & Behaviour Analysis\MBTI-Personality-Type-Test.pdf...
📄 Processing ./Personality & Behaviour Analysis\MMPI-2-Test-Questions.pdf...

🔎 Extracting questions for Stress & Coping...
📄 Processing ./Stress & Coping Mechanisms\COPE Inventory – Assesses different coping styles (e.g., avoidance, problem-solving)..pdf...
📄 Processing ./Stress & Coping Mechanisms\Perceived S