In [2]:
import re
import json
from typing import List, Dict

class QAParser:
    def __init__(self, file_path: str):
        with open(file_path, 'r') as file:
            self.page_content = file.read()

    def parse_qa(self) -> List[Dict[str, str]]:
        qa_set = []
        lines = self.page_content.split("\n")
        reading_question = False
        reading_answer = False
        current_question = ""
        current_answer = ""

        question_re = re.compile(r'"Question":')
        answer_re = re.compile(r'"Answer":')

        for line in lines:
            if question_re.search(line):
                print("Found a Question block")
                reading_question = True
                split_line = line.split('"Question": "')
                if len(split_line) > 1:
                    current_question = split_line[1].strip(' "\n')

            elif answer_re.search(line):
                print("Found an Answer block")
                reading_answer = True
                split_line = line.split('"Answer": "')
                if len(split_line) > 1:
                    current_answer = split_line[1].strip(' "\n')

            elif reading_answer and line.strip() == "},":
                reading_answer = False
                qa_set.append({
                    'question': current_question,
                    'answer': current_answer
                })
                print(f"Appended QA: {qa_set[-1]}")
                # Reset current question and answer
                current_question = ""
                current_answer = ""
            else:
                if reading_question:
                    current_question += ' ' + line.strip(' "\n')
                if reading_answer:
                    current_answer += ' ' + line.strip(' "\n')
        
        return qa_set

if __name__ == "__main__":
    file_path = '../assets/q_a.json'
    parser = QAParser(file_path)
    parsed_qa = parser.parse_qa()

    # Save to a new JSON file
    with open('../assets/q_a_parsed.json', 'w') as f:
        json.dump(parsed_qa, f, indent=4)

    print(f"Saved")


Found a Question block
Found an Answer block
Appended QA: {'question': 'Resource Management - details of directly employed staff and levels of expertise, demonstrate the ability to cope with volume of works",', 'answer': 'During mobilisation, and in conjunction with any applicable TUPE transfer, MCP will work with Sandwell MBC to plan likely labor-resourcing issues and review historical void trends to ensure the right skills balance is established. Voids will be delivered by a core team of specialist subcontractors and directly-employed operatives to meet an established baseline demand, based on historical and known trends. We believe that a consistent team is essential when working to establish strong relationships and good communications. We will provide a dedicated contract management team to deliver the contract, combined with our team of subcontractor partners to ensure consistency and reliability. Each team member is competent and experienced in carrying out similar works. The su

In [3]:
parsed_qa

[{'question': 'Resource Management - details of directly employed staff and levels of expertise, demonstrate the ability to cope with volume of works",',
  'answer': 'During mobilisation, and in conjunction with any applicable TUPE transfer, MCP will work with Sandwell MBC to plan likely labor-resourcing issues and review historical void trends to ensure the right skills balance is established. Voids will be delivered by a core team of specialist subcontractors and directly-employed operatives to meet an established baseline demand, based on historical and known trends. We believe that a consistent team is essential when working to establish strong relationships and good communications. We will provide a dedicated contract management team to deliver the contract, combined with our team of subcontractor partners to ensure consistency and reliability. Each team member is competent and experienced in carrying out similar works. The supervision and contract management team will be supporte