In [None]:
import pandas as pd
import json
import xml.etree.ElementTree as ET

class SymptomsParser:
    """Maintain a JSON-backed dictionary of symptoms harvested from tabular data.

    The dictionary maps generated keys (``Symptom1``, ``Symptom2``, ...) to a
    one-entry mapping ``{symptom_name: [severity levels]}``. It is loaded from
    ``dict_file`` on construction and written back by ``dump_dictionary``.
    """

    # Severity levels assigned to symptoms discovered automatically in data.
    DEFAULT_SEVERITY_LEVELS = ('Mild', 'Low', 'High')

    def __init__(self, dict_file='symptoms_dict.json'):
        """Create a parser and load the dictionary stored in *dict_file*."""
        self.symptoms_dict = {}
        self.dict_file = dict_file
        self.load_dictionary()

    def read_data(self, file_path):
        """Read *file_path* into a DataFrame, dispatching on its extension.

        Supported extensions (matched case-insensitively): ``.csv``, ``.tsv``,
        ``.json`` and ``.xml``.

        Raises:
            ValueError: if the extension is not one of the supported ones.
        """
        path = str(file_path)
        lowered = path.lower()
        if lowered.endswith('.csv'):
            # Read the file that was asked for, not a fixed notebook path.
            return pd.read_csv(path)
        if lowered.endswith('.tsv'):
            return pd.read_csv(path, delimiter='\t')
        if lowered.endswith('.json'):
            return pd.read_json(path)
        if lowered.endswith('.xml'):
            return self.parse_xml_to_dataframe(path)
        raise ValueError("Unsupported file format")

    @staticmethod
    def parse_xml_to_dataframe(file_path):
        """Build a DataFrame with one row per ``Patient`` child of the XML root.

        Each direct child element of a ``Patient`` becomes a column named after
        its tag and holding its text.
        """
        tree = ET.parse(file_path)
        root = tree.getroot()
        data = [{child.tag: child.text for child in patient} for patient in root.findall('./Patient')]
        return pd.DataFrame(data)

    @staticmethod
    def _split_tokens(cell):
        """Return the stripped, non-empty comma-separated tokens of *cell*."""
        if pd.isnull(cell):
            return []
        # str() lets non-string cells (e.g. numbers) be handled without raising.
        return [token for token in (part.strip() for part in str(cell).split(',')) if token]

    def _known_symptoms(self):
        """Return the set of every symptom name currently in the dictionary."""
        return {symptom for group in self.symptoms_dict.values() for symptom in group}

    def _next_key(self):
        """Return an unused ``Symptom<N>`` key, starting from ``len + 1``."""
        index = len(self.symptoms_dict) + 1
        # Skip keys already present so a non-contiguous dictionary is never
        # overwritten.
        while f'Symptom{index}' in self.symptoms_dict:
            index += 1
        return f'Symptom{index}'

    def enhance_dictionary(self, data, symptom_col='Other_Symptoms'):
        """Add every symptom found in ``data[symptom_col]`` that is not yet known.

        Cells are split on commas; tokens are stripped and empty tokens are
        ignored. New symptoms receive ``DEFAULT_SEVERITY_LEVELS``.

        Returns:
            The (mutated) symptoms dictionary.
        """
        known = self._known_symptoms()
        for cell in data[symptom_col].dropna():
            for symptom in self._split_tokens(cell):
                if symptom in known:
                    continue
                self.symptoms_dict[self._next_key()] = {symptom: list(self.DEFAULT_SEVERITY_LEVELS)}
                known.add(symptom)
        return self.symptoms_dict

    def _filter_by_dictionary(self, data, symptom_col):
        """Return the rows of *data* listing at least one known symptom.

        Matching is done on whole comma-separated tokens, the same unit that
        ``enhance_dictionary`` stores, so a known symptom never matches merely
        because it is a substring of a different symptom.
        """
        known = self._known_symptoms()
        flags = [not known.isdisjoint(self._split_tokens(cell)) for cell in data[symptom_col]]
        # An explicit boolean Series keeps the indexing valid for empty frames.
        mask = pd.Series(flags, index=data.index, dtype=bool)
        return data[mask]

    def print_data_based_on_dict(self, data, symptom_col='Other_Symptoms'):
        """Print the rows of *data* whose symptom column lists a known symptom."""
        filtered_data = self._filter_by_dictionary(data, symptom_col)
        print("Filtered Data Based on Available Symptoms in Dictionary:")
        print(filtered_data)

    def dump_dictionary(self):
        """Write the symptoms dictionary to ``self.dict_file`` as indented JSON."""
        with open(self.dict_file, 'w', encoding='utf-8') as f:
            json.dump(self.symptoms_dict, f, indent=4)
        print(f"Dictionary saved to {self.dict_file}")

    def load_dictionary(self):
        """Load the symptoms dictionary from ``self.dict_file``.

        If the file does not exist, a notice is printed and the dictionary is
        reset to empty, as the notice states.
        """
        try:
            with open(self.dict_file, 'r', encoding='utf-8') as f:
                self.symptoms_dict = json.load(f)
        except FileNotFoundError:
            print(f"File not found: {self.dict_file}. Initializing an empty dictionary.")
            self.symptoms_dict = {}

    def manual_update(self):
        """Prompt for a symptom and its severity levels, then store and save it.

        The symptom name is stripped; an empty name leaves the dictionary
        untouched. Blank severity levels are dropped, and if none remain the
        ``DEFAULT_SEVERITY_LEVELS`` are used. The dictionary is saved only when
        a new symptom is added.
        """
        symptom = input("Enter symptom: ").strip()
        severity_levels = self._split_tokens(input("Enter severity levels (comma-separated): "))
        if not symptom:
            print("No symptom entered. Dictionary unchanged.")
            return
        if symptom in self._known_symptoms():
            print(f"Symptom '{symptom}' already exists.")
            return
        if not severity_levels:
            severity_levels = list(self.DEFAULT_SEVERITY_LEVELS)
        self.symptoms_dict[self._next_key()] = {symptom: severity_levels}
        print(f"Added '{symptom}' with levels {severity_levels}.")
        self.dump_dictionary()

    def process_data(self, file_path, symptom_col='Other_Symptoms'):
        """Read *file_path*, extend the dictionary from it, and save the result.

        The matching rows are printed twice: once before and once after the
        dictionary is extended with the symptoms found in the data.
        """
        data = self.read_data(file_path)
        self.print_data_based_on_dict(data, symptom_col)
        self.enhance_dictionary(data, symptom_col)
        self.print_data_based_on_dict(data, symptom_col)
        self.dump_dictionary()

if __name__ == "__main__":
    # Script entry point: load (or start) the dictionary, process the data
    # file, then offer one optional manual addition.
    parser = SymptomsParser('symptoms_dict.json')
    parser.process_data('data.csv', 'Other_Symptoms')

    # Only an answer that is "Y" after trimming and upper-casing triggers the
    # manual update.
    if "Y" == input("Update dictionary manually? [Y/N]: ").strip().upper():
        parser.manual_update()


File not found: symptoms_dict.json. Initializing an empty dictionary.
Filtered Data Based on Available Symptoms in Dictionary:
Empty DataFrame
Columns: [SrNo, Patient_Id, Fever_Mild, Fever_Low, Fever_High, DOB, Cough_Mild, Cough_Low, Cough_High, Cold_Mild, Cold_Low, Cold_High, Other_Symptoms]
Index: []
Filtered Data Based on Available Symptoms in Dictionary:
    SrNo  Patient_Id Fever_Mild Fever_Low Fever_High         DOB Cough_Mild  \
0      1           1          Y         N          N  04-05-1996          N   
1      2           2          Y         N          N  26-08-1980          N   
2      3           3          Y         N          N  31-11-2003          N   
3      4           4          Y         N          N  27-06-1999          N   
4      5           5          Y         N          N  21-07-2000          N   
5      6           6          Y         N          N  07-03-2013          N   
6      7           7          Y         N          N  03-12-2005          N   
7      