In [None]:
import pandas as pd

# Step 1


In [None]:
file_path = '/content/Auto_Regressive_Model_Diffusion_Model_V_1.0.xlsx'

data = pd.read_excel(file_path, sheet_name='Base_Data_Set')

In [None]:
data = data[['Symptoms - 1', 'Symptoms - 2', 'Symptoms - 3', 'Other Symptoms']]


In [None]:
symptom_dict = {}

# Step 2

In [None]:
def parse_symptoms(row, symptom_dict):
    """Parses symptoms from a row and updates the dictionary."""
    symptoms = {
        'Symptoms - 1': row['Symptoms - 1'],
        'Symptoms - 2': row['Symptoms - 2'],
        'Symptoms - 3': row['Symptoms - 3'],
        'Other Symptoms': row['Other Symptoms']
    }

    for symptom_name, symptom_value in symptoms.items():
        if pd.notna(symptom_value):  # Only add non-NaN values
            if symptom_name not in symptom_dict:
                symptom_dict[symptom_name] = set()
            symptom_dict[symptom_name].add(symptom_value)

In [None]:
def display_dict(symptom_dict):
    """Displays the current state of the symptom dictionary."""
    for symptom, variations in symptom_dict.items():
        print(f"{symptom}: {', '.join(variations)}")

In [None]:
def check_missing_columns(symptom_dict, expected_columns):
    """Checks and prints missing columns (loss)."""
    missing_columns = [col for col in expected_columns if col not in symptom_dict]
    if missing_columns:
        print(f"Missing columns (Loss): {missing_columns}")
    else:
        print("All expected columns are present.")

In [None]:
for _, row in data.iterrows():
    parse_symptoms(row, symptom_dict)

In [None]:
print("Initial Symptom Dictionary:")
display_dict(symptom_dict)

Initial Symptom Dictionary:
Symptoms - 1: Fever, Y, Mild
Symptoms - 2: N, Cough, Mild
Symptoms - 3: N, Mild, Cold
Other Symptoms: Vertigo, Nausia, Text, Vertigo, Head Ache, Shivering, Nausia, Body Ache, Sickness


In [None]:
expected_columns = ['Symptoms - 1', 'Symptoms - 2', 'Symptoms - 3', 'Other Symptoms']
check_missing_columns(symptom_dict, expected_columns)

All expected columns are present.


In [None]:
for _, row in data.iterrows():
    parse_symptoms(row, symptom_dict)


In [None]:
print("\nEnhanced Symptom Dictionary:")
display_dict(symptom_dict)


Enhanced Symptom Dictionary:
Symptoms - 1: Fever, Y, Mild
Symptoms - 2: N, Cough, Mild
Symptoms - 3: N, Mild, Cold
Other Symptoms: Vertigo, Nausia, Text, Vertigo, Head Ache, Shivering, Nausia, Body Ache, Sickness


# Step 3


In [None]:
symptom_dict = {
    "S1": {"Fever": ["mild", "low", "high"]},
    "S2": {"Cough": ["mild", "low", "high"]},
    "S3": {"Cold": ["mild", "low", "high"]},
    "S4": {"Body Ache": {}},
    "S5": {"Cold": {}},
    "S6": {"Shivering": ["mild", "high", "intermittent"]}
}

In [None]:
# Step 2: Define function to parse, print Loss, and enhance the dictionary
def print_loss_and_enhance_dictionary(data, symptom_dict):
    loss = []  # Track missing attributes
    for _, row in data.iterrows():

        # Check if the column name is 'SrNo' or 'Sr.No'
        sr_no = row.get('SrNo') if 'SrNo' in row else row.get('Sr.No')

        # If sr_no is still None, it means neither column exists
        if sr_no is None:
            raise KeyError("Neither 'SrNo' nor 'Sr.No' column found in the DataFrame.")

        patient_id = row['Patient_Id']
        observation = row['Observation']
        particulars = row['Particulars']
        time_period = row['Time Period']
        location = row[['City', 'State', 'Country', 'Pincode']].to_dict()

        # If there is a valid observation, check against the dictionary
        if pd.notna(observation):
            symptoms = observation.split(", ")
            for symptom in symptoms:
                found = False
                for key, symptom_data in symptom_dict.items():
                    if symptom in symptom_data:
                        # Symptom found, attach attributes (only to dictionary types)
                        if isinstance(symptom_data[symptom], dict):
                            symptom_dict[key][symptom].update({
                                "Particulars": particulars,
                                "Time Period": time_period,
                                "Location": location
                            })
                        found = True
                        break
                if not found:
                    loss.append((sr_no, patient_id, symptom))  # Track loss if symptom not found

    # Print the loss in Step 2
    print("Loss found in Step 2:")
    if loss:
        for sr_no, patient_id, symptom in loss:
            print(f"SrNo: {sr_no}, Patient_Id: {patient_id}, Symptom: {symptom} (Not found in dictionary)")
    else:
        print("No loss found. All symptoms matched the dictionary.")



In [None]:
 #Print the enhanced dictionary
print("\nEnhanced Symptom Dictionary:")
for key, symptoms in symptom_dict.items():
  print(f"{key}: {symptoms}")


Enhanced Symptom Dictionary:
S1: {'Fever': ['mild', 'low', 'high']}
S2: {'Cough': ['mild', 'low', 'high']}
S3: {'Cold': ['mild', 'low', 'high']}
S4: {'Body Ache': {}}
S5: {'Cold': {}}
S6: {'Shivering': ['mild', 'high', 'intermittent']}


In [None]:
def check_loss_of_attributes(symptom_dict):
    attribute_loss = 0
    for key, symptoms in symptom_dict.items():
        for symptom, details in symptoms.items():
            if isinstance(details, dict):
                required_keys = ["Particulars", "Time Period", "Location"]
                if any(key not in details for key in required_keys):
                    attribute_loss += 1

    print(f"\nLoss of attributes is {attribute_loss}")
    if attribute_loss == 0:
        print("No attribute loss, all attributes were correctly attached.")

In [None]:
check_loss_of_attributes(symptom_dict)


Loss of attributes is 2


# Step 4

In [None]:
import json
import pandas as pd
import xml.etree.ElementTree as ET
import os

In [50]:
class SymptomParser:
    def __init__(self):
        self.symptom_dict = {}
        self.loss = []
        self.loss_of_attributes = 0  # Initialize loss of attributes

    def read_data(self, file_path):
        """Reads data from various formats and enhances the dictionary."""
        file_extension = os.path.splitext(file_path)[1]

        if file_extension == '.csv':
            self.read_csv(file_path)
        elif file_extension == '.tsv':
            self.read_tsv(file_path)
        elif file_extension == '.json':
            self.read_json(file_path)
        elif file_extension == '.xml':
            self.read_xml(file_path)
        else:
            print("Unsupported file format.")

    def read_csv(self, file_path):
        df = pd.read_csv(file_path)
        self.enhance_dictionary(df)

    def read_tsv(self, file_path):
        df = pd.read_csv(file_path, sep='\t')
        self.enhance_dictionary(df)

    def read_json(self, file_path):
        with open(file_path) as f:
            data = json.load(f)
            df = pd.json_normalize(data)
            self.enhance_dictionary(df)

    def read_xml(self, file_path):
        tree = ET.parse(file_path)
        root = tree.getroot()
        data = []

        # Convert XML to a list of dictionaries
        for item in root.findall('.//item'):  # Adjust the tag based on your XML structure
            entry = {}
            for child in item:
                entry[child.tag] = child.text
            data.append(entry)

        df = pd.DataFrame(data)
        self.enhance_dictionary(df)

    def enhance_dictionary(self, df):
        """Enhances the dictionary based on the DataFrame."""
        for index, row in df.iterrows():
            for column, value in row.items():
                if column not in self.symptom_dict:
                    self.symptom_dict[column] = set()
                self.symptom_dict[column].add(value)

        self.loss_of_attributes = 0  # Reset the loss of attributes

    def dump_dictionary(self, filename):
        """Dumps the dictionary to a JSON file."""
        with open(filename, 'w') as f:
            json.dump(self.symptom_dict, f, indent=4)

    def print_dictionary(self):
        """Prints the symptom dictionary in a structured format."""
        print("\nSymptom Dictionary:")
        for key, value in self.symptom_dict.items():
            print(f"\t'{key}': {list(value)}")

    def manual_editing(self):
        """Allows manual editing of the dictionary."""
        while True:
            print("\nCurrent Dictionary:")
            self.print_dictionary()
            edit_key = input("\nEnter the key to edit (or type 'exit' to finish): ")
            if edit_key.lower() == 'exit':
                break

            if edit_key in self.symptom_dict:
                new_values = input("Enter new values (comma separated): ")
                self.symptom_dict[edit_key] = set(new_values.split(","))
            else:
                print("Key not found. Try again.")

if __name__ == "__main__":
    parser = SymptomParser()

    print("1. Read and Create Dictionary")
    file_path = input("Enter the data file path (CSV, TSV, JSON, XML): ")
    parser.read_data(file_path)

    print(f"Dictionary Loss is: {parser.loss_of_attributes}")

    print("2. Dump Dictionary")
    dump_filename = input("Enter the filename to save the dictionary (e.g., output.json): ")
    parser.dump_dictionary(dump_filename)

    print("3. Print Data Sets")
    parser.print_dictionary()

    print("4. Allow editing and reparsing of data")
    parser.manual_editing()

1. Read and Create Dictionary
Enter the data file path (CSV, TSV, JSON, XML): /content/Auto_Regressive_Model_Diffusion_Model_V_1.0.xlsx
Unsupported file format.
Dictionary Loss is: 0
2. Dump Dictionary
Enter the filename to save the dictionary (e.g., output.json): output.json
3. Print Data Sets

Symptom Dictionary:
4. Allow editing and reparsing of data

Current Dictionary:

Symptom Dictionary:

Enter the key to edit (or type 'exit' to finish): Symptoms
Key not found. Try again.

Current Dictionary:

Symptom Dictionary:

Enter the key to edit (or type 'exit' to finish): exit
