In [None]:
from openai import OpenAI
import os
import base64
import requests
import json
import requests
import pandas as pd
import sys
#Get the current working directory
notebook_dir = os.getcwd()

# Add the root directory to the Python path
sys.path.append(os.path.abspath(os.path.join(notebook_dir, '..')))

# Import the API key from config
from config.config import API_KEY


In [6]:
def gpt_authenticate():
    API_key = API_KEY 
    client = OpenAI(api_key = API_key)
    return client


In [7]:
def check_image_url(url):
    try:
        # Send a GET request to the image URL
        response = requests.get(url)
        
        # Check if the response status code is 200 (OK)
        if response.status_code == 200:
            # Get the Content-Type header from the response
            content_type = response.headers.get('Content-Type', '')
            
            # Check if the Content-Type is an image format
            if 'image' in content_type:
                print(f"Success: URL {url} is accessible and is of type {content_type}.")
                return False  # No error
            else:
                print(f"Error: URL {url} is not an image. Content-Type: {content_type}")
                return True  # Error occurred
        else:
            print(f"Error: URL {url} returned status code {response.status_code}.")
            return True  # Error occurred
    
    except requests.exceptions.RequestException as e:
        print(f"Error: Failed to access URL {url}. Exception: {e}")
        return True  # Error occurred

In [8]:
#%% using links as images
def gpt_extract(client, url):
    response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
      {
        "role": "user",
        "content": [
          {"type": "text", "text": '''Describe the image (Figure/Graphical abstract) from an article on comorbidity between COVID-19 and Neurodegeneration.   
                                      1. Name potential mechanisms (pathophysiological processes) of Covid-19's impact on the brain depicted in the image. 
                                      2. Describe each process depicted in the image as semantic triples (subject–predicate–object).  
                                      Example: 
                                      Pathophysiological Process: Astrocyte_Activation 
                                      Triples:
                                      SARS-CoV-2_infection|triggers|astrocyte_activation
                                      
                                      Use ONLY the information shown in the image! Follow the structure precisely and don't write anything else! Replace spaces in names with _ sign, make sure that words "Pathophysiology Process:" and "Triples:" are presented, don't use bold font and margins. Each triple must contain ONLY THREE elements separated by a | sign, four and more are not allowed!'''},
          {
            "type": "image_url",
            "image_url": {
              "url": url,
            },
          },
        ],
      }
    ],
    max_tokens=900,
  )
    content = response.choices[0].message.content
    return content

In [17]:
path = 'C:\\Users\\User\\Desktop\\Data\\SCAI\\'
relevance_GPT = pd.read_excel(path+"FINAL_RELEVANT_URLs.xlsx")
relevance_GPT.drop(['Unnamed: 0', 'Relevance_manual'], axis=1, inplace=True)
relevance_GPT

Unnamed: 0,URL
0,https://media.springernature.com/lw685/springe...
1,https://media.springernature.com/lw1200/spring...
2,https://www.eurekaselect.com/images/graphical-...
3,https://media.springernature.com/lw685/springe...
4,https://qeios-uploads.s3.eu-west-1.amazonaws.c...
...,...
284,https://f6publishing.blob.core.windows.net/728...
285,https://europepmc.org/articles/PMC9922164/bin/...
286,https://www.mdpi.com/cells/cells-12-00816/arti...
287,https://media.springernature.com/lw685/springe...


In [18]:
def triples_extraction_from_urls(path_URLs):

    client = gpt_authenticate(API_key)
    relevance_GPT = pd.read_excel(path_URLs)

    # Initialize an empty list to store parsed data
    parsed_data = []

    # Loop through each row in the dataframe
    for idx, row in (relevance_GPT.head(5)).iterrows():
        try:
            url = row["URL"]  # Extract image URL

            # Extract content from the image using GPT
            content = gpt_extract(client, url)
            print(url, content)

            # Parse the text for mechanisms and triples
            mechanisms = content.strip().split('Pathophysiology Process: ')

            for mechanism_block in mechanisms[1:]:
                lines = mechanism_block.strip().split('\n')
                mechanism_name = lines[0].strip()
                triples = lines[2:]  # Skip the 'Triples:' line

                for triple in triples:
                    subject, action, obj = triple.strip().split('|')
                    parsed_data.append([url, mechanism_name, subject, action, obj])

        except Exception as e:
            # Print the error and continue to the next row
            print(f"Error processing {url}: {e}")
            continue  # Skip to the next row in case of an error

    # Create a new DataFrame to store parsed results
    parsed_df = pd.DataFrame(parsed_data, columns=['URL', 'Pathophysiological Process', 'Subject', 'Predicate', 'Object'])

    # Save dataframe
    parsed_df.to_csv('Triples_Final.csv')
    parsed_df.to_excel(r"Triples_Final.xlsx", index=False)
    print('Triples_Final file is successfully saved as csv. and xlsx.')

https://media.springernature.com/lw685/springer-static/image/art%3A10.1007%2Fs12640-020-00219-8/MediaObjects/12640_2020_219_Fig1_HTML.png Pathophysiology Process: Viral_Entry_via_Olfactory_Neurons  
Triples:  
SARS-CoV-2|enters|olfactory_neurons  
Olfactory_neurons|transmit|virus_to_brain  
Virus|causes|cell_damage  

Pathophysiology Process: Blood-Brain_Barrier_Disruption  
Triples:  
SARS-CoV-2|binds_to|ACE2  
Virus|disrupts|tight_junctions  
Disruption|leads_to|brain_edema  

Pathophysiology Process: Immune_Response_Activation  
Triples:  
Antigen|activates|B_cells  
B_cells|produce|antibodies  
Activated_T_cells|attack|neurons  

Pathophysiology Process: Stress_and_Psychiatric_Symptoms  
Triples:  
Stress|activates|HPA_axis  
HPA_axis|increases|glucocorticoid_release  
Increased_GC|induces|psychiatric_symptoms  
https://media.springernature.com/lw1200/springer-static/image/art%3A10.1007%2Fs11033-024-09279-x/MediaObjects/11033_2024_9279_Fig2_HTML.png Pathophysiology Process: Viral_E

https://media.springernature.com/lw1200/springer-static/image/art%3A10.1007%2Fs11033-024-09279-x/MediaObjects/11033_2024_9279_Fig5_HTML.png Pathophysiology Process: Cytokine_Storm  
Triples:  
SARS-CoV-2|induces|PICs_&_chemokines  
PICs_&_chemokines|lead_to|cytokine_storm  
Cytokine_storm|activates|free_calcium  
Cytokine_storm|disrupts|BBB  

Pathophysiology Process: Apoptosis_of_ECs  
Triples:  
Free_calcium|causes|apoptosis_of_ECs  
Apoptosis_of_ECs|leads_to|vascular_leakage_&_cerebral_edema  

Pathophysiology Process: Neuroinflammation  
Triples:  
SARS-CoV-2_&_cytokines|reach|CNS  
Microglia_&_IL-1_activation|causes|neuroinflammation  
Neuroinflammation|results_in|neural_tissue_damage  
https://media.springernature.com/lw1200/springer-static/image/art%3A10.1007%2Fs12038-021-00165-3/MediaObjects/12038_2021_165_Figa_HTML.png Pathophysiology Process: Virus_Entry_Through_Receptors  
Triples:  
SARS-CoV-2|binds|ACE2_receptor

Pathophysiology Process: Neuronal_Hijacking  
Triples:  
SAR

https://media.springernature.com/lw1200/springer-static/image/art%3A10.1007%2Fs10072-020-04823-6/MediaObjects/10072_2020_4823_Fig1_HTML.png Pathophysiology Process: Leakage_and_Vascular_Permeability

Triples:
SARS-CoV-2_infection|causes|leakage_in_Blood_Brain_Barrier
leakage_in_Blood_Brain_Barrier|impacts|neurons_astrocytes_and_microglia

Pathophysiology Process: Dysfunctional_Immune_Response

Triples:
neurons_astrocytes_and_microglia|trigger|dysfunctional_immune_response
dysfunctional_immune_response|leads_to|infiltration_of_monocytes_macrophages_T-cells

Pathophysiology Process: Cytokine_Storm

Triples:
infiltration_of_monocytes_macrophages_T-cells|induces|cytokine_storm
cytokine_storm|produces|IL-6_IL-1β_IL-2_TNF-α

Pathophysiology Process: Neuro-inflammation_and_Oxidative_Stress

Triples:
cytokine_storm|causes|neuro-inflammation_and_oxidative_stress
neuro-inflammation_and_oxidative_stress|results_in|neurodegeneration_and_neurological_symptoms
Error processing https://media.springer

https://media.springernature.com/lw685/springer-static/image/art%3A10.1007%2Fs44192-023-00036-3/MediaObjects/44192_2023_36_Fig1_HTML.png Pathophysiology Process: Blood-Brain_Barrier_Disruption  
Triples:  
SARS-CoV-2_infection|disrupts|blood-brain_barrier  

Pathophysiology Process: Neuroinflammation  
Triples:  
SARS-CoV-2_infection|induces|neuroinflammation  

Pathophysiology Process: Astrocyte_Activation  
Triples:  
SARS-CoV-2_infection|triggers|astrocyte_activation  

Pathophysiology Process: Neuronal_Damage  
Triples:  
Coronavirus|causes|neuronal_damage  

Pathophysiology Process: Microglial_Activation  
Triples:  
SARS-CoV-2_infection|activates|microglia
https://media.springernature.com/lw685/springer-static/image/art%3A10.1038%2Fs41467-022-34068-0/MediaObjects/41467_2022_34068_Fig9_HTML.png Pathophysiology Process: Astroglia_Activation  
Triples:  
SARS-CoV-2_infection|triggers|astroglia_activation  

Pathophysiology Process: Microgliosis  
Triples:  
SARS-CoV-2_infection|indu

https://media.springernature.com/lw685/springer-static/image/art%3A10.1186%2Fs41983-023-00705-8/MediaObjects/41983_2023_705_Fig1_HTML.png Pathophysiology Process: Viral_Entry

Triples:
SARS-CoV-2|binds_to|ACE2_Receptor

Pathophysiology Process: Blood-Brain_Barrier_Disruption

Triples:
SARS-CoV-2|disrupts|Tight_Junction

Pathophysiology Process: Neuroinflammation

Triples:
SARS-CoV-2|activates|Astrocytes
SARS-CoV-2|activates|Microglial_cells
Error processing https://media.springernature.com/lw685/springer-static/image/art%3A10.1186%2Fs41983-023-00705-8/MediaObjects/41983_2023_705_Fig1_HTML.png: not enough values to unpack (expected 3, got 1)
Error processing https://europepmc.org/articles/PMC9528075/bin/gr3_lrg.jpg: Error code: 400 - {'error': {'message': 'Timeout while downloading https://europepmc.org/articles/PMC9528075/bin/gr3_lrg.jpg.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_image_url'}}
https://media.springernature.com/lw1200/springer-static/image/art%3A1

https://ve.scielo.org/img/revistas/ic/v64n1//0535-5133-ic-64-01-108-gf1.jpg Pathophysiology Process: SARS-CoV-2_and_ACE2  
Triples:  
SARS-CoV-2|binds|ACE2_receptor  
ACE2_receptor|activates|viral_entry  
TMPRSS2|facilitates|viral_entry   

Pathophysiology Process: Blood_Brain_Barrier  
Triples:  
SARS-CoV-2|disrupts|blood_brain_barrier  
Disrupted_barrier|allows|virus_entry  
Virus_entry|triggers|neuroinflammation  

Pathophysiology Process: Immunology_Pathways  
Triples:  
SARS-CoV-2|activates|cytokine_release  
Cytokine_release|induces|inflammation  
Inflammation|alters|BBB_permeability  

Pathophysiology Process: Olfactory-Neural_Transport  
Triples:  
SARS-CoV-2|enters|olfactory_epithelium  
Olfactory_epithelium|provides|virus_pathway  
Virus_pathway|reaches|brain
https://media.springernature.com/full/springer-static/image/art%3A10.1038%2Fs41392-022-01072-1/MediaObjects/41392_2022_1072_Fig1_HTML.png Pathophysiology Process: Brain_Volume_Change  
Triples:  
SARS-CoV-2_infection|red

https://www.frontiersin.org/files/Articles/657004/fneur-12-657004-HTML/image_m/fneur-12-657004-g001.jpg Pathophysiology Process: Neuroinvasion_via_cranial_nerves  
Triples:  
Neuroinvasion_via_cranial_nerves|induces|altered_tryptophan_metabolism  

Pathophysiology Process: Elevation_in_peripheral_cytokines  
Triples:  
Poor_diet|causes|elevation_in_peripheral_cytokines  

Pathophysiology Process: Elevation_in_CNS_cytokines  
Triples:  
Elevation_in_peripheral_cytokines|leads_to|elevation_in_CNS_cytokines  

Pathophysiology Process: Microglial_activation  
Triples:  
Elevation_in_CNS_cytokines|results_in|microglial_activation  

Pathophysiology Process: Neuroplasticity/neurogenesis_defects  
Triples:  
Microglial_activation|causes|neuroplasticity/neurogenesis_defects  

Pathophysiology Process: Sedentary_behavior  
Triples:  
Sedentary_behavior|increases|elevation_in_peripheral_cytokines  

Pathophysiology Process: Psychological_stress  
Triples:  
Psychological_stress|elevates|elevatio

https://media.springernature.com/lw1200/springer-static/image/art%3A10.1007%2Fs12640-022-00540-4/MediaObjects/12640_2022_540_Fig1_HTML.png Pathophysiology Process: Immune_Responses  
Triples:  
SARS-CoV-2_N-protein|interacts_with|α-synuclein  
Interferon_therapy|reduces|comorbidities  

Pathophysiology Process: ACE2_Decarboxylase_Co-localization  
Triples:  
ACE2_receptors|co-localize_with|DDC  
ACE2_and_DDC|co-regulate|in_non-neuronal_cells  

Pathophysiology Process: Loss_of_Smell  
Triples:  
Loss_of_smell|is|non-motor_symptom  
Loss_of_smell|occurs_in|COVID-19  

Pathophysiology Process: Anxiety_and_Depression  
Triples:  
COVID-19|causes|isolation  
Isolation|leads_to|anxiety  
Anxiety|leads_to|depression  

Pathophysiology Process: Gut-Brain_Axis_Interaction  
Triples:  
α-synuclein|interacts_with|coronavirus  
α-synuclein|spreads_from|gut_to_brain  
Vagal_nerve|mediates|α-synuclein_spread  
https://www.mdpi.com/pharmaceuticals/pharmaceuticals-17-00788/article_deploy/html/images/

https://media.springernature.com/lw1200/springer-static/image/art%3A10.1007%2Fs00134-021-06439-6/MediaObjects/134_2021_6439_Fig1_HTML.png Pathophysiology Process: Neuroinflammation  
Triples:  
SARS-CoV-2|induces|inflammatory_response

Pathophysiology Process: Direct_neuroinvasion  
Triples:  
SARS-CoV-2|invades|olfactory_nerve  
SARS-CoV-2|invades|vagus_nerve

Pathophysiology Process: Neurotropism  
Triples:  
SARS-CoV-2|exhibits|neurotropism

Pathophysiology Process: Autoimmune_disorders  
Triples:  
SARS-CoV-2|triggers|autoimmune_reactions
https://media.springernature.com/lw685/springer-static/image/art%3A10.1007%2Fs12035-020-02149-0/MediaObjects/12035_2020_2149_Fig3_HTML.png Pathophysiology Process: Retrograde_Invasion  
Triples:  
SARS-CoV-2|invades|olfactory_bulb

Pathophysiology Process: Endocytosis  
Triples:  
SARS-CoV-2|enters|ACE2_receptors

Pathophysiology Process: Blood_Brain_Barrier_Disruption  
Triples:  
SARS-CoV-2|affects|endothelial_cells

Pathophysiology Process: Ren

https://www.frontiersin.org/files/Articles/1039427/fimmu-13-1039427-HTML/image_m/fimmu-13-1039427-g001.jpg Pathophysiology Process: Type_I-IFN_dysregulated_production  
Triples:  
SARS-CoV-2_infection|causes|Type_I-IFN_dysregulated_production  

Pathophysiology Process: Inflammatory_response  
Triples:  
Type_I-IFN_dysregulated_production|induces|Inflammatory_response  
Inflammatory_response|increases|IL-1α_IL-1β_IL-2_IL-8_IL_IL-18  
Inflammatory_response|increases|CXCL10_CXCL12_CCL1_CCL2  
Inflammatory_response|increases|GM-CSF_y_BAFF  

Pathophysiology Process: Neuro-long_COVID  
Triples:  
Inflammatory_response|leads_to|Neuro-long_COVID  

Pathophysiology Process: BBB_disruption  
Triples:  
Neuro-long_COVID|causes|BBB_disruption  
BBB_disruption|releases|Autoantigen  
BBB_disruption|increases|ROS  
BBB_disruption|activates|Microglia_cell  

Pathophysiology Process: Antibodies_generation_dysregulation  
Triples:  
BBB_disruption|leads_to|Antibodies_generation_dysregulation  
Antibod

https://content.iospress.com/media/jad/2022/88-2/jad-88-2-jad220105/jad-88-jad220105-g002.jpg Pathophysiology Process: Direct_viral_encephalitis  
Triples:  
SARS-CoV-2|causes|direct_viral_encephalitis  

Pathophysiology Process: Systemic_inflammation  
Triples:  
SARS-CoV-2|triggers|cytokine_storm  

Pathophysiology Process: Peripheral_organ_dysfunction  
Triples:  
SARS-CoV-2|leads_to|liver_kidney_lung_dysfunction  

Pathophysiology Process: Cerebrovascular_changes  
Triples:  
SARS-CoV-2|induces|cerebrovascular_changes  

Pathophysiology Process: Combination_of_changes  
Triples:  
SARS-CoV-2|results_in|combination_of_changes
https://europepmc.org/articles/PMC9453010/bin/OMCL2022-3012778.004.jpg Pathophysiology Process: Hypothalamic_Pituitary_Adrenal_Axis_Dysregulation  
Triples:  
COVID-19|induces|HPA_axis_dysregulation  
Lockdown_psychosocial_stress|affects|HPA_axis  

Pathophysiology Process: Glial_Cell_Activation  
Triples:  
COVID-19|induces|glial_cell_activation  

Pathophysio

https://earimediaprodweb.azurewebsites.net/Api/v1/Multimedia/85512ef4-c7e7-4c64-b4a7-2a611cde0abf/Rendition/low-res/Content/Public Pathophysiology Process: Vascular_Damage  
Triples:  
COVID-19|causes|vascular_damage  
Vascular_damage|leads_to|microglial_activation  
Vascular_damage|leads_to|T_cell_infiltration  
Vascular_damage|leads_to|alpha-synuclein_upregulation  

Pathophysiology Process: Systemic_Inflammation  
Triples:  
COVID-19|induces|systemic_inflammation  
Systemic_inflammation|leads_to|chronic_neuroinflammation  
Systemic_inflammation|leads_to|T_cell_infiltration  

Pathophysiology Process: Neuroinvasion  
Triples:  
COVID-19|results_in|neuroinvasion  
Neuroinvasion|leads_to|alpha-synuclein_aggregates  
Neuroinvasion|leads_to|neurodegeneration
https://pub.mdpi-res.com/biomolecules/biomolecules-13-01585/article_deploy/html/images/biomolecules-13-01585-ag.png?1698401169 Pathophysiology Process: Cytotoxicity
Triples:
SARS-CoV-2_infection|causes|cytotoxicity

Pathophysiology P

https://www.mdpi.com/cells/cells-12-00688/article_deploy/html/images/cells-12-00688-g001.png Pathophysiology Process: Infiltration_of_immune_cells  
Triples:  
SARS-CoV-2_infection|causes|infiltration_of_immune_cells

Pathophysiology Process: Activation_of_microglia  
Triples:  
SARS-CoV-2_infection|triggers|activation_of_microglia

Pathophysiology Process: Activation_of_astrocytes_and_neurons  
Triples:  
SARS-CoV-2_infection|induces|activation_of_astrocytes_and_neurons

Pathophysiology Process: BBB_disruption_and_increased_permeability  
Triples:  
SARS-CoV-2_infection|leads_to|BBB_disruption_and_increased_permeability

Pathophysiology Process: Virus_entry_into_the_brain  
Triples:  
SARS-CoV-2_spike_protein|facilitates|virus_entry_into_the_brain

Pathophysiology Process: Elevated_levels_of_proinflammatory_cytokines_and_chemokines  
Triples:  
SARS-CoV-2_infection|raises|levels_of_proinflammatory_cytokines_and_chemokines

Pathophysiology Process: Damage_of_blood_vessels  
Triples:  


https://www.frontiersin.org/files/Articles/867825/fnins-16-867825-HTML-r1/image_m/fnins-16-867825-g003.jpg Pathophysiology Process: Oxidative_stress  
Triples:  
Environmental_factors|increase|Oxidative_stress  
Oxidative_stress|affects|Age  
SARS-CoV-2_infection|induces|Oxidative_stress  
Oxidative_stress|contributes_to|Protein_aggregation  

Pathophysiology Process: Neuroinflammation  
Triples:  
Environmental_factors|trigger|Neuroinflammation  
Neuroinflammation|modulates|Age  
SARS-CoV-2_infection|causes|Neuroinflammation  
Neuroinflammation|leads_to|Protein_aggregation  

Pathophysiology Process: Protein_aggregation  
Triples:  
Oxidative_stress|promotes|Protein_aggregation  
Neuroinflammation|enhances|Protein_aggregation  
SARS-CoV-2_infection|results_in|Protein_aggregation  
Protein_aggregation|produces|p-Tau  
Protein_aggregation|produces|β-amyloid  
Protein_aggregation|produces|α-synuclein  
Protein_aggregation|causes|Neuronal_dysfunction_and_injury  

Pathophysiology Process:

https://europepmc.org/articles/PMC9877664/bin/JNC-9999-0-g002.jpg Pathophysiology Process: ACE2_Binding  
Triples:  
SARS-CoV-2|binds|ACE2_receptor  

Pathophysiology Process: Viral_Internalization  
Triples:  
SARS-CoV-2|undergoes|internalization  

Pathophysiology Process: Viral_Replication  
Triples:  
Viral_RNA|undergoes|replication  

Pathophysiology Process: Exosome_Formation  
Triples:  
Structural_proteins|form|exosomes  

Pathophysiology Process: Immune_Response  
Triples:  
Dendritic_cells|present|antigen  

Pathophysiology Process: CD8_T_Cell_Activation  
Triples:  
Dendritic_cells|activate|CD8_T_cells  

Pathophysiology Process: Microglial_Activation  
Triples:  
SARS-CoV-2|activates|microglia  

Pathophysiology Process: Reactive_Astrocytosis  
Triples:  
SARS-CoV-2|triggers|reactive_astrocytosis  

Pathophysiology Process: Fibril_Formation  
Triples:  
SARS-CoV-2|causes|accelerated_fibril_formation  

Pathophysiology Process: Immune_Cell_Recruitment  
Triples:  
Astrocytes

https://www.frontiersin.org/files/Articles/1406874/fmicb-15-1406874-HTML/image_m/fmicb-15-1406874-g001.jpg Pathophysiology Process: Brain_Lung_Crosstalk  
Triples:  
Oxidative_stress|induces|alveolar_barrier_impairment  
Alveolar_barrier_impairment|induces|impaired_gas_exchange  
Impaired_gas_exchange|leads_to|Hypoxia  

Pathophysiology Process: Blood_Brain_Barrier_Disruption  
Triples:  
SARS-CoV-2|crosses|Blood_Brain_Barrier  
Blood_Brain_Barrier|undergoes|damage  
Damage_to_BBB|allows|infiltration_of_neutrophils_and_leukocytes  

Pathophysiology Process: Cytokine_Storm  
Triples:  
SARS-CoV-2_infection|triggers|COVID-19-induced_cytokine_storm  
Cytokine_storm|activates|Reactive_astrocytes  
Cytokine_storm|activates|Activated_microglia  

Pathophysiology Process: Neuroinflammation  
Triples:  
Reactive_astrocytes|release|proinflammatory_cytokines  
Activated_microglia|release|proinflammatory_cytokines  
Proinflammatory_cytokines|include|TNFα_iNOS_IL-1β_IL-6_IL-12_IL-23  
Chemokines|i

https://www.mdpi.com/brainsci/brainsci-12-00190/article_deploy/html/images/brainsci-12-00190-g001.png 1. Pathophysiology Process: Nasal_Entry  
   Triples:  
   SARS-CoV-2|enters|Nasal_submucosa  
   Nasal_submucosa|leads_to|Olfactory_bulb  

2. Pathophysiology Process: BBB_Disruption  
   Triples:  
   SARS-CoV-2|causes|BBB_disruption  
   BBB_disruption|allows|entry_into_brain  

3. Pathophysiology Process: B-CSF_Barrier_Disruption  
   Triples:  
   SARS-CoV-2|causes|B-CSF_barrier_disruption  
   B-CSF_barrier_disruption|allows|entry_into_brain  

4. Pathophysiology Process: Endothelial_Cell_Activation  
   Triples:  
   SARS-CoV-2|activates|ACE-2_and_other_entry-associated_proteases  
   Endothelial_cell|expresses|ACE-2_and_other_entry-associated_proteases  

5. Pathophysiology Process: Cytokine_Production  
   Triples:  
   Endothelial_cell|induces|Cytokine_production  
   Cytokine_production|impacts|Neuron  

6. Pathophysiology Process: Neuron_Infection_and_Replication  
   Tripl

https://aini.it/wp-content/uploads/2020/10/41398_2020_949_Fig1_HTML.jpg Pathophysiology Process: Glial_Cells_Activation  
Triples:  
COVID-19|induces|glial_cells_activation  

Pathophysiology Process: HPA_Axis_Dysfunction  
Triples:  
COVID-19|affects|HPA  

Pathophysiology Process: Inflammatory_Cytokine_Release  
Triples:  
COVID-19|increases|inflammatory_cytokines  

Pathophysiology Process: Peripheral_Inflammation  
Triples:  
COVID-19|causes|peripheral_inflammation  

Pathophysiology Process: Impact_on_Circumventricular_Organs  
Triples:  
COVID-19|targets|circumventricular_organs  

Pathophysiology Process: Psychosocial_Stress  
Triples:  
COVID-19|generates|psychosocial_stress
https://media.springernature.com/lw685/springer-static/image/art%3A10.1038%2Fs41440-021-00800-4/MediaObjects/41440_2021_800_Fig3_HTML.png Pathophysiology Process: Imbalance_of_ACE2

Triples:
SARS-CoV-2|reduces|ACE2_activity
Reduced_ACE2_activity|increases|Ang_II
Increased_Ang_II|disrupts|balance

Pathophysi

https://www.mdpi.com/brainsci/brainsci-11-00305/article_deploy/html/images/brainsci-11-00305-g001.png Pathophysiology Process: Neuroinflammation  
Triples:  
SARS-CoV-2_infection|induces|neuroinflammation  

Pathophysiology Process: Oxidative_Stress_Within_Brain  
Triples:  
SARS-CoV-2_infection|causes|oxidative_stress_within_brain  

Pathophysiology Process: Endothelial_Dysfunction  
Triples:  
SARS-CoV-2_infection|leads_to|endothelial_dysfunction  

Pathophysiology Process: Loss_of_Pericytes  
Triples:  
SARS-CoV-2_infection|results_in|loss_of_pericytes  

Pathophysiology Process: Ischemic_White_Matter_Damage  
Triples:  
SARS-CoV-2_infection|contributes_to|ischemic_white_matter_damage  

Pathophysiology Process: Blood_Brain_Barrier_Damage  
Triples:  
SARS-CoV-2_infection|disrupts|blood_brain_barrier_damage  

Pathophysiology Process: Cerebral_Hypoperfusion  
Triples:  
SARS-CoV-2_infection|causes|cerebral_hypoperfusion  

Pathophysiology Process: Cerebral_Accumulation_of_Aβ  
Tripl

https://cdnintech.com/media/chapter/84633/1512345123/media/F1.png Pathophysiology Process: Olfactory_Route   
Triples:  
SARS-CoV-2|enters|olfactory_epithelium

Pathophysiology Process: Blood_Brain_Barrier   
Triples:  
Astrocyte|is_affected_by|SARS-CoV-2  
Infected_leukocyte|crosses|blood_brain_barrier  
Microglia|responds_to|infection

Pathophysiology Process: Ocular_Entry   
Triples:  
Spike_protein|binds_to|CD147  
Virus|enters_through|tear_film

Pathophysiology Process: Choroid_Plexus   
Triples:  
BCSFB|is_affected_by|SARS-CoV-2

Pathophysiology Process: Lungs_to_Brain   
Triples:  
Virus|travels_from|lungs_to_brain  
Mucosal_epithelium|is_infiltrated_by|SARS-CoV-2

Pathophysiology Process: Gastric_Route   
Triples:  
Leukocyte|carries|virus_via_vagus_nerve  
Cytokines|are_released_by|enterocytes

Pathophysiology Process: BBB_Leakage   
Triples:  
BBB|experiences|leakage

Pathophysiology Process: Astroglial_Degeneration   
Triples:  
Astrogliosis|leads_to|degeneration  
Cytokines

https://media.springernature.com/lw685/springer-static/image/art%3A10.1007%2Fs13577-023-00988-2/MediaObjects/13577_2023_988_Fig1_HTML.png Pathophysiology Process: Alpha-Synuclein_Aggregation  
Triples:  
Soluble_alpha-Synuclein|docks_to|RBD  
Docking_of_alpha-Synuclein|leads_to|Aggregated_alpha-Synuclein
https://media.springernature.com/lw1200/springer-static/image/art%3A10.1007%2Fs11302-024-09998-7/MediaObjects/11302_2024_9998_Fig1_HTML.png Pathophysiology Process: Viral_Invasion
Triples:
SARS-CoV-2|enters|airways

Pathophysiology Process: Brain_Entry
Triples:
Virus|reaches|Substantia_Nigra

Pathophysiology Process: Neuronal_Inflammation
Triples:
Virus|triggers|neuronal_inflammation

Pathophysiology Process: Dopaminergic_Neuron_Inflammation
Triples:
Inflammation|causes|infamed_dopaminergic_neuron

Pathophysiology Process: Mitochondrial_Damage
Triples:
Inflammation|causes|mitochondrial_damage

Pathophysiology Process: ROS_Production
Triples:
Mitochondrial_damage|produces|excessive_ROS


https://media.springernature.com/lw685/springer-static/image/art%3A10.1007%2Fs00415-022-11237-1/MediaObjects/415_2022_11237_Fig5_HTML.png Pathophysiology Process: Increased_Pro-inflammatory_Cytokines  
Triples:  
SARS-CoV-2|increases|expression_of_proinflammatory_mediators  
Increased_expression_of_proinflammatory_mediators|triggers|increased_pro-inflammatory_cytokines  

Pathophysiology Process: BBB_Permeability  
Triples:  
SARS-CoV-2|causes|further_dysfunction_of_the_BBB  
Further_dysfunction_of_the_BBB|leads_to|increased_permeability_of_the_BBB_by_MMP-9  

Pathophysiology Process: Demyelination  
Triples:  
Increased_migration_of_immune_cells|leads_to|increased_demyelination  
Demyelination|causes|cognitive_and_motor_impairment  
https://www.frontiersin.org/files/Articles/653694/fnagi-13-653694-HTML-r1/image_m/fnagi-13-653694-g009.jpg Pathophysiology Process: Cytokine_Storm  
Triples:  
Lung_Infection|leads_to|Cytokine_Storm  
Cytokine_Storm|increases|IL-1_IL-6_TNF-α_IFN-γ  

Patho

https://media.springernature.com/lw685/springer-static/image/art%3A10.1186%2Fs42466-021-00138-9/MediaObjects/42466_2021_138_Fig2_HTML.png Pathophysiology Process: Direct_invasion_and_damage  
Triples:  
SARS-CoV-2_infection|affects|central_nervous_system  
SARS-CoV-2_infection|affects|peripheral_nervous_system  
SARS-CoV-2_infection|affects|muscle_tissue  

Pathophysiology Process: Indirect_vascular_effects  
Triples:  
SARS-CoV-2_infection|causes|endothelial_dysfunction  
SARS-CoV-2_infection|causes|activation_of_coagulation  
SARS-CoV-2_infection|causes|thrombotic_microangiopathy  

Pathophysiology Process: Indirect_para-infectious_autoimmune_effects  
Triples:  
SARS-CoV-2_infection|induces|cytokine_storm  

Pathophysiology Process: Indirect_post-infectious_autoimmune_effects  
Triples:  
SARS-CoV-2_infection|induces|cellular_immunity  
SARS-CoV-2_infection|produces|autoantibodies  
https://dfzljdn9uc3pi.cloudfront.net/2022/14227/1/fig-3-full.png Pathophysiology Process: Transmucosa

https://www.mdpi.com/jcm/jcm-10-01947/article_deploy/html/images/jcm-10-01947-g001-550.jpg Pathophysiology Process: Loss_of_taste_and_smell  
Triples:  
SARS-CoV-2_infection|causes|loss_of_taste_and_smell  

Pathophysiology Process: Cephalgia  
Triples:  
SARS-CoV-2_infection|leads_to|cephalgia  

Pathophysiology Process: Dizziness  
Triples:  
SARS-CoV-2_infection|induces|dizziness  

Pathophysiology Process: Neuropsychiatric_disorders  
Triples:  
SARS-CoV-2_infection|triggers|neuropsychiatric_disorders  

Pathophysiology Process: Ischemic_stroke  
Triples:  
SARS-CoV-2_infection|results_in|ischemic_stroke  

Pathophysiology Process: Encephalitis  
Triples:  
SARS-CoV-2_infection|causes|encephalitis  
https://media.springernature.com/lw1200/springer-static/image/art%3A10.1007%2Fs00415-020-09974-2/MediaObjects/415_2020_9974_Fig3_HTML.png Pathophysiology Process: Neural_Retrograde_Invasion  
Triples:  
SARS-CoV-2|binds_to|ACE2  
SARS-CoV-2|invades|Olfactory_E.  
SARS-CoV-2|travels_to|O

https://media.springernature.com/lw1200/springer-static/image/art%3A10.1038%2Fs41392-021-00818-7/MediaObjects/41392_2021_818_Fig3_HTML.png Pathophysiology Process: Altered_Tight_Junctions  
Triples:  
SARS-CoV-2|alters|tight_junctions  

Pathophysiology Process: Increased_Permeability  
Triples:  
Altered_tight_junctions|cause|increased_permeability  

Pathophysiology Process: CNS_Complication  
Triples:  
Increased_permeability|leads_to|CNS_complication  

Pathophysiology Process: Viral_Entry  
Triples:  
SARS-CoV-2|enters_through|NRP1_and_other_possible_receptors  

Pathophysiology Process: Activation_by_TMPRSS2  
Triples:  
SARS-CoV-2|is_activated_by|TMPRSS2  

Pathophysiology Process: Membrane_Fusion_and_RNA_Release  
Triples:  
SARS-CoV-2|causes|membrane_fusion_and_viral_RNA_release  

Pathophysiology Process: Polypeptide_Synthesis  
Triples:  
Viral_RNA|undergoes|translation_to_polypeptides  

Pathophysiology Process: RNA_Dependent_RNA_Polymerase_Activity  
Triples:  
RdRp|synthe

https://media.springernature.com/full/springer-static/image/art%3A10.1038%2Fs41419-023-06102-6/MediaObjects/41419_2023_6102_Fig1_HTML.png Pathophysiology Process: BBB_damage  
Triples:  
SARS-CoV-2|binds_to|ACE2  
BBB|experiences|MMP9_increase  
BBB|experiences|tight_junction_disruption  

Pathophysiology Process: Inflammatory_molecule_production  
Triples:  
M1_microglia|produce|inflammatory_molecules  
Astrocyte|produce|inflammatory_molecules  

Pathophysiology Process: Microglia_activation  
Triples:  
SARS-CoV-2|activates|M1_microglia  
SARS-CoV-2|activates|M2_microglia  

Pathophysiology Process: Neuron_damage  
Triples:  
SARS-CoV-2|causes|mitochondrial_dysfunction  
Mitochondrial_dysfunction|leads_to|neuron_damage
https://uknow.uky.edu/sites/default/files/styles/uknow_story_image/public/COVID%20AD%20.jpg Pathophysiology Process: Neuroinflammation  
Triples:  
SARS-CoV-2_infection|induces|neuroinflammation  

Pathophysiology Process: Cytokine_Complements  
Triples:  
SARS-CoV-2_i

https://www.frontiersin.org/files/Articles/564882/fpsyt-11-00860-HTML/image_m/fpsyt-11-00860-g002.jpg Pathophysiology Process: Neuro-inflammation  
Triples:  
SARS-CoV-2_infection|results_in|cytokine_storm

Pathophysiology Process: Dysregulated_blood_pressure  
Triples:  
SARS-CoV-2_infection|disrupts|Renin-Angiotensin_System

Pathophysiology Process: Hypoxia  
Triples:  
Metabolic_changes|cause|hypoxia

Pathophysiology Process: Thrombotic_complications  
Triples:  
SARS-CoV-2_infection|leads_to|coagulation_disorders

Pathophysiology Process: Alleged_neurotropic_pathways_of_SARS-COV-2  
Triples:  
SARS-CoV-2|enters|olfactory_bulb  
SARS-CoV-2|travels_through|blood_circulation  
SARS-CoV-2|enters|lymphatic_system  
SARS-CoV-2|enters|cerebrospinal_fluid
https://media.springernature.com/lw685/springer-static/image/art%3A10.1007%2Fs10787-021-00806-x/MediaObjects/10787_2021_806_Fig4_HTML.png Pathophysiology Process: Astrocyte_Activation  
Triples:  
SARS-CoV-2_infection|triggers|astrocyte_a

https://media.springernature.com/lw1200/springer-static/image/art%3A10.1007%2Fs10072-021-05505-7/MediaObjects/10072_2021_5505_Fig3_HTML.png Pathophysiology Process: Hematogeneous_route  
Triples:  
Coronavirus|crosses|BBB  
Coronavirus|enters|CNS  
Coronavirus|impacts|Endothelial_cells  
Endothelial_cells|are|capillary

Pathophysiology Process: Neurotropism  
Triples:  
Coronavirus|reaches|Olfactory_nerve  
Coronavirus|crosses|Cribriform_plate  
Coronavirus|travels|axon  
Kinesin|transports|Coronavirus  
Neurotubule|contains|Coronavirus
https://media.springernature.com/lw685/springer-static/image/art%3A10.1186%2Fs12979-023-00341-z/MediaObjects/12979_2023_341_Fig2_HTML.png Pathophysiology Process: Microglia_Activation  
Triples:  
Aging|activates|microglia  
SARS-CoV-2_infection|activates|microglia  
Activated_microglia|produces|IL-1β_IL-6_TNF-α  
Activated_microglia|causes|neuronal_death  

Pathophysiology Process: Astrocyte_Activation  
Triples:  
SARS-CoV-2_infection|activates|astroc

https://media.springernature.com/lw685/springer-static/image/art%3A10.1038%2Fs41380-023-02161-5/MediaObjects/41380_2023_2161_Fig2_HTML.png Pathophysiology Process: EBV_Reactivation  
Triples:  
SARS-CoV-2_infection|causes|EBV_Reactivation  
EBV_Reactivation|stimulates|lytic_EBV_infected_B_cell  
lytic_EBV_infected_B_cell|produces|XBP-1  

Pathophysiology Process: B_Cell_Transformation  
Triples:  
EBV|infects|resting_B_cell  
resting_B_cell|transforms_to|EBV_infected_B_cell_clast  

Pathophysiology Process: Antigen_Expression  
Triples:  
EBV_infected_B_cell_clast|expresses|LMP-1  
EBV_infected_B_cell_clast|expresses|LMP-2  

Pathophysiology Process: Immune_Response  
Triples:  
Cytotoxic_T_cell|targets|EBV_infected_B_cell_clast  
NK_cell|targets|EBV_infected_B_cell_clast  

Pathophysiology Process: Long_COVID_Symptoms  
Triples:  
EBV_Reactivation|contributes_to|Fatigue  
EBV_Reactivation|contributes_to|Depression  
EBV_Reactivation|contributes_to|Insomnia  
https://media.springernatu

https://pub.mdpi-res.com/ijms/ijms-25-07169/article_deploy/html/images/ijms-25-07169-ag.png?1720701703 Pathophysiology Process: SG_assembly  
Triples:  
NCAP|induces|SG_assembly  

Pathophysiology Process: Recruitment_of_Tau  
Triples:  
SG|causes|recruitment_of_Tau  

Pathophysiology Process: Post-translational_modifications_of_Tau  
Triples:  
Tau|undergoes|post-translational_modifications  

Pathophysiology Process: Pathological_aggregation  
Triples:  
Tau|forms|pathological_aggregation  

Pathophysiology Process: SG_disassembly  
Triples:  
SUMO2|triggers|SG_disassembly
https://media.springernature.com/lw1200/springer-static/image/art%3A10.1007%2Fs13337-022-00793-9/MediaObjects/13337_2022_793_Fig4_HTML.png Pathophysiology Process: Cardiovascular_System

Triples:
SARS-CoV-2_infection|causes|arrhythmias
SARS-CoV-2_infection|leads_to|chest_pain
SARS-CoV-2_infection|results_in|thromboembolism

Pathophysiology Process: Nervous_System

Triples:
SARS-CoV-2_infection|causes|loss_of_smell_

https://www.mdpi.com/ijms/ijms-25-08715/article_deploy/html/images/ijms-25-08715-g003-550.jpg Pathophysiology Process: Endothelial_Cell_Infection  
Triples:  
SARS-CoV-2|infects|endothelial_cell  

Pathophysiology Process: ACE2_Receptor_Binding  
Triples:  
SARS-CoV-2|increases|ACE2_receptor_binding  

Pathophysiology Process: Neuroinflammation  
Triples:  
NLRP3_inflammasome|activates|neuroinflammation  

Pathophysiology Process: Microglia_Activation  
Triples:  
SARS-CoV-2_spikes|increase|Aβ_plaques  

Pathophysiology Process: Oxidative_Stress  
Triples:  
Oxidative_stress|leads_to|cell_damage  

Pathophysiology Process: Apoptosis  
Triples:  
SARS-CoV-2|induces|apoptosis  

Pathophysiology Process: Cytokine_Production  
Triples:  
IL-6_IL-1β_TNFα|increase|inflammation  

Pathophysiology Process: Neurodegeneration  
Triples:  
Neuroinflammation|leads_to|neurodegeneration
https://media.springernature.com/lw1200/springer-static/image/art%3A10.1007%2Fs12035-021-02318-9/MediaObjects/1203

Unnamed: 0,URL,Pathophysiology Process,Subject,Predicate,Object
0,https://media.springernature.com/lw685/springe...,Viral_Entry_via_Olfactory_Neurons,SARS-CoV-2,enters,olfactory_neurons
1,https://media.springernature.com/lw685/springe...,Viral_Entry_via_Olfactory_Neurons,Olfactory_neurons,transmit,virus_to_brain
2,https://media.springernature.com/lw685/springe...,Viral_Entry_via_Olfactory_Neurons,Virus,causes,cell_damage
3,https://media.springernature.com/lw685/springe...,Blood-Brain_Barrier_Disruption,SARS-CoV-2,binds_to,ACE2
4,https://media.springernature.com/lw685/springe...,Blood-Brain_Barrier_Disruption,Virus,disrupts,tight_junctions
...,...,...,...,...,...
2671,https://www.aging-us.com/article/202136/figure...,Large_Accumulation_of_AngII,Large_accumulation_of_AngII,triggers,inflammatory_cascade
2672,https://www.aging-us.com/article/202136/figure...,Large_Accumulation_of_AngII,Large_accumulation_of_AngII,induces,oxidative_stress
2673,https://www.aging-us.com/article/202136/figure...,CD147,CD147,triggers,pro-inflammatory_factors
2674,https://www.aging-us.com/article/202136/figure...,CD147,CD147,affects,vascular_permeability


In [23]:
# Save dataframe

#parsed_df.to_csv('Triples_Final.csv')
#parsed_df.to_excel(r"Triples_Final.xlsx", index=False)

In [11]:
import pandas as pd

# Load the data from both Excel files
df1 = pd.read_excel('covid_neurodegeneration_triples.xlsx')
df2 = pd.read_excel('covid_neurodegeneration_triples_2run.xlsx')

# Step 1: Extract the relevant columns from both dataframes (Subject, Predicate, Object)
triples_df1 = df1[['Subject', 'Predicate', 'Object']]
triples_df2 = df2[['Subject', 'Predicate', 'Object']]

# Step 2: Remove duplicate triples in both dataframes
unique_triples_df1 = triples_df1.drop_duplicates()
unique_triples_df2 = triples_df2.drop_duplicates()

# Step 3: Merge the two dataframes to find common triples based on Subject, Predicate, and Object
common_triples = pd.merge(unique_triples_df1, unique_triples_df2, on=['Subject', 'Predicate', 'Object'])

# Step 4: Output the common triples
print("Common triples:")
print(common_triples)

# If you want to save the common triples to a file, uncomment the following line
# common_triples.to_excel('common_triples.xlsx', index=False)


Common triples:
                 Subject  Predicate                                    Object
0             SARS-CoV-2    infects                              neural_cells
1             SARS-CoV-2    infects                          epithelial_cells
2             SARS-CoV-2     causes                         epithelium_damage
3             SARS-CoV-2     causes                       respiratory_failure
4    Respiratory_failure   leads_to                                  ischemia
..                   ...        ...                                       ...
108             COVID-19    induces                            elevated_IL-1β
109           SARS-CoV-2    reaches                            olfactory_bulb
110           SARS-CoV-2      binds                  NRP1_and_other_receptors
111            Viral_RNA  undergoes  translation_of_viral_structural_proteins
112           SARS-CoV-2    infects                                  alveolus

[113 rows x 3 columns]


# Separation into categories

In [168]:
parsed_df

Unnamed: 0,URL,Pathophysiological Process,Subject,Predicate,Object,Group
0,https://media.springernature.com/lw685/springe...,Viral_Entry_via_Olfactory_Neurons,SARS-CoV-2,enters,olfactory_neurons,Viral Entry and Neuroinvasion
1,https://media.springernature.com/lw685/springe...,Viral_Entry_via_Olfactory_Neurons,Olfactory_neurons,transmit,virus_to_brain,Viral Entry and Neuroinvasion
2,https://media.springernature.com/lw685/springe...,Viral_Entry_via_Olfactory_Neurons,Virus,causes,cell_damage,Viral Entry and Neuroinvasion
3,https://media.springernature.com/lw685/springe...,Blood-Brain_Barrier_Disruption,SARS-CoV-2,binds_to,ACE2,Viral Entry and Neuroinvasion
4,https://media.springernature.com/lw685/springe...,Blood-Brain_Barrier_Disruption,Virus,disrupts,tight_junctions,Viral Entry and Neuroinvasion
...,...,...,...,...,...,...
2671,https://www.aging-us.com/article/202136/figure...,Large_Accumulation_of_AngII,Large_accumulation_of_AngII,triggers,inflammatory_cascade,Uncategorized
2672,https://www.aging-us.com/article/202136/figure...,Large_Accumulation_of_AngII,Large_accumulation_of_AngII,induces,oxidative_stress,Uncategorized
2673,https://www.aging-us.com/article/202136/figure...,CD147,CD147,triggers,pro-inflammatory_factors,Viral Entry and Neuroinvasion
2674,https://www.aging-us.com/article/202136/figure...,CD147,CD147,affects,vascular_permeability,Viral Entry and Neuroinvasion


In [203]:
print(len(set_t))
set_t = set(parsed_df['Pathophysiological Process'])

1086


In [204]:
set_t

{'Prion_Infectious_and_Damaging_Protein',
 'Direct_Viral_Entry',
 'Inflammatory_response',
 'Autoimmune_disorders',
 'Reduced_Dopamine_Connectivity',
 'Respiratory_Dysfunction',
 'Unhealthy_Microbes',
 'Direct_cellular_injury',
 'BBB_Dysfunction',
 'Cerebellum_Damage',
 'Endothelium_and_Pericyte_Infection_or_Damage',
 'Gray-white_matter_&_functional_connectivity_changes',
 'Viral_Infection',
 'SG_assembly',
 'Fibrin_Degradation',
 'Muscle_Fiber_Atrophy',
 'Gut-Brain_Axis_Interaction',
 'Muscle_Mitochondrial_Dysfunction',
 'Microcoagulation',
 'Astrocyte_Interaction',
 'Altered_Da_synthesis_metabolism_and_receptor_signaling',
 'Glial_cell_activation',
 'Debris_Clearance',
 'Neurodegenerative_Diseases_Development',
 'Major_Depressive_Disorder',
 'Nervous_Spreading',
 'Watershed_Areas_Infarction',
 'Neutrophils_and_NETs',
 'Neural_Retrograde_Invasion',
 'Trans-Infection',
 'Mitochondrial_Dysfunction',
 'BBB_Breakdown',
 'Synaptic_Alteration',
 'Indirect_vascular_effects',
 'Damage_to_Neur

In [208]:
categories_dict = {
    'Viral Entry and Neuroinvasion': {
        'ACE2': 3, 'TMPRSS2': 3, 'CD147': 3, 'receptor': 2, 'binding': 2, 
        'entry': 3, 'invasion': 3, 'spike_protein': 3, 'olfactory': 3, 
        'hematogenous': 3, 'blood-brain_barrier': 3, 'BBB': 3, 'neuroinvasion': 3, 
        'retrograde': 2, 'transport': 2, 'direct_infection': 3, 'direct_viral':2, 'neural': 2, 
        'neuron': 2, 'enteric_nervous_system': 3, 'route': 2, 'replication': 2, 
        'pathway': 1, 'transmission': 2, 'route':3
    },
    'Immune and Inflammatory Response': {
        'immune': 2, 'cytokine': 3, 'storm': 3, 'IL': 2, 'interleukin': 2, 
        'TNFa': 3, 'T_cell': 3, 'B_cell': 3, 'inflammatory': 3, 'inflammation': 3, 
        'microglia': 2, 'astrocyte': 2, 'immune_response': 3, 'Th1': 2, 
        'Th17': 2, 'immunity': 2, 'macrophage': 2, 'leukocyte': 2, 
        'neutrophil': 2, 'antibody': 3, 'complement': 2, 'infection':2, 'signaling':3, 'glial_cell':3
    },
    'Cellular and Molecular Neurodegenerative Mechanisms': {
        'amyloid': 3, 'tau': 3, 'synuclein': 3, 'prion': 3, 'aggregation': 2, 
        'misfolding': 2, 'oxidative_stress': 3, 'mitochondria': 3, 
        'neurodegeneration': 3, 'neurotoxicity': 3, 'neuronal_death': 3, 
        'apoptosis': 2, 'cell_death': 2, 'synapse': 2, 'excitotoxicity': 3, 
        'neurotransmitter': 2, 'synaptic_loss': 3, 'bioenergetic': 2, 
        'atrophy': 2, 'degradation': 2, 'dopamine':3, 'signaling':2, 'glia':2, 'grey_matter': 3, 
        'white_matter': 3, 'cortex': 3, 'posterior_fossa': 2, 'piriform_cortex': 2, 'hippocampus': 3, 
        'parahippocampal_gyrus': 3, 'amygdala': 2, 'basal_ganglia': 3, 'thalamus': 2, 'cerebellum': 2, 
        'prefrontal_cortex': 3, 'temporal_lobe': 3, 'frontal_lobe': 3, 'occipital_lobe': 2, 'parietal_lobe': 2, 
        'olfactory_bulb': 2, 'brainstem': 2, 'globus_pallidus': 2, 'substantia_nigra': 3
    },
    'Vascular-Related Neurodegenerative Effects': {
        'endothelial': 3, 'vascular': 3, 'BBB_dysfunction': 3, 'coagulation': 3, 
        'fibrin': 2, 'thrombus': 3, 'microclot': 3, 'platelet': 2, 
        'thrombosis': 3, 'stroke': 3, 'ischemia': 3, 'hypoperfusion': 2, 
        'oxygenation': 2, 'microhemorrhage': 3
    },
    'Psychological and Neurological Symptoms': {
        'depression': 3, 'depressive':3, 'anxiety': 3, 'cognitive': 3, 'memory': 3, 
        'brain_fog': 3, 'behavior': 2, 'mental': 2, 'psychological': 3, 
        'stress': 2, 'mood': 2, 'fatigue': 2, 'confusion': 2, 
        'neurological': 3, 'neurocognitive': 3, 'headache': 2, 
        'anosmia': 3, 'ageusia': 3, 'smell': 2, 'psychosis': 3, 
        'psychiatric': 3, 'neuromuscular': 2
    },
    'Systemic Cross-Organ Effects': {
        'gut': 3, 'lung': 3, 'systemic': 3, 'cross-organ': 3, 
        'multi-organ': 3, 'metabolic': 2, 'blood': 2, 'microbiota': 3, 
        'dysbiosis': 3, 'liver': 2, 'gastrointestinal': 3, 'pulmonary': 2, 
        'kidney': 2, 'endocrine': 2, 'hormonal': 2, 'hormone':2, 'comorbidity': 2, 
        'microbiome': 3, 'organ': 2, 'HPA_axis': 3, 'alveol':2
    }
}

In [209]:
from rapidfuzz import fuzz  # or from fuzzywuzzy import fuzz

# Set a similarity threshold for fuzzy matching
SIMILARITY_THRESHOLD = 80

# Initialize the group dictionary with each category as a key and an empty list as the value
group = {key: [] for key in categories_dict.keys()}
group['Uncategorized'] = []

# Loop over each item in the set to categorize it based on fuzzy matching
for item in set_t:
    # Convert the item to lowercase for case-insensitive matching
    item_lower = item.lower()
    
    # Initialize category scores with 0 for each category; this will store the accumulated score
    # for each category based on fuzzy matched keywords
    category_scores = {key: 0 for key in categories_dict}
    
    # Calculate the score for each category by summing the weights of matching keywords
    for category, keywords in categories_dict.items():
        for keyword, weight in keywords.items():
            # Convert the keyword to lowercase to match against the lowercase item text
            # Use fuzzy matching to check if the keyword is similar to any part of the item text
            similarity_score = fuzz.partial_ratio(keyword.lower(), item_lower)
            if similarity_score >= SIMILARITY_THRESHOLD:
                category_scores[category] += weight * (similarity_score / 100)  # Scale weight by similarity

    # Find the maximum score among all categories
    max_score = max(category_scores.values())
    
    #pat_process = item.split('@')[0]
    pat_process = item

    # Determine all categories that have the maximum score (if above zero)
    if max_score > 0:
        # Add item to each category that has the highest score
        for category, score in category_scores.items():
            if score == max_score:
                group[category].append(pat_process)
    else:
        # If no keywords matched, add item to 'Uncategorized'
        group['Uncategorized'].append(pat_process)

# `group` now contains the categorized items, allowing items to be in multiple categories if they
# have the same highest score across different categories


In [210]:
len(group['Uncategorized'])

170

In [211]:
group['Uncategorized']

['Unhealthy_Microbes',
 'SG_assembly',
 'Debris_Clearance',
 'Nervous_Spreading',
 'Watershed_Areas_Infarction',
 'Parenchymal_Manifestations',
 'Melatonin_Administration_Effect',
 'NRP-3_Inflammasome',
 'Pericyte_Damage',
 'Damage',
 'post-COVID-19_syndrome',
 'Paralysis',
 'GBS_&_Variants',
 'Endocytosis_and_Exocytosis',
 'Aβ42_Elevation',
 'Impaired_Autophagy',
 'Vasoconstriction_through_Pericytes',
 'Abnormal_Insulin_Secretion',
 'Clockophagy',
 'Angiotensin_II_Accumulation',
 'Probiotic_Intervention',
 'Lymphopenia',
 'TDP-43_Elevation',
 'Lymphocytes_Unbalance',
 'Synctia_Formation',
 'Hit_and_run_hypothesis',
 'Peripheral_Nerve_Manifestations',
 'Antioxidant_Defense',
 'Loss_of_Consciousness',
 'Viral_Uptake',
 'Large_Accumulation_of_AngII',
 'Co-culture_on_chip',
 'Multiple_tissues_affected',
 'Antigen_Presentation',
 'Myelitis',
 'Cardiac_Arrhythmia',
 'Loneliness',
 'Transsynaptic_migration_of_Coronaviruses',
 'Encephalitis',
 'Hypothalamic-pituitary-adrenal_axis_effect',
 'A

In [212]:
reverse_mapping_group = {value: key for key, values in group.items() for value in values}

# Step 2: Map the values in 'existing_column' based on the reverse lookup
parsed_df['Group'] = parsed_df['Pathophysiological Process'].map(reverse_mapping_group)
parsed_df

Unnamed: 0,URL,Pathophysiological Process,Subject,Predicate,Object,Group
0,https://media.springernature.com/lw685/springe...,Viral_Entry_via_Olfactory_Neurons,SARS-CoV-2,enters,olfactory_neurons,Viral Entry and Neuroinvasion
1,https://media.springernature.com/lw685/springe...,Viral_Entry_via_Olfactory_Neurons,Olfactory_neurons,transmit,virus_to_brain,Viral Entry and Neuroinvasion
2,https://media.springernature.com/lw685/springe...,Viral_Entry_via_Olfactory_Neurons,Virus,causes,cell_damage,Viral Entry and Neuroinvasion
3,https://media.springernature.com/lw685/springe...,Blood-Brain_Barrier_Disruption,SARS-CoV-2,binds_to,ACE2,Viral Entry and Neuroinvasion
4,https://media.springernature.com/lw685/springe...,Blood-Brain_Barrier_Disruption,Virus,disrupts,tight_junctions,Viral Entry and Neuroinvasion
...,...,...,...,...,...,...
2671,https://www.aging-us.com/article/202136/figure...,Large_Accumulation_of_AngII,Large_accumulation_of_AngII,triggers,inflammatory_cascade,Uncategorized
2672,https://www.aging-us.com/article/202136/figure...,Large_Accumulation_of_AngII,Large_accumulation_of_AngII,induces,oxidative_stress,Uncategorized
2673,https://www.aging-us.com/article/202136/figure...,CD147,CD147,triggers,pro-inflammatory_factors,Viral Entry and Neuroinvasion
2674,https://www.aging-us.com/article/202136/figure...,CD147,CD147,affects,vascular_permeability,Viral Entry and Neuroinvasion


In [215]:
#parsed_df.to_excel(r"Triples_Final_Grouped.xlsx", index=False)