In [1]:
# Imports and BeautifulSoup Extraction
import requests
from bs4 import BeautifulSoup
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# URL and headers for request
url = "https://www.ninds.nih.gov/health-information/disorders/parkinsons-disease"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
}

In [2]:
# Fetch page content
response = requests.get(url, headers=headers)
if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')

    # Extract titles and paragraphs
    article_content = []
    current_title = None
    for element in soup.find_all(['h1', 'h2', 'h3', 'p']):
        if element.name in ['h1', 'h2', 'h3']:
            current_title = element.get_text(strip=True)
        elif element.name == 'p' and current_title:
            article_content.append({"title": current_title, "paragraph": element.get_text(strip=True)})

    # Print the extracted content
    # print("Extracted Content:")
    # The 'for' loop ends here
    #for item in article_content:
        # print(f"Title: {item['title']}")
        # print(f"Paragraph: {item['paragraph']}")
        # print("----")

else:
  # print(f"Failed to retrieve the page. Status code: {response.status_code}")
  article_content = []

In [3]:
# Filter paragraphs based on specific titles and words
target_titles = ["Treating PD"]  # titles to focus on
keywords = ["Carbidopa-Levodopa", "levodopa", "Dopamine agonists", "MAO-B inhibitors", "COMT inhibitors", "Anticholinergics", "Amantadine"]  # Add specific keywords you want to focus on

# Filter paragraphs by target titles
filtered_content = [item for item in article_content if item['title'] in target_titles]

In [4]:
filtered_paragraphs = []
for item in filtered_content:
    paragraph = item['paragraph']
    # Check if any keyword exists in the paragraph
    if any(keyword.lower() in paragraph.lower() for keyword in keywords):
        filtered_paragraphs.append(item)

# Debug: Print filtered paragraphs
for item in filtered_paragraphs:
    print(f"Title: {item['title']}")
    print(f"Paragraph: {item['paragraph']}\n")

Title: Treating PD
Paragraph: Carbidopa-Levodopa.The cornerstone of PD therapy is a medication called levodopa (also known as L-dopa). Levodopa can reduce the movement-related symptoms of PD, but it does not replace lost nerve cells or stop its progression. Nerve cells can use levodopa to make dopamine and replenish the brain's reduced supply. People cannot simply take dopamine pills because dopamine does not easily cross theblood-brain barrier, a protective lining of cells inside blood vessels that regulate the transport of oxygen, glucose, medications, and other substances in the brain. People with PD are given levodopa combined with another substance called carbidopa. When added to levodopa, carbidopa prevents the conversion of levodopa into dopamine except for in the brain. This stops or diminishes the side effects of  excess dopamine in the bloodstream, such as nausea. Carbidopa-levodopa is often very successful at reducing or eliminating the tremors and other motor symptoms of PD

In [5]:
import spacy

nlp = spacy.load("en_core_web_sm")




def extract_svo(sentence):
    """
    Extract subject-verb-object triplets from a sentence.
    """
    doc = nlp(sentence)
    svos = []

    for token in doc:
        if token.dep_ in {"nsubj", "nsubjpass"}:  # Subject
            subject = token.text
            verb = token.head.text  # Verb is the head of the subject
            obj = None

            for child in token.head.children:  # Look for objects
                if child.dep_ in {"dobj", "pobj", "attr"}:
                    obj = child.text
                    svos.append((subject, verb, obj))
                elif child.dep_ == "conj":  # Handle conjunctions (e.g., X and Y)
                    obj = child.text
                    svos.append((subject, verb, obj))

    return svos




In [6]:
def convert_to_svo_format(paragraph):
    """
    Convert a paragraph into simplified SVO sentences.
    """
    doc = nlp(paragraph)
    svo_sentences = []

    for sent in doc.sents:
        svos = extract_svo(sent.text)
        for subject, verb, obj in svos:
            svo_sentences.append(f"{subject} {verb} {obj}.")

    return " ".join(svo_sentences)


In [7]:
# Convert to SVO format
# Iterate through each item in the filtered_paragraphs list
for item in filtered_paragraphs:
    paragraph_text = item['paragraph'] # Get the actual paragraph text
    svo_paragraph = convert_to_svo_format(paragraph_text) # Pass the text to the function
    print(f"Original Paragraph:\n{paragraph_text}\n") # Print the original paragraph text
    print(f"SVO Format:\n{svo_paragraph}\n") # Print the SVO format

Original Paragraph:
Carbidopa-Levodopa.The cornerstone of PD therapy is a medication called levodopa (also known as L-dopa). Levodopa can reduce the movement-related symptoms of PD, but it does not replace lost nerve cells or stop its progression. Nerve cells can use levodopa to make dopamine and replenish the brain's reduced supply. People cannot simply take dopamine pills because dopamine does not easily cross theblood-brain barrier, a protective lining of cells inside blood vessels that regulate the transport of oxygen, glucose, medications, and other substances in the brain. People with PD are given levodopa combined with another substance called carbidopa. When added to levodopa, carbidopa prevents the conversion of levodopa into dopamine except for in the brain. This stops or diminishes the side effects of  excess dopamine in the bloodstream, such as nausea. Carbidopa-levodopa is often very successful at reducing or eliminating the tremors and other motor symptoms of PD during th

In [8]:
from collections import defaultdict

def parse_svo_to_mappings(svo_text):
    """
    Parse SVO text into Drug-Aspects and Drug-Side Effects mappings.
    """
    aspect_relations = {"reduce", "delay", "increase"}
    side_effect_relations = {"include", "cause", "leads to", "experience"}
    drug_aspects = defaultdict(list)
    drug_side_effects = defaultdict(list)

    for sentence in svo_text.split(". "):
        tokens = sentence.strip().split(" ")
        if len(tokens) < 3:
            continue

        subject, verb, obj = tokens[0], tokens[1], " ".join(tokens[2:])
        if verb in aspect_relations:
            drug_aspects[subject].append((verb, obj))
        elif verb in side_effect_relations:
            drug_side_effects[subject].append((verb, obj))

    return drug_aspects, drug_side_effects



In [9]:
# Debugging SVO extraction
for item in filtered_paragraphs:
    paragraph_text = item['paragraph']
    svo_paragraph = convert_to_svo_format(paragraph_text)


    # Parse SVOs to mappings
    drug_aspects, drug_side_effects = parse_svo_to_mappings(svo_paragraph)

    print("\nDrug-Aspects Mapping:")
    for drug, aspects in drug_aspects.items():
        print(f"Drug: {drug}")
        for relation, aspect in aspects:
            print(f"  - Relation: {relation} | Aspect: {aspect}")
        print()

    print("\nDrug-Side Effects Mapping:")
    for drug, side_effects in drug_side_effects.items():
        print(f"Drug: {drug}")
        for relation, side_effect in side_effects:
            print(f"  - Relation: {relation} | Side Effect: {side_effect}")
        print()



Drug-Aspects Mapping:
Drug: Levodopa
  - Relation: reduce | Aspect: symptoms
  - Relation: reduce | Aspect: replace


Drug-Side Effects Mapping:

Drug-Aspects Mapping:

Drug-Side Effects Mapping:
Drug: effects
  - Relation: include | Side Effect: nausea

Drug: person
  - Relation: experience | Side Effect: dyskinesia.


Drug-Aspects Mapping:

Drug-Side Effects Mapping:
Drug: People
  - Relation: experience | Side Effect: periods.


Drug-Aspects Mapping:

Drug-Side Effects Mapping:
Drug: they
  - Relation: cause | Side Effect: desire

Drug: drugs
  - Relation: include | Side Effect: apomorphine.


Drug-Aspects Mapping:
Drug: selegiline
  - Relation: delay | Aspect: need


Drug-Side Effects Mapping:
Drug: medications
  - Relation: include | Side Effect: selegiline

Drug: effects
  - Relation: include | Side Effect: nausea.


Drug-Aspects Mapping:

Drug-Side Effects Mapping:
Drug: effects
  - Relation: include | Side Effect: diarrhea


Drug-Aspects Mapping:

Drug-Side Effects Mapping:
Dr