In [1]:
!pip install requests rdflib SPARQLWrapper nltk drug-named-entity-recognition transformers

Defaulting to user installation because normal site-packages is not writeable




Collecting rdflib
  Downloading rdflib-7.1.1-py3-none-any.whl.metadata (11 kB)
Collecting SPARQLWrapper
  Downloading SPARQLWrapper-2.0.0-py3-none-any.whl.metadata (2.0 kB)
Collecting drug-named-entity-recognition
  Downloading drug_named_entity_recognition-2.0.4-py3-none-any.whl.metadata (22 kB)
Collecting transformers
  Downloading transformers-4.47.1-py3-none-any.whl.metadata (44 kB)
Collecting huggingface-hub<1.0,>=0.24.0 (from transformers)
  Downloading huggingface_hub-0.27.0-py3-none-any.whl.metadata (13 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Downloading tokenizers-0.21.0-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Downloading safetensors-0.4.5-cp312-none-win_amd64.whl.metadata (3.9 kB)
Downloading rdflib-7.1.1-py3-none-any.whl (562 kB)
   ---------------------------------------- 0.0/562.4 kB ? eta -:--:--
   ------------------------------------- -- 524.3/562.4 kB 5.6 MB/s eta 0:00:01
   --------------------

In [11]:
# Import required libraries
import tkinter as tk
from tkinter import scrolledtext, messagebox
import requests
from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import RDF, RDFS
from SPARQLWrapper import SPARQLWrapper, JSON
from nltk.tokenize import word_tokenize
from drug_named_entity_recognition import find_drugs

# Define Namespace and SPARQL endpoint
EX = Namespace("http://example.org/drugs#")
sparql = SPARQLWrapper("http://localhost:3030/drugdb/sparql")


In [12]:
# Corrected Function with Namespace Declaration
def check_drug_existence(drug_name):
    query = f"""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    
    ASK WHERE {{
        ?drug rdfs:label "{drug_name}" .
    }}
    """
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    response = sparql.query().convert()
    return response['boolean']



In [13]:
def fetch_drug_details_from_pubchem(drug_name):
    standardized_drug_name = drug_name.lower()
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{standardized_drug_name}/property/MolecularWeight,CanonicalSMILES,InChIKey/JSON"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()['PropertyTable']['Properties'][0]
        data['DrugName'] = drug_name
        return data
    return None


In [14]:
def save_drug_to_rdf(drug_details, side_effects, interactions):
    g = Graph()
    drug_uri = URIRef(f"http://example.org/drugs/{drug_details['DrugName'].replace(' ', '_')}")

    # Add Basic Drug Info
    g.add((drug_uri, RDF.type, EX.Drug))
    g.add((drug_uri, RDFS.label, Literal(drug_details['DrugName'])))
    g.add((drug_uri, EX.MolecularWeight, Literal(drug_details['MolecularWeight'])))
    g.add((drug_uri, EX.CanonicalSMILES, Literal(drug_details['CanonicalSMILES'])))
    g.add((drug_uri, EX.InChIKey, Literal(drug_details['InChIKey'])))

    # Add Side Effects
    for effect in side_effects:
        g.add((drug_uri, EX.hasSideEffect, Literal(effect)))

    # Add Interactions
    for interacting_drug, interaction_type in interactions:
        interaction_uri = URIRef(f"http://example.org/drugs/{interacting_drug.replace(' ', '_')}")
        g.add((drug_uri, EX.hasInteractionWith, interaction_uri))
        g.add((drug_uri, EX.interactionType, Literal(interaction_type)))

    # Save RDF to Turtle Format
    rdf_file = f"{drug_details['DrugName'].replace(' ', '_')}.ttl"
    g.serialize(rdf_file, format="turtle")
    print(f"Saved {drug_details['DrugName']} to RDF file: {rdf_file}")
    return rdf_file



In [15]:
def upload_rdf_to_fuseki(rdf_file):
    url = "http://localhost:3030/drugdb/data"
    headers = {"Content-Type": "text/turtle"}
    
    # Read the RDF content from the file
    with open(rdf_file, "rb") as file:
        rdf_data = file.read()

    # Send POST request with correct headers
    response = requests.post(url, data=rdf_data, headers=headers)
    
    if response.status_code == 200:
        print(f"Uploaded {rdf_file} successfully.")
        return True
    else:
        print(f"Failed to upload {rdf_file}. Status Code: {response.status_code}")
        print(f"Error: {response.text}")
        return False


In [16]:
def extract_drug_names(user_input):
    tokens = word_tokenize(user_input)
    drugs = find_drugs(tokens)
    return [drug[0]['name'] for drug in drugs]


In [17]:
# Query drug details using SPARQL
def query_drug_details(drug_name):
    query = f"""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX ex: <http://example.org/drugs#>

    SELECT ?property ?value
    WHERE {{
        ?drug rdfs:label "{drug_name}" .
        ?drug ?property ?value .
    }}
    """
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    # Extract and format results
    drug_details = {}
    for result in results["results"]["bindings"]:
        prop = result["property"]["value"].split("#")[-1]
        value = result["value"]["value"]
        drug_details[prop] = value

    return drug_details


In [18]:
# Fetch Side Effects and Summarize
def fetch_side_effects_from_pubchem(cid):
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/{cid}/JSON"
    response = requests.get(url)
    side_effects = []

    if response.status_code == 200:
        data = response.json()
        for section in data['Record']['Section']:
            if section['TOCHeading'] == "Pharmacology and Biochemistry":
                for subsection in section['Section']:
                    if 'Information' in subsection:
                        for info in subsection['Information']:
                            if 'Value' in info and 'StringWithMarkup' in info['Value']:
                                side_effects.append(info['Value']['StringWithMarkup'][0]['String'])

    # Return Summary
    return clean_api_response(side_effects) or "No known side effects."


In [19]:
# Query side effects from RDF store
def query_drug_side_effects(drug_name):
    query = f"""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX ex: <http://example.org/drugs#>

    SELECT ?sideEffect
    WHERE {{
        ?drug rdfs:label "{drug_name}" .
        ?drug ex:hasSideEffect ?sideEffect .
    }}
    """
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    side_effects = [result["sideEffect"]["value"] for result in results["results"]["bindings"]]
    return side_effects


In [20]:
# Simulated drug interactions (could be extended)
def fetch_drug_interactions(drug_name):
    interactions = {
        "Ibuprofen": [("Warfarin", "Increased Bleeding Risk"), ("Aspirin", "Reduced Effectiveness")],
        "Acetaminophen": [("Alcohol", "Increased Liver Toxicity")]
    }
    return interactions.get(drug_name, [])


In [21]:
# Query drug interactions
def query_drug_interactions(drug_name):
    query = f"""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX ex: <http://example.org/drugs#>

    SELECT ?interactingDrug ?interactionType
    WHERE {{
        ?drug1 rdfs:label "{drug_name}" .
        ?drug1 ex:hasInteractionWith ?interactingDrug .
        ?drug1 ex:interactionType ?interactionType .
    }}
    """
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    interactions = [
        (result["interactingDrug"]["value"], result["interactionType"]["value"])
        for result in results["results"]["bindings"]
    ]
    return interactions


In [31]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load Hugging Face DistilGPT-2 (lightweight model)
#tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
#model = AutoModelForCausalLM.from_pretrained("distilgpt2")

model_name = "bigscience/bloom-560m"
model = BloomForCausalLM.from_pretrained(model_name)
tokenizer = BloomTokenizerFast.from_pretrained(model_name)


In [32]:
# Format Drug Details Before LLM Prompt
def format_drug_details(drug_name, drug_details, side_effects, interactions):
    details_text = (
        f"Drug Name: {drug_details.get('DrugName', drug_name)}\n"
        f"Molecular Weight: {drug_details.get('MolecularWeight', 'N/A')} g/mol\n"
        f"Chemical Structure (SMILES): {drug_details.get('CanonicalSMILES', 'N/A')}\n"
        f"Unique Key (InChIKey): {drug_details.get('InChIKey', 'N/A')}\n"
    )

    # Format Side Effects and Interactions
    side_effects_text = f"Drug Information: {side_effects}"
    interactions_text = f"Drug Interactions: {', '.join([f'{drug} ({desc})' for drug, desc in interactions])}" if interactions else "No known interactions."

    # Generate Clean Prompt
    prompt = f"""
    Provide a detailed explanation about the drug "{drug_name}" based on the following data:

    {details_text}
    {side_effects_text}
    {interactions_text}

    Explain this information in user-friendly language.
    """
    return prompt



In [33]:
def clean_api_response(data_list, max_length=300):
    cleaned_data = []
    for entry in data_list:
        if isinstance(entry, str):
            # Limit Entry Length
            cleaned_data.append(entry.strip().replace("\n", " "))
    # Return Shortened Summary
    return " ".join(cleaned_data)[:max_length] + ("..." if len(" ".join(cleaned_data)) > max_length else "")

In [34]:
# Query Existing Drug Data
def fetch_existing_rdf_data(drug_name):
    query = f"""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX ex: <http://example.org/drugs#>

    SELECT ?property ?value
    WHERE {{
        ?drug rdfs:label "{drug_name}" .
        ?drug ?property ?value .
    }}
    """
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    existing_data = {result["property"]["value"].split("#")[-1]: result["value"]["value"]
                     for result in results["results"]["bindings"]}
    return existing_data


In [35]:
# Compare Existing RDF Data with Latest Data from PubChem
def is_data_updated(existing_data, latest_data):
    # Define fields to compare
    fields_to_check = ["MolecularWeight", "CanonicalSMILES", "InChIKey"]

    # Check if any field has been updated
    for field in fields_to_check:
        if field not in existing_data or str(existing_data[field]) != str(latest_data[field]):
            return True
    return False


In [36]:
# Delete Existing RDF Data from Apache Jena Fuseki
def delete_existing_rdf_data(drug_name):
    delete_query = f"""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX ex: <http://example.org/drugs#>

    DELETE WHERE {{
        ?drug rdfs:label "{drug_name}" .
        ?drug ?property ?value .
    }}
    """
    sparql.setMethod("POST")
    sparql.setQuery(delete_query)
    sparql.query()
    print(f"Deleted existing RDF data for {drug_name}.")


In [37]:
# Generate Text Response Using Lightweight LLM
def generate_text_response(drug_name, drug_details, side_effects, interactions):
    # Format the prompt text
    prompt = format_drug_details(drug_name, drug_details, side_effects, interactions)

    # Tokenize and Generate Text Using the LLM
    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(inputs["input_ids"], max_new_tokens=150, temperature=0.7)

    # Decode and Return the Generated Text
    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(response_text)
    return response_text


In [38]:
def process_input():
    user_input = input_text.get("1.0", tk.END).strip()

    if not user_input:
        messagebox.showerror("Error", "Please enter a query.")
        return

    drug_names = extract_drug_names(user_input)

    if not drug_names:
        result_text.delete("1.0", tk.END)
        result_text.insert(tk.END, "No drugs found in the input text.\n")
        return

    result_text.delete("1.0", tk.END)
    result_text.insert(tk.END, "--- Generated Drug Information ---\n\n")

    for drug_name in drug_names:
        # Fetch Data from PubChem and RDF Store
        drug_details = fetch_drug_details_from_pubchem(drug_name)
        if not drug_details:
            result_text.insert(tk.END, f"No details found for {drug_name} on PubChem.\n")
            continue

        # Fetch Side Effects and Interactions
        side_effects = fetch_side_effects_from_pubchem(drug_details["CID"])
        interactions = fetch_drug_interactions(drug_name)

        # Save Data to RDF Store if Missing
        rdf_file = save_drug_to_rdf(drug_details, side_effects, interactions)
        if upload_rdf_to_fuseki(rdf_file):
            result_text.insert(tk.END, f"{drug_name} saved successfully to the RDF store.\n")
        else:
            result_text.insert(tk.END, f"Failed to save {drug_name} to the RDF store.\n")

        # Generate and Display Text
        prompt = format_drug_details(drug_name, drug_details, side_effects, interactions)
        generated_response = generate_text_response(drug_name, drug_details, side_effects, interactions)
        result_text.insert(tk.END, f"\n--- Response for {drug_name} ---\n")
        result_text.insert(tk.END, generated_response + "\n\n")


In [39]:
# Create the UI window
window = tk.Tk()
window.title("Drug Interaction System")
window.geometry("700x600")

# Input Field
tk.Label(window, text="Enter Your Query:", font=("Arial", 14)).pack(pady=10)
input_text = scrolledtext.ScrolledText(window, wrap=tk.WORD, width=70, height=5, font=("Arial", 12))
input_text.pack(pady=5)

# Process Button
submit_button = tk.Button(window, text="Process", command=process_input, font=("Arial", 14), width=10)
submit_button.pack(pady=10)

# Result Display Field
tk.Label(window, text="Results:", font=("Arial", 14)).pack(pady=10)
result_text = scrolledtext.ScrolledText(window, wrap=tk.WORD, width=70, height=20, font=("Arial", 12))
result_text.pack(pady=5)

# Start the Tkinter Main Loop
window.mainloop()


Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Users\saura\AppData\Local\Programs\Python\Python312\Lib\site-packages\urllib3\connection.py", line 198, in _new_conn
    sock = connection.create_connection(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\saura\AppData\Local\Programs\Python\Python312\Lib\site-packages\urllib3\util\connection.py", line 60, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\saura\AppData\Local\Programs\Python\Python312\Lib\socket.py", line 963, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
socket.gaierror: [Errno 11001] getaddrinfo failed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\saura\Ap

Saved Acetaminophen to RDF file: Acetaminophen.ttl
Uploaded Acetaminophen.ttl successfully.





    Provide a detailed explanation about the drug "Acetaminophen" based on the following data:

    Drug Name: Acetaminophen
Molecular Weight: 151.16 g/mol
Chemical Structure (SMILES): CC(=O)NC1=CC=C(C=C1)O
Unique Key (InChIKey): RZVAJINKPMORJF-UHFFFAOYSA-N

    Drug Information: Animal and clinical studies have determined that acetaminophen has both antipyretic and analgesic effects. This drug has been shown to lack anti-inflammatory effects. As opposed to the _salicylate_ drug class, acetaminophen does not disrupt tubular secretion of uric acid and does not affect acid-bas...
    Drug Interactions: Alcohol (Increased Liver Toxicity)

    Explain this information in user-friendly language.
    (1) Acetaminophen is a non-steroidal anti-inflammatory drug. It is a non-steroidal anti-inflammatory drug that is used to treat pain, inflammation, and inflammation of the skin, joints, and lungs. It is used to treat pain, inflammation, and inflammation of the skin, joints, and lungs. It is use