#### This is the prompt used to generate the disease descriptions

In [51]:
import csv
import openai

# Read the API key from a local file. The context manager closes the handle
# deterministically (a bare open(...).read() leaves it to the garbage
# collector), and strip() removes any surrounding whitespace/newline.
with open("key.txt", "r") as key_file:
    openai.api_key = key_file.read().strip()

# Define the model to use
model_id = "gpt-4-1106-preview"

# File paths (relative to the notebook's working directory)
input_csv_path = 'disease.csv'
# NOTE(review): 'desease' looks like a typo for 'disease'; the name is kept
# unchanged because the saved cell output refers to this exact file.
output_csv_path = 'desease_desc.csv'

# Function to create the user message for the conversation
def create_user_message(disease_name: str, omim_id: str) -> str:
    """Build the user prompt asking for a narrative description of a disease.

    NOTE: a later cell in this notebook defines a drug-specific function with
    the same name, which replaces this one in the kernel once that cell runs.

    Args:
        disease_name: Disease name, interpolated verbatim into the prompt.
        omim_id: OMIM identifier of the disease, interpolated verbatim.

    Returns:
        The complete prompt text: an instruction sentence, a numbered list of
        the 9 required topics (one per line), and closing guidance.
    """
    # Adjacent string literals are concatenated with no separator, so every
    # sentence must carry its own trailing space or newline.
    return (
        f"Generate a single, cohesive, narrative paragraph for the disease '{disease_name}' associated with OMIM ID '{omim_id}'. "
        # The stated count must match the number of items listed below (9).
        "The response should include 9 key pieces of information as follows:\n"
        "1) associated genes, proteins, or mutations, with at least 3 examples.\n"
        "2) associated signal pathway, including key molecular or cellular components.\n"
        "3) associated drugs commonly used for treatment, with at least 3 examples and their mechanisms of action.\n"
        "4) any linked comorbidities and complications.\n"
        "5) nature of the disease.\n"
        "6) typical clinical symptoms and signs.\n"
        "7) types of the disease.\n"
        "8) inheritance patterns and any known genetic component, with examples.\n"
        "9) diagnostic criteria and testing methods.\n\n"
        "If no specific answer, just return not available. "
        "The information does not need to be current or from a live database. "
        "Ensure the final summary is precise, evidence-based, suitable for a professional medical audience, and condense all the points above into a coherent narrative."
    )



# Walk the disease table row by row, ask the model for a description of each
# entry, and stream the answers into the output CSV.
with open(input_csv_path, newline='', encoding='utf-8') as infile, \
        open(output_csv_path, 'w', newline='', encoding='utf-8') as outfile:

    # Input rows are addressed by header name; the output has three fixed columns
    reader = csv.DictReader(infile)
    writer = csv.DictWriter(outfile, fieldnames=['Name', 'Disease', 'Description'])
    writer.writeheader()

    for row in reader:
        disease_name = row['Name']
        omim_id = row['Disease']

        # Two-message chat: a fixed system persona, then the per-disease prompt
        user_prompt = create_user_message(disease_name, omim_id)
        conversation = [
            dict(role="system", content="You are an expert in medical research, genetics, and pharmacology."),
            dict(role="user", content=user_prompt),
        ]

        try:
            # Query the OpenAI chat endpoint and keep the first choice's text
            response = openai.ChatCompletion.create(
                model=model_id, messages=conversation, max_tokens=450
            )
            first_choice = response['choices'][0]
            description = first_choice['message']['content']
        except Exception as e:
            # Best effort: report the failure and record a placeholder so the
            # remaining rows are still processed.
            print(f"An error occurred while processing {disease_name}: {e}")
            description = "Error retrieving information"

        # One output row per input row, successful or not
        writer.writerow(dict(Name=disease_name, Disease=omim_id, Description=description))

print(f"Processing complete. Output saved to {output_csv_path}")


Processing complete. Output saved to desease_desc.csv


#### This is the prompt used to generate the drug descriptions

In [2]:
import csv
import openai

# Read the API key from a local file. The context manager closes the handle
# deterministically (a bare open(...).read() leaves it to the garbage
# collector), and strip() removes any surrounding whitespace/newline.
with open("key.txt", "r") as key_file:
    openai.api_key = key_file.read().strip()

# Define the model to use
model_id = "gpt-4-1106-preview"

# File paths (relative to the notebook's working directory)
input_csv_path = 'drug.csv'
output_csv_path = 'drug_desc.csv'

# Function to create the user message for the conversation
def create_user_message(drug_name: str, drug_id: str, SMILES_note: str) -> str:
    """Build the user prompt asking for a one-paragraph description of a drug.

    Args:
        drug_name: Drug name, interpolated verbatim into the prompt.
        drug_id: DrugBank identifier of the drug, interpolated verbatim.
        SMILES_note: SMILES notation of the drug, interpolated verbatim.

    Returns:
        The complete prompt text: an instruction sentence, the 10 numbered
        topics to cover, and closing guidance, all on a single line.
    """
    # Adjacent string literals are concatenated with no separator, so every
    # sentence must carry its own trailing space.
    return (
        f"Generate a single, comprehensive paragraph for the drug '{drug_name}' "
        f"associated with its DrugBank ID '{drug_id}', "
        f"and its SMILES (Simplified Molecular Input Line Entry System) notation '{SMILES_note}'. "
        "The response should include 10 key pieces of information as follows: "
        "1) detailed description of its chemical structure; "
        "2) its chemical category; "
        "3) its chemical scaffold; "
        "4) any known similar drugs, with examples; "
        "5) detailed description of its pharmacokinetics, including absorption, distribution, metabolism, and excretion; "
        "6) details of its toxicity, with examples; "
        "7) list of any known target proteins; "
        "8) indication of this drug, with specific examples of diseases or symptoms; "
        "9) side effects of this drug, with examples; "
        "10) clinical usage of this drug, with examples. "
        "If no specific answer, just return not available. "
        "The information does not need to be current or from a live database. "
        "Ensure the final summary is precise, evidence-based, suitable for a professional pharmacological or chemical audience, and condenses all the points above into a coherent narrative."
    )




# Walk the drug table row by row, ask the model for a description of each
# entry, and stream the answers into the output CSV.
with open(input_csv_path, newline='', encoding='utf-8') as infile, \
        open(output_csv_path, 'w', newline='', encoding='utf-8') as outfile:

    # Input rows are addressed by header name; the output has three fixed columns
    reader = csv.DictReader(infile)
    writer = csv.DictWriter(outfile, fieldnames=['Name', 'Drug', 'Description'])
    writer.writeheader()

    for row in reader:
        drug_name = row['Name']
        drug_id = row['Drug']
        SMILES_note = row['SMILES']

        # Two-message chat: a fixed system persona, then the per-drug prompt
        user_prompt = create_user_message(drug_name, drug_id, SMILES_note)
        conversation = [
            dict(role="system", content="You are an expert in medical research, genetics, chemistry, and pharmacology."),
            dict(role="user", content=user_prompt),
        ]

        try:
            # Query the OpenAI chat endpoint and keep the first choice's text
            response = openai.ChatCompletion.create(
                model=model_id, messages=conversation, max_tokens=450
            )
            first_choice = response['choices'][0]
            description = first_choice['message']['content']
        except Exception as e:
            # Best effort: report the failure and record a placeholder so the
            # remaining rows are still processed.
            print(f"An error occurred while processing {drug_name}: {e}")
            description = "Error retrieving information"

        # One output row per input row; the SMILES string is not written out
        writer.writerow(dict(Name=drug_name, Drug=drug_id, Description=description))

print(f"Processing complete. Output saved to {output_csv_path}")


Processing complete. Output saved to drug_desc.csv


In [7]:
import os
import numpy as np

# Directory holding the saved .npy result arrays (the "result_origin" folder).
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
result_dir = 'C:/Users/admin/Documents/Courses/2023fall/E4040/finaL/LLM_DDA/result/result_origin'

# Collect all .npy files in the directory. os.listdir() returns entries in
# arbitrary order, so sort them to keep the processing order (and the printed
# log) reproducible across runs and platforms.
npy_files = sorted(f for f in os.listdir(result_dir) if f.endswith('.npy'))

# Process each .npy file
for file in npy_files:
    # Construct the full file path
    file_path = os.path.join(result_dir, file)

    # Load the array from disk
    data = np.load(file_path)

    # Placeholder for real processing: currently only the shape is reported
    print(f"Processing {file}, data shape: {data.shape}")

    # Write the array back to the same path. No transformation is applied, so
    # this overwrites each file with the data just loaded from it; to persist
    # changes, save the modified array here instead.
    np.save(file_path, data)

print("Processing complete.")


Processing b_0.npy, data shape: (269, 598)
Processing b_1.npy, data shape: (269, 598)
Processing b_2.npy, data shape: (269, 598)
Processing b_3.npy, data shape: (269, 598)
Processing b_4.npy, data shape: (269, 598)
Processing b_5.npy, data shape: (269, 598)
Processing b_6.npy, data shape: (269, 598)
Processing b_7.npy, data shape: (269, 598)
Processing b_8.npy, data shape: (269, 598)
Processing b_9.npy, data shape: (269, 598)
Processing complete.
