# Load environment and modules

In [1]:
# NOTE: every `!command` runs in its own short-lived subshell, so `!source activate ...`
# and `!export ...` cannot change this kernel's environment. Start Jupyter from the
# /ix/rboyce/iod4/envs/FM environment (or register it as a kernel) instead.
import os

# Point the Hugging Face cache at shared storage. This must be set inside the kernel
# process, before `transformers` is imported, for the library to pick it up.
os.environ["HF_HOME"] = "/ix/rboyce/iod4/.cache/huggingface/"

## Install or update models

In [2]:
# !pip install transformers
# !pip install accelerate
# !huggingface-cli download meta-llama/Meta-Llama-3-8B-Instruct --include "original/*" --local-dir meta-llama/Meta-Llama-3-8B-Instruct
# !pip install --upgrade transformers optimum optimum-intel
# !pip install --upgrade torch torchvision torchaudio

In [3]:
from pathlib import Path

import torch
import transformers

# Model Prompts

In [4]:
# Ideas:
# How can we prompt the models to mention how recent the information cited is?
# How can we accurately ask the model to convey if the results can be extrapolated between NPs?
# Can we craft the LLM's role in a way that represents the intersection between the three personas? (clinical pharmacists, drug-drug interaction researchers, and drug interaction compendium editors)

# Alternative queries:
# "What are the potential mechanisms for the interaction between cranberry natural products and the drug warfarin as detected by an increase in their International Normalized Ratio (INR) blood test?"

In [5]:
# Markdown-style banners that label the two parts of every prompt.
_banner = "### {} ###\n\n"
instruction_header = _banner.format("Instructions")
query_header = _banner.format("Query")

In [6]:
# Load the shared instruction text that is sent to every model.
# The encoding is pinned so the prompt is decoded the same way whatever the kernel's
# locale is (assumes the file is UTF-8; plain ASCII is a subset).
with open('./instruction_prompt.txt', 'r', encoding='utf-8') as f:
    prompt = f.read()

In [7]:
from langchain_core.prompts import PromptTemplate
import pandas as pd

In [8]:
# Question template; the {NP}, {Drug} and {AE} placeholders are filled in per case study.
query = (
    "What are the potential mechanisms that cause an interaction "
    "between {NP} products and {Drug} that may cause {AE}?"
)

In [9]:
# Wrap the query string in a LangChain PromptTemplate so its placeholders can be filled in.
query_template = PromptTemplate.from_template(query)

In [10]:
# Case-study table; its NP, Drug and AE columns are read when building the queries
# and the result file names.
mappings = pd.read_csv("./Data/CaseStudyMappings.csv")

In [11]:
# One filled-in prompt per row of the mappings table, in row order.
queries = [
    query_template.invoke(case.to_dict())
    for _, case in mappings[["NP", "Drug", "AE"]].iterrows()
]

-------------------

# Results collections

In [12]:
# Root folder under which each model section writes its own sub-folder of .md result files.
results_dir = './Results/Baseline-Results/'

--------------------------

# Baseline LLAMA 3.1 LLM Generation

In [13]:
# Checkpoint name and weight precision used to build the Llama 3.1 pipeline.
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
dtype = torch.bfloat16

In [14]:
# Text-generation pipeline for the Llama checkpoint; device_map="auto" leaves
# device placement to the library.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    torch_dtype=dtype,
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [15]:
# Generate one Llama 3.1 answer per case study and save prompt + answer as Markdown.
# The output folder is created up front so a finished (and expensive) generation is
# never lost to a missing directory.
output_dir = Path(results_dir, 'llama')
output_dir.mkdir(parents=True, exist_ok=True)

for i in range(len(queries)):
    messages = [
        {
            "role": "system",
            "content": ''.join([instruction_header, prompt])
        },
        {
            "role": "user",
            "content": ''.join([query_header, queries[i].text])
        },
    ]
    outputs = pipeline(
        messages,
        max_new_tokens=3500,
        temperature=1.0
    )
    # For chat input the pipeline returns the whole conversation; the last message is the reply.
    response = outputs[0]["generated_text"][-1]["content"]

    case = mappings.loc[i]
    report_path = output_dir / f'llama_3.1_baseline_({case.NP},{case.Drug},{case.AE}).md'
    # Model output can contain non-ASCII characters, so pin UTF-8 instead of relying
    # on the locale's default encoding.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write('# Prompts to LLM:\n\n')
        f.write(messages[0]['content'])
        f.write("\n\n")
        f.write(messages[1]['content'])
        f.write('\n\n-------\n\n')
        f.write('# Response from LLM:\n\n')
        f.write(str(response))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [16]:
# print(outputs[0]["generated_text"][-1]["content"])

------------------------

# Baseline GEMMA 2 LLM Generation

In [17]:
# Checkpoint name and weight precision used to build the Gemma 2 pipeline.
model_id = "google/gemma-2-9b-it"
dtype = torch.bfloat16

In [18]:
# Rebind `pipeline` to the Gemma checkpoint; device_map="auto" leaves device
# placement to the library.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    torch_dtype=dtype,
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [19]:
# print(''.join([instruction_header, prompt,'\n\n', query_header, queries[0].text]))

In [20]:
# Generate one Gemma 2 answer per case study and save prompt + answer as Markdown.
# The output folder is created up front so a finished generation is never lost to a
# missing directory.
output_dir = Path(results_dir, 'gemma')
output_dir.mkdir(parents=True, exist_ok=True)

for i in range(len(queries)):
    # Instructions and query are sent together as a single user turn
    # (presumably because this model's chat template has no system role — confirm).
    messages = [{
        "role": "user",
        "content": ''.join([instruction_header, prompt, '\n\n', query_header, queries[i].text])
    }]
    outputs = pipeline(
        messages,
        max_new_tokens=3500,
        temperature=1.0
    )
    # For chat input the pipeline returns the whole conversation; the last message is the reply.
    response = outputs[0]["generated_text"][-1]["content"]

    case = mappings.loc[i]
    report_path = output_dir / f'gemma_2_baseline_({case.NP},{case.Drug},{case.AE}).md'
    # Model output can contain non-ASCII characters, so pin UTF-8 instead of relying
    # on the locale's default encoding.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write('# Prompts to LLM:\n\n')
        f.write(messages[0]['content'])
        f.write('\n\n-------\n\n')
        f.write('# Response from LLM:\n\n')
        f.write(str(response))

In [21]:
# print(outputs[0]["generated_text"][-1]["content"])

------------------------

# Baseline GPT4o-mini LLM Generation

In [22]:
# import getpass
import os

In [23]:
# if not os.environ.get("OPENAI_API_KEY"):
#     os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

In [24]:
# Copy the personal key into OPENAI_API_KEY for the OpenAI client used below.
# os.environ only accepts strings, so a `default=False` fallback raises an opaque
# TypeError when the variable is missing; fail with an actionable message instead.
_personal_key = os.environ.get("OPENAI_PERSONAL_API_KEY")
if not _personal_key:
    raise RuntimeError(
        "OPENAI_PERSONAL_API_KEY is not set; export it before starting the kernel."
    )
os.environ["OPENAI_API_KEY"] = _personal_key

In [25]:
from langchain_openai import ChatOpenAI

In [26]:
# GPT-4o-mini client, configured with the same temperature and token budget as the
# local pipelines.
_gpt_settings = {
    "model": "gpt-4o-mini",
    "temperature": 1.0,
    "max_tokens": 3500,
}
llm = ChatOpenAI(**_gpt_settings)

In [27]:
# Ask GPT-4o-mini each case-study question and save prompt + answer as Markdown.
# The output folder is created up front so a paid API response is never lost to a
# missing directory.
output_dir = Path(results_dir, 'gpt')
output_dir.mkdir(parents=True, exist_ok=True)

for i in range(len(queries)):
    messages = [
        {
            "role": "system",
            "content": ''.join([instruction_header, prompt])
        },
        {
            "role": "user",
            "content": ''.join([query_header, queries[i].text])
        },
    ]
    outputs = llm.invoke(messages)

    case = mappings.loc[i]
    report_path = output_dir / f'gpt4o-mini_baseline_({case.NP},{case.Drug},{case.AE}).md'
    # Model output can contain non-ASCII characters, so pin UTF-8 instead of relying
    # on the locale's default encoding.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write('# Prompts to LLM:\n\n')
        f.write(messages[0]['content'])
        f.write("\n\n")
        f.write(messages[1]['content'])
        f.write('\n\n-------\n\n')
        f.write('# Response from LLM:\n\n')
        f.write(str(outputs.content))

----------------

# Baseline BLOOM LLM Generation

In [14]:
# Checkpoint name and weight precision for the BLOOMZ baseline.
model_id = "bigscience/bloomz-7b1"
dtype = torch.bfloat16

# Build a dedicated BLOOMZ pipeline. Without this, the generation loop in this section
# reuses whichever `pipeline` object an earlier section left in the kernel and saves
# that model's answers under the bloomz file names.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=dtype,
    device_map="auto"
)
# NOTE(review): the loop below sends chat-style messages; confirm that the BLOOMZ
# tokenizer provides a chat template, otherwise the pipeline cannot format them.

In [37]:
# print(''.join([instruction_header, prompt,'\n\n', query_header, queries[0].text]))

In [21]:
# Generate one BLOOMZ answer per case study and save prompt + answer as Markdown.
# NOTE(review): this cell uses the global `pipeline`; make sure it was built from the
# BLOOMZ `model_id` before running, or another model's output ends up in these files.
# The output folder is created up front so a finished generation is never lost to a
# missing directory.
output_dir = Path(results_dir, 'bloomz')
output_dir.mkdir(parents=True, exist_ok=True)

for i in range(len(queries)):
    messages = [
        {
            "role": "system",
            "content": ''.join([instruction_header, prompt])
        },
        {
            "role": "user",
            "content": ''.join([query_header, queries[i].text])
        },
    ]
    outputs = pipeline(
        messages,
        max_new_tokens=3500,
        temperature=1.0
    )
    # For chat input the pipeline returns the whole conversation; the last message is the reply.
    response = outputs[0]["generated_text"][-1]["content"]

    case = mappings.loc[i]
    report_path = output_dir / f'bloomz_baseline_({case.NP},{case.Drug},{case.AE}).md'
    # Model output can contain non-ASCII characters, so pin UTF-8 instead of relying
    # on the locale's default encoding.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write('# Prompts to LLM:\n\n')
        f.write(messages[0]['content'])
        f.write("\n\n")
        f.write(messages[1]['content'])
        f.write('\n\n-------\n\n')
        f.write('# Response from LLM:\n\n')
        f.write(str(response))

AttributeError: 'list' object has no attribute 'content'

In [None]:
# print(outputs[0]["generated_text"][-1]["content"])

------------------------

# Baseline PHI LLM Generation

In [13]:
# !pip install einops
# !pip install pytest
# !pip install flash-attn

In [13]:
# Checkpoint name and weight precision used to load the Phi-3 model below.
model_id = "microsoft/Phi-3-small-128k-instruct"
dtype = torch.bfloat16

In [14]:
# Load model directly
from transformers import AutoModelForCausalLM, AutoTokenizer
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype = dtype, trust_remote_code=True)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [15]:
# Tokenizer matching the checkpoint; it is passed explicitly to the pipeline below.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

In [16]:
# Move the model onto the currently selected CUDA device.
# NOTE(review): this presumably requires a visible GPU — confirm before running on CPU nodes.
device = torch.cuda.current_device()
model = model.to(device)

In [17]:
# Rebind `pipeline` to the already-loaded Phi-3 model and its tokenizer on the chosen device.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=dtype,
    device=device,
)

In [18]:
# Keyword arguments for the single-prompt trial run of the Phi-3 pipeline.
# NOTE(review): do_sample=False asks for greedy decoding, in which case the temperature
# value is presumably unused — confirm which of the two settings is intended.
generation_args = {
    "max_new_tokens": 3500,
    "return_full_text": False,
    "temperature": 1.0,
    "do_sample": False,
}

In [20]:
# Trial prompt: instructions and the first query combined into a single user turn.
messages = [
    {
        "role": "user",
        "content": f"{instruction_header}{prompt}\n\n{query_header}{queries[0].text}",
    }
]

In [21]:
# Trial generation for the first query only, using the settings in generation_args.
output = pipeline(messages, **generation_args)

  x = [xi.to_sparse_csr() for xi in x]
/scratch/slurm-959740/tmpufnecqsm/main.c:6:23: fatal error: stdatomic.h: No such file or directory
 #include <stdatomic.h>
                       ^
compilation terminated.


CalledProcessError: Command '['/bin/gcc', '/scratch/slurm-959740/tmpufnecqsm/main.c', '-O3', '-shared', '-fPIC', '-o', '/scratch/slurm-959740/tmpufnecqsm/cuda_utils.cpython-310-x86_64-linux-gnu.so', '-lcuda', '-L/ihome/rboyce/iod4/.local/lib/python3.10/site-packages/triton/backends/nvidia/lib', '-L/lib64', '-I/ihome/rboyce/iod4/.local/lib/python3.10/site-packages/triton/backends/nvidia/include', '-I/scratch/slurm-959740/tmpufnecqsm', '-I/ihome/crc/install/pytorch/2.0.1/python3.10/include/python3.10']' returned non-zero exit status 1.

In [None]:
# Show the text generated by the trial run.
print(output[0]['generated_text'])

In [None]:
# print(''.join([instruction_header, prompt,'\n\n', query_header, queries[0].text]))

In [26]:

# outputs = pipeline(
#     messages,
#     max_new_tokens=3500,
#     temperature=1.0
# )

AttributeError: 'NoneType' object has no attribute 'apply_chat_template'

In [25]:
# Generate one Phi-3 answer per case study and save prompt + answer as Markdown.
# The output folder is created up front so a finished generation is never lost to a
# missing directory.
output_dir = Path(results_dir, 'phi')
output_dir.mkdir(parents=True, exist_ok=True)

# Every query is processed; an earlier debugging `break` stopped after the first one.
for i in range(len(queries)):
    # Instructions and query are sent together as a single user turn.
    messages = [{
        "role": "user",
        "content": ''.join([instruction_header, prompt, '\n\n', query_header, queries[i].text])
    }]
    outputs = pipeline(
        messages,
        max_new_tokens=3500,
        temperature=1.0
    )
    # The pipeline returns a list, not an object with `.content`. For chat input the
    # generated text is the whole conversation and the last message is the reply.
    response = outputs[0]["generated_text"][-1]["content"]

    case = mappings.loc[i]
    report_path = output_dir / f'phi_baseline_({case.NP},{case.Drug},{case.AE}).md'
    # Model output can contain non-ASCII characters, so pin UTF-8 instead of relying
    # on the locale's default encoding.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write('# Prompts to LLM:\n\n')
        # There is only one message in this conversation, so only index 0 is written.
        f.write(messages[0]['content'])
        f.write('\n\n-------\n\n')
        f.write('# Response from LLM:\n\n')
        f.write(str(response))

NameError: name 'I' is not defined

In [None]:
# print(outputs[0]["generated_text"][-1]["content"])

------------------------

# Baseline DeciLM LLM Generation

In [22]:
# Checkpoint name and weight precision used to build the DeciLM pipeline.
model_id = "Deci/DeciLM-7B-instruct"
dtype = torch.bfloat16

In [24]:
# Rebind `pipeline` to the DeciLM checkpoint. trust_remote_code=True allows the
# checkpoint's own Python modelling code to run locally.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    torch_dtype=dtype,
    device_map="auto",
    trust_remote_code=True,
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [37]:
# print(''.join([instruction_header, prompt,'\n\n', query_header, queries[0].text]))

In [31]:
# Generate one DeciLM answer per case study and save prompt + answer as Markdown.
# The output folder is created up front so a finished generation is never lost to a
# missing directory.
output_dir = Path(results_dir, 'deci')
output_dir.mkdir(parents=True, exist_ok=True)

for i in range(len(queries)):
    messages = [
        {
            "role": "system",
            "content": ''.join([instruction_header, prompt])
        },
        {
            "role": "user",
            "content": ''.join([query_header, queries[i].text])
        },
    ]
    outputs = pipeline(
        messages,
        max_new_tokens=3500,
        temperature=1.0
    )
    # For chat input the pipeline returns the whole conversation; the last message is the reply.
    response = outputs[0]["generated_text"][-1]["content"]

    case = mappings.loc[i]
    report_path = output_dir / f'deci_baseline_({case.NP},{case.Drug},{case.AE}).md'
    # Model output can contain non-ASCII characters, so pin UTF-8 instead of relying
    # on the locale's default encoding.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write('# Prompts to LLM:\n\n')
        f.write(messages[0]['content'])
        f.write("\n\n")
        f.write(messages[1]['content'])
        f.write('\n\n-------\n\n')
        f.write('# Response from LLM:\n\n')
        f.write(str(response))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [29]:
# outputs

In [32]:
# print(outputs[0]["generated_text"][-1]["content"])

------------------------

# Baseline Mixtral LLM Generation

In [33]:
# Checkpoint name and weight precision used to build the Mixtral pipeline.
model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
dtype = torch.bfloat16

In [34]:
# Rebind `pipeline` to the Mixtral checkpoint; device_map="auto" leaves device
# placement to the library.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    torch_dtype=dtype,
    device_map="auto",
)

config.json:   0%|          | 0.00/720 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/92.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/19 [00:00<?, ?it/s]

model-00001-of-00019.safetensors:   0%|          | 0.00/4.89G [00:00<?, ?B/s]

model-00002-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00005-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00006-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00007-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00008-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00009-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00010-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00011-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00012-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00013-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00014-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00015-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00016-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00017-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00018-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00019-of-00019.safetensors:   0%|          | 0.00/4.22G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/19 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [37]:
# print(''.join([instruction_header, prompt,'\n\n', query_header, queries[0].text]))

In [36]:
# Generate one Mixtral answer per case study and save prompt + answer as Markdown.
# The output folder is created up front so a finished generation is never lost to a
# missing directory.
output_dir = Path(results_dir, 'mixtral')
output_dir.mkdir(parents=True, exist_ok=True)

for i in range(len(queries)):
    messages = [
        {
            "role": "system",
            "content": ''.join([instruction_header, prompt])
        },
        {
            "role": "user",
            "content": ''.join([query_header, queries[i].text])
        },
    ]
    outputs = pipeline(
        messages,
        max_new_tokens=3500,
        temperature=1.0
    )
    # The pipeline returns a list, not an object with `.content`. For chat input the
    # generated text is the whole conversation and the last message is the reply.
    response = outputs[0]["generated_text"][-1]["content"]

    case = mappings.loc[i]
    report_path = output_dir / f'mixtral_baseline_({case.NP},{case.Drug},{case.AE}).md'
    # Model output can contain non-ASCII characters, so pin UTF-8 instead of relying
    # on the locale's default encoding.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write('# Prompts to LLM:\n\n')
        f.write(messages[0]['content'])
        f.write("\n\n")
        f.write(messages[1]['content'])
        f.write('\n\n-------\n\n')
        f.write('# Response from LLM:\n\n')
        f.write(str(response))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


KeyboardInterrupt: 

In [None]:
# print(outputs[0]["generated_text"][-1]["content"])

------------------------

# Baseline Mistral LLM Generation

In [37]:
# Checkpoint name and weight precision used to build the Mistral pipeline.
model_id = "mistralai/Mistral-7B-Instruct-v0.3"
dtype = torch.bfloat16

In [38]:
# Rebind `pipeline` to the Mistral checkpoint; device_map="auto" leaves device
# placement to the library.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    torch_dtype=dtype,
    device_map="auto",
)

config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/141k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [39]:
# print(''.join([instruction_header, prompt,'\n\n', query_header, queries[0].text]))

In [42]:
# Generate one Mistral answer per case study and save prompt + answer as Markdown.
# The output folder is created up front so a finished generation is never lost to a
# missing directory.
output_dir = Path(results_dir, 'mistral')
output_dir.mkdir(parents=True, exist_ok=True)

for i in range(len(queries)):
    messages = [
        {
            "role": "system",
            "content": ''.join([instruction_header, prompt])
        },
        {
            "role": "user",
            "content": ''.join([query_header, queries[i].text])
        },
    ]
    outputs = pipeline(
        messages,
        max_new_tokens=3500,
        temperature=1.0
    )
    # For chat input the pipeline returns the whole conversation; the last message is the reply.
    response = outputs[0]["generated_text"][-1]["content"]

    case = mappings.loc[i]
    report_path = output_dir / f'mistral_baseline_({case.NP},{case.Drug},{case.AE}).md'
    # Write the text itself through a UTF-8 file handle. Wrapping `.encode('utf-8')` in
    # str() stored the bytes repr (b'...' with escaped newlines) rather than Markdown.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write('# Prompts to LLM:\n\n')
        f.write(messages[0]['content'])
        f.write("\n\n")
        f.write(messages[1]['content'])
        f.write('\n\n-------\n\n')
        f.write('# Response from LLM:\n\n')
        f.write(str(response))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [None]:
# print(outputs[0]["generated_text"][-1]["content"])

------------------------