In [1]:

import transformers
import torch
from vllm import LLM, SamplingParams
import os

os.environ["LIBRARY_PATH"]="/usr/local/cuda-12.2/lib64/stubs:$LIBRARY_PATH"
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
model_id = "meta-llama/Meta-Llama-3.1-70B-Instruct"

In [2]:


from src.llm.PredictionUtils import init_model_and_tokenizer, llm_gen


model, tokenizer = init_model_and_tokenizer(model_id,
                                            enable_lora=True,
                                            )

INFO 09-19 15:53:29 config.py:904] Defaulting to use mp for distributed inference
INFO 09-19 15:53:29 llm_engine.py:223] Initializing an LLM engine (v0.6.1.post2) with config: model='meta-llama/Meta-Llama-3.1-70B-Instruct', speculative_config=None, tokenizer='meta-llama/Meta-Llama-3.1-70B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=2000, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=8, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=meta-llam

Loading safetensors checkpoint shards:   0% Completed | 0/30 [00:00<?, ?it/s]


INFO 09-19 15:53:48 model_runner.py:1008] Loading model weights took 16.4634 GB
[1;36m(VllmWorkerProcess pid=677201)[0;0m INFO 09-19 15:53:49 model_runner.py:1008] Loading model weights took 16.4634 GB
[1;36m(VllmWorkerProcess pid=677199)[0;0m INFO 09-19 15:53:49 model_runner.py:1008] Loading model weights took 16.4634 GB
[1;36m(VllmWorkerProcess pid=677200)[0;0m INFO 09-19 15:53:49 model_runner.py:1008] Loading model weights took 16.4634 GB
[1;36m(VllmWorkerProcess pid=677197)[0;0m INFO 09-19 15:53:50 model_runner.py:1008] Loading model weights took 16.4634 GB
[1;36m(VllmWorkerProcess pid=677203)[0;0m INFO 09-19 15:53:50 model_runner.py:1008] Loading model weights took 16.4634 GB
[1;36m(VllmWorkerProcess pid=677202)[0;0m INFO 09-19 15:53:51 model_runner.py:1008] Loading model weights took 16.4634 GB
[1;36m(VllmWorkerProcess pid=677198)[0;0m INFO 09-19 15:53:51 model_runner.py:1008] Loading model weights took 16.4634 GB
INFO 09-19 15:53:58 distributed_gpu_executor.py:57] 

In [3]:

from datasets import load_dataset

ds=load_dataset("kanhatakeyama/material-properties",split="Bradley")
ds=ds.shuffle(seed=1)
len(ds)

24889

In [4]:
train_ds = ds.select(range(24000))
test_ds = ds.select(range(24000, 24800))
train_ds[0]



{'CompName': '(2Z)-2-(1,3-Benzodioxol-5-ylmethylene)-6,7-dihydro-5H-[1,3]thiazolo[3,2-a]pyrimidin-3(2H)-one',
 'SMILES': 'O=C1C(SC2=NCCCN12)=Cc4ccc3OCOc3c4',
 'Property': 'Melting temperature',
 'Value': 151.35,
 'unit': '[oC]',
 'Source': 'BradleyMeltingPointDataset'}

In [5]:
import random
prompt_list=[]
problems=[]
n_records=200
target_ds=test_ds
n_records=min(n_records,len(target_ds))
for i in range(n_records):
    record=random.choice(target_ds)
    q=f"Predict "+record["Property"]+" "+record["unit"]+" for "+record["CompName"]+" (Compound X) with SMILES "+str(record["SMILES"])+". The prediction consists of #Reason and #Prediction. The #Reason is the quantitative explanation of the prediction. The #Prediction is the predicted value and the unit of the prediction."
    messages=[
        {"role": "user", "content": q},
    ]
    prompt=tokenizer.apply_chat_template(messages,tokenize=False)
    prompt_list.append(prompt+ "assistant\n\n#Reason\n")
    problems.append(record)


In [7]:
import glob
from tqdm import tqdm
from src.prop.utils import parse_Q_R_A_prediction
lora_model_list=glob.glob("output0919_threshold_0.1_lora_kqvo_proj/checkpoint-*")
lora_model_list= sorted(lora_model_list, key=lambda x: int(x.split('-')[-1]))

prediction_dict={}

cnt=0
for lora_model in tqdm(lora_model_list[:]):
    cnt+=1
    predicted_texts=llm_gen(model,prompt_list,
                            enable_lora=True,
                            lora_path=lora_model,
                            lora_id=cnt)
    good_records=parse_Q_R_A_prediction(predicted_texts,problems)
    prediction_dict[lora_model]=good_records

Processed prompts: 100%|██████████| 200/200 [01:03<00:00,  3.13it/s, est. speed input: 374.35 toks/s, output: 789.15 toks/s]
  0%|          | 0/4 [01:03<?, ?it/s]


TypeError: parse_Q_R_A_prediction() takes 1 positional argument but 2 were given

In [None]:

predicted_texts=llm_gen(model,prompt_list,
                        enable_lora=False,
                        )
good_records=parse_Q_R_A_prediction(predicted_texts)
prediction_dict["original"]=good_records

In [12]:
#import json 
#with open("data/predict/0919prediction_dict_kqvo_proj.json","w") as f:
#    json.dump(prediction_dict,f,indent=2)

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

for lora_name,good_records in prediction_dict.items():

    df=pd.DataFrame(good_records)

    plt.figure(figsize=(15, 5))
    sns.histplot(df[df["error_rate"]<2], x="error_rate", hue="Property", multiple="stack", 
                bins=20)


    s_df=df[df["error_rate"]<1]
    median1=s_df["error_rate"].median()
    median2=df["error_rate"].median()
    plt.title(f"lora model {lora_name} {median1} {median2}")