In [2]:
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM


In [3]:
# Load the tokenizer and the causal-LM weights from the checkpoint at `model_path`.
model_path = "darwin-7B"
print("loading model, path:", model_path)

tokenizer = LlamaTokenizer.from_pretrained(model_path)

# Half-precision weights with 8-bit quantization disabled; device_map="auto"
# leaves the placement of the weights across available devices to the library.
model = LlamaForCausalLM.from_pretrained(model_path, load_in_8bit=False, torch_dtype=torch.float16, device_map="auto")

loading model, path: darwin/mix_training2


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
def generate_prompt(instruction, input=None):
    """Build the instruction-style prompt that is fed to the model.

    Args:
        instruction: Text describing the task to perform.
        input: Optional extra context for the task. Any falsy value
            (``None``, ``""``) selects the template without an input section.

    Returns:
        The prompt string, ending with the ``### Response:`` marker. Every
        line after the first is indented by four spaces; that indentation is
        part of the prompt text itself.
    """
    # Each section starts on its own line, prefixed by four spaces.
    separator = "\n    "

    if not input:
        return separator.join([
            "The following is an instruction that describes a task. Write a response that appropriately completes the request.",
            "### Instruction:",
            f"{instruction}",
            "### Response:",
        ])

    return separator.join([
        "The following is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.",
        "### Instruction:",
        f"{instruction}",
        "### Input:",
        f"{input}",
        "### Response:",
    ])

def process_response(response):
    """Extract the first line of the model's answer from decoded text.

    The answer is taken to be whatever follows the first ``'Response: '``
    marker, up to (not including) the next newline.

    If that marker is missing -- e.g. the model began its answer with a
    newline directly after ``Response:`` -- the text after the first
    ``'Response:'`` is used instead, with leading whitespace removed. If no
    marker is present at all, an empty string is returned rather than
    raising ``IndexError``.

    Args:
        response: Full decoded text (prompt followed by the generation).

    Returns:
        The first line of the answer, or ``''`` when no marker is found.
    """
    _, marker, answer = response.partition('Response: ')
    if not marker:
        # Fallback: marker without the trailing space.
        _, marker, answer = response.partition('Response:')
        if not marker:
            return ''
        answer = answer.lstrip()
    return answer.split('\n')[0]

def evaluate(instruction,
             input = None,
             temperature = 0.8,
             top_p = 0.75,
             top_k=40,
             do_sample=True,
             repetition_penalty=1.0,
             max_new_tokens=256,
             **kwargs):
    """Generate a model answer for one instruction and return its first line.

    Builds the prompt with ``generate_prompt``, runs ``model.generate`` with
    the given decoding settings, decodes the first returned sequence and
    post-processes it with ``process_response``.

    Args:
        instruction: Task description passed to ``generate_prompt``.
        input: Optional extra context passed to ``generate_prompt``.
        temperature: Forwarded to ``model.generate``.
        top_p: Forwarded to ``model.generate``.
        top_k: Forwarded to ``model.generate``.
        do_sample: Forwarded to ``model.generate``.
        repetition_penalty: Forwarded to ``model.generate``.
        max_new_tokens: Forwarded to ``model.generate``.
        **kwargs: Any additional keyword arguments for ``model.generate``.

    Returns:
        The string produced by ``process_response`` for the decoded output.
    """
    prompt = generate_prompt(instruction, input)

    # Move the inputs to the model's own device instead of hard-coding "cuda":
    # the model is loaded with device_map="auto", so it is not guaranteed to
    # sit on the default CUDA device (or on a GPU at all).
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Hand the attention mask to generate() explicitly, unless the caller
    # already supplied one through **kwargs.
    attention_mask = encoded.get("attention_mask")
    if attention_mask is not None:
        kwargs.setdefault("attention_mask", attention_mask)

    generated_ids = model.generate(
        encoded["input_ids"],
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        repetition_penalty=repetition_penalty,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        **kwargs
    )

    # Drop special tokens (e.g. BOS/EOS) so they cannot leak into the answer
    # when the generation ends without a trailing newline.
    response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return process_response(response)


In [5]:
# Run a few sample chemistry / materials prompts through the model and print
# each instruction together with the model's answer.
for instruction in (
    'Write lipophilicity of given SMILES. CC(C)C(NC(=O)CN1C(=O)C(=CN=C1C2CCCCC2)NC(=O)OCc3ccccc3)C(=O)C(F)(F)F',
    'Given compound, write its potential SELFIES. Decalin',
    'What is water solubility expressed as a logarithm in mol/L of given compound in room temperature? Methyl acrylate',
    'Tell me if given composition has glass formation ability. Ni53.5B44C2.5',
    'Is composition metal? InSb2S4Cl',
):
    print(f"Instruction: {instruction}")
    print(f"Response: {evaluate(instruction)}")
    print('-' * 18)

Instruction: Write lipophilicity of given SMILES. CC(C)C(NC(=O)CN1C(=O)C(=CN=C1C2CCCCC2)NC(=O)OCc3ccccc3)C(=O)C(F)(F)F




Response: 2.26
------------------
Instruction: Given compound, write its potential SELFIES. Decalin
Response: [C][C][C][C][C][C][C][C][C][Ring1][Branch1][C][Ring1][=Branch2][C][Ring1][=C][C][Ring1][#Branch2][C][Ring1][=C][C][Ring1][=C][Ring2][Ring1][Ring1][Ring2][Ring1][Ring1][C]
------------------
Instruction: What is water solubility expressed as a logarithm in mol/L of given compound in room temperature? Methyl acrylate
Response: -0.22
------------------
Instruction: Tell me if given composition has glass formation ability. Ni53.5B44C2.5
Response: No, Ni53.5B44C2.5 does not have glass formation ability.
------------------
Instruction: Is composition metal? InSb2S4Cl
Response: No, InSb2S4Cl is not metal.
------------------
