In [None]:
import torch
import time
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
# Pick the compute device in priority order: CUDA GPU, then Intel XPU, then CPU.
# The XPU probe is guarded with hasattr because a PyTorch build that does not
# expose `torch.xpu` would otherwise raise AttributeError here instead of
# falling through to the CPU branch.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif hasattr(torch, 'xpu') and torch.xpu.is_available():
    device = torch.device('xpu')
else:
    device = torch.device('cpu')

print(f'Using device: {device}')

In [None]:
# RUN THIS ONLY ONCE TO DOWNLOAD THE MODEL
# IF YOU HAVE THE MODEL ALREADY, SKIP THIS CELL AND RUN load_model cell
# https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct for llama3.2-3B-Instruct model
def download_model(device, model_name="meta-llama/Llama-3.2-3B-Instruct", save_dir="."):
    '''
    Download a tokenizer and causal language model from the Hugging Face Hub
    and save both to a local directory.

    Args:
        device: Device the returned model is moved to (for example the
            notebook-level `device`). Pass None to leave the model wherever
            `from_pretrained` placed it.
        model_name: Hugging Face repository id to download. Defaults to the
            Llama 3.2 3B Instruct repository.
        save_dir: Directory that receives the tokenizer and model files.
            Defaults to the current working directory.

    Returns:
        A `(tokenizer, model)` tuple.
    '''
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Persist a local copy so later sessions can load without downloading again.
    tokenizer.save_pretrained(save_dir)
    model.save_pretrained(save_dir)

    # Honor the `device` argument so the returned model matches what the caller
    # asked for, consistent with load_model().
    if device is not None:
        model = model.to(device)
    return tokenizer, model

# Time the download so the message reports elapsed seconds; formatting a bare
# time.time() would print the Unix epoch timestamp instead of a duration.
download_start = time.perf_counter()
tokenizer, model = download_model(device)
print(f"Model loaded on {device} in {time.perf_counter() - download_start:.2f} seconds.")

In [None]:
# RUN THIS CELL IF YOU HAVE THE MODEL ALREADY
# Change the path to where you saved the model (should be a folder with 9 files for Llama 3.2-3B-Instruct-model)
model_path = "C:/Users/aivan/Desktop/BIOIN 401/Prompt Engineering/Llama 3.2-3B-Instruct-model"
def load_model(path=None, target_device=None):
    """
    Load the tokenizer and model from a local directory and move the model to a device.

    Both loads pass `local_files_only=True`, so this reads files already saved
    on disk rather than downloading them.

    Args:
        path: Directory containing the saved tokenizer and model files.
            Defaults to the module-level `model_path`.
        target_device: Device to move the model to. Defaults to the
            module-level `device`.

    Returns:
        A `(tokenizer, model)` tuple.
    """
    # Resolve defaults at call time so re-assigning the notebook globals
    # (model_path / device) in another cell is picked up without redefining this function.
    if path is None:
        path = model_path
    if target_device is None:
        target_device = device

    tokenizer = AutoTokenizer.from_pretrained(path, local_files_only=True)
    model = AutoModelForCausalLM.from_pretrained(path, local_files_only=True).to(target_device)

    return tokenizer, model

# Time the load so the message reports elapsed seconds; formatting a bare
# time.time() would print the Unix epoch timestamp instead of a duration.
load_start = time.perf_counter()
tokenizer, model = load_model()
print(f"Model loaded on {device} in {time.perf_counter() - load_start:.2f} seconds.")

In [4]:
# Cell 4: Run LLaMA Prompt
def run_llama_prompt(model, tokenizer, prompt, max_new_tokens=256, temperature=0.7, top_k=50):
    """
    Generate a sampled completion for a single prompt string and return only the new text.

    Args:
        model: Object exposing `.device` and `.generate(...)`.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
            If it has no pad token, its EOS token is assigned as the pad token
            (this mutates the tokenizer).
        prompt: Prompt text to send to the model.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature passed to `generate`.
        top_k: Top-k sampling cutoff passed to `generate`.

    Returns:
        The generated continuation as a whitespace-stripped string, without the prompt.
    """
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
    input_ids = inputs["input_ids"].to(model.device)
    attention_mask = inputs["attention_mask"].to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_k=top_k,
            pad_token_id=tokenizer.eos_token_id
        )

    # Drop the prompt by token count rather than by character count: the decoded
    # prompt is not guaranteed to match the raw `prompt` string (truncation,
    # skipped special tokens, tokenizer normalization), so slicing the decoded
    # text with len(prompt) can cut into or leave behind part of the answer.
    # Assumes generate() returns the prompt ids followed by the new ids, as
    # decoder-only causal LMs do.
    prompt_token_count = input_ids.shape[-1]
    new_token_ids = outputs[0][prompt_token_count:]
    completion = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
    return completion

In [None]:
# Cell 5: Define Prompt and Execute Inference
# you can use open() to read a file

# scientific_text = (
#     "Phosphorylation of the C-terminal tail of the Drosophila protein TFIID is required for its activation "
#     "as a transcription factor. Mutations in the taf7 gene that affect the phosphorylation of the C-terminal "
#     "tail of TFIID impair its ability to bind to DNA and regulate transcription. The taf7 gene encodes a subunit "
#     "of the TFIID complex that is essential for the assembly of the transcription factor on DNA. TFIID is a "
#     "heterotrimeric complex composed of the TAF7 subunit, the TAF2 subunit, and the TBP subunit."
# )
# Read the article text to classify. The context manager closes the file handle
# as soon as the text is in memory instead of leaving it open.
# NOTE(review): no encoding is passed, so the platform default is used — confirm
# it matches how the article files were written.
with open("C:/Users/aivan/Desktop/BIOIN 401/GOLLM/data/fetched_articles_subsets/subset_1/291061.txt", "r") as article_file:
    scientific_text = article_file.read()

# Assemble the instruction prompt section by section. Every piece is joined
# with no separator, so each string carries its own newlines and spacing.
prompt = "".join((
    # Task description.
    "-Goal-\n"
    "You will read a short abstract or scientific text describing one or more genes or proteins.\n"
    "Determine if it explicitly describes or suggests 'transcription regulator activity' (GO:0140110) "
    "or any closely related sub-activity (e.g., transcription repressor, transcription activator).\n\n",
    # Step-by-step procedure the model is asked to follow.
    "-Steps-\n"
    "1. Identify all gene/protein mentions in the text (e.g., official symbols, synonyms).\n"
    "2. For each identified gene/protein, check if it is associated with transcription regulator activity "
    "or a direct sub-activity (like DNA-binding transcription factor activity).\n"
    "3. Output the results in JSON with the structure\n\n",
    # JSON schema the response must follow.
    '{\n'
    '  "genes": [\n'
    '    {\n'
    '      "gene_symbol": "string",\n'
    '      "transcription_regulator_activity": "true/false",\n'
    '      "evidence_snippet": "text that suggests the activity"\n'
    '    }\n'
    '  ]\n'
    '}\n\n',
    # One filled-in example of that schema.
    "-Example Format-\n"
    '{\n'
    '  "genes": [\n'
    '    {\n'
    '      "gene_symbol": "TP53",\n'
    '      "transcription_regulator_activity": "true",\n'
    '      "evidence_snippet": "p53 is a transcription factor controlling cell cycle genes..."\n'
    '    }\n'
    '  ]\n'
    '}\n\n',
    # Output constraint.
    "--Important--\n"
    "Return only the JSON, with no extra commentary, explanation, or bullet points.\n"
    "\n",
    # The text to analyze goes last.
    "-Scientific Text-\n",
    f"{scientific_text}\n",
))

# Run one generation pass over the prompt and record wall-clock timestamps
# around it so the elapsed time can be reported with the response.
start_time = time.time()
output = run_llama_prompt(model, tokenizer, prompt, max_new_tokens=256)
end_time = time.time()

# Header, model response, and elapsed time, one per line.
elapsed_seconds = end_time - start_time
print("LLM Response", output, f"Time taken: {elapsed_seconds:.2f} seconds", sep="\n")