In [1]:
# Shell escape: print the nvidia-smi report (GPU, driver, CUDA, memory usage) for this machine.
!nvidia-smi

Tue May  6 10:31:14 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.230.02             Driver Version: 535.230.02   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GH200 120GB             On  | 00000009:01:00.0 Off |                    0 |
| N/A   27C    P0              88W / 900W |   1813MiB / 97871MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
import json
from pprint import pprint

import torch
import transformers
from environs import env
from transformers import AutoModelForCausalLM, AutoTokenizer, QuantoConfig, AutoProcessor, Llama4ForConditionalGeneration

from local_funcs import chat_funcs, prompt_funcs
from yiutils.project_utils import find_project_root

4.51.3
2.6.0


In [None]:
# Environment report, one value per line: transformers version, torch version,
# whether torch can see a CUDA device, and torch's reported CUDA version.
print(
    transformers.__version__,
    torch.__version__,
    torch.cuda.is_available(),
    torch.version.cuda,
    sep="\n",
)

In [None]:
# Project root as resolved by find_project_root("justfile"); the data
# directory sits directly beneath it.
proj_root = find_project_root("justfile")
data_dir = proj_root.joinpath("data")

# Read the project's .env file, then fetch the Hugging Face token from it.
env.read_env(proj_root.joinpath(".env"))
access_token = env("HUGGINGFACE_TOKEN")

True

# data load

In [6]:
# Location of the MR PubMed records inside the project data directory.
path_to_mr_pubmed_data = (
    data_dir / "intermediate" / "mr-pubmed-data" / "mr-pubmed-data.json"
)
# Fail fast with a readable message if the intermediate file is missing.
assert path_to_mr_pubmed_data.exists(), (
    f"Data file {path_to_mr_pubmed_data} does not exist."
)

# Pin the encoding: without it, open() falls back to the locale default, which
# can mis-decode non-ASCII characters in the abstracts on some platforms.
with open(path_to_mr_pubmed_data, encoding="utf-8") as f:
    mr_pubmed_data = json.load(f)

In [7]:
# Take the first loaded record as the sample article for the cells below.
article_data = mr_pubmed_data[0]
# Bare last expression so the notebook displays the record.
article_data

{'pmid': '38794754',
 'ab': "Alcohol consumption significantly impacts disease burden and has been linked to various diseases in observational studies. However, comprehensive meta-analyses using Mendelian randomization (MR) to examine drinking patterns are limited. We aimed to evaluate the health risks of alcohol use by integrating findings from MR studies. A thorough search was conducted for MR studies focused on alcohol exposure. We utilized two sets of instrumental variables-alcohol consumption and problematic alcohol use-and summary statistics from the FinnGen consortium R9 release to perform de novo MR analyses. Our meta-analysis encompassed 64 published and 151 de novo MR analyses across 76 distinct primary outcomes. Results show that a genetic predisposition to alcohol consumption, independent of smoking, significantly correlates with a decreased risk of Parkinson's disease, prostate hyperplasia, and rheumatoid arthritis. It was also associated with an increased risk of chronic 

# model init

## llama

### llama3

In [None]:
# Llama 3 8B Instruct: placed on the GPU in bfloat16 with int4 Quanto weights.
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

device, dtype = "cuda", torch.bfloat16
quantization_config = QuantoConfig(weights="int4")

# Tokenizer first, then the model; both pass the Hugging Face access token.
llama3_tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    token=access_token,
)
llama3_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=access_token,
    device_map=device,
    torch_dtype=dtype,
    quantization_config=quantization_config,
)

Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████| 4/4 [00:07<00:00,  1.76s/it]


### llama4

In [None]:
# Llama 4 Scout, FP8-dynamic checkpoint.
MODEL_ID = "RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic"

device = "cuda"
dtype = torch.bfloat16
# The repo name marks this checkpoint as already FP8-quantized, so no second
# (int4 Quanto) quantization scheme is requested on top of it. The name is kept
# defined, as None, so the cell has the same shape as the other model cells.
# NOTE(review): confirm the checkpoint's own quantization settings are picked
# up from its config when it is loaded.
quantization_config = None

llama4_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=access_token)
llama4_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=dtype,
    device_map=device,
    token=access_token,
    quantization_config=quantization_config,
)

Fetching 50 files:   0%|                                                                                     | 0/50 [00:00<?, ?it/s]Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `p

KeyboardInterrupt: 

## deepseek

### ds r1

In [None]:
# DeepSeek R1 distilled onto Llama 8B: placed on the GPU in bfloat16 with int4
# Quanto weights.
MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"

device, dtype = "cuda", torch.bfloat16
quantization_config = QuantoConfig(weights="int4")

# Tokenizer first, then the model; both pass the Hugging Face access token.
ds_r1_tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    token=access_token,
)
ds_r1_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=access_token,
    trust_remote_code=True,
    device_map=device,
    torch_dtype=dtype,
    quantization_config=quantization_config,
)

### ds prover

In [None]:
# DeepSeek Prover V2 7B: placed on the GPU in bfloat16 with int4 Quanto weights.
MODEL_ID = "deepseek-ai/DeepSeek-Prover-V2-7B"

device, dtype = "cuda", torch.bfloat16
quantization_config = QuantoConfig(weights="int4")

# Tokenizer first, then the model; both pass the Hugging Face access token.
ds_prover_tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    token=access_token,
)
ds_prover_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=access_token,
    trust_remote_code=True,
    device_map=device,
    torch_dtype=dtype,
    quantization_config=quantization_config,
)

OSError: DeepSeek-Prover-V2-7B is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

## falcon

### falcon lite

In [None]:
# Falcon 7B Instruct: placed on the GPU in bfloat16 with int4 Quanto weights.
MODEL_ID = "tiiuae/falcon-7b-instruct"


device = "cuda"
dtype = torch.bfloat16
quantization_config = QuantoConfig(weights="int4")

# Stored under falcon-specific names. This cell previously assigned to
# ds_prover_tokenizer / ds_prover_model, which silently replaced the
# DeepSeek-Prover objects with Falcon ones.
falcon_lite_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=access_token)
falcon_lite_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=dtype,
    device_map=device,
    token=access_token,
    trust_remote_code=True,
    quantization_config=quantization_config,
)

### falcon large

In [None]:
# Falcon 40B Instruct: placed on the GPU in bfloat16 with int4 Quanto weights.
MODEL_ID = "tiiuae/falcon-40b-instruct"


device = "cuda"
dtype = torch.bfloat16
quantization_config = QuantoConfig(weights="int4")

# Stored under falcon-specific names. This cell previously assigned to
# ds_prover_tokenizer / ds_prover_model, which silently replaced whatever
# model an earlier cell had stored under those names.
falcon_large_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=access_token)
falcon_large_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=dtype,
    device_map=device,
    token=access_token,
    trust_remote_code=True,
    quantization_config=quantization_config,
)