In [1]:
import torch
torch.cuda.is_available()

True

In [2]:
import torch
import os
import sys

print("Torch version:", torch.__version__)
print("Python:", sys.version)

# ---- Basic CUDA availability ----
print("CUDA available:", torch.cuda.is_available())
print("CUDA device count:", torch.cuda.device_count())

if torch.cuda.is_available():
    print("Current CUDA device:", torch.cuda.current_device())
    print("Device name:", torch.cuda.get_device_name(0))
else:
    print("No CUDA detected.")

# ---- Safeguards / fallback attempts ----

# 1. Ensure TORCH_USE_CUDA_DSA is off (can break some GPUs)
os.environ["TORCH_USE_CUDA_DSA"] = "0"

# 2. Try to globally set default tensor type if CUDA exists
try:
    if torch.cuda.is_available():
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
        print("Default tensor type set to CUDA FloatTensor.")
    else:
        print("Running on CPU tensors.")
except Exception as e:
    print("Failed to set default tensor type:", e)

# 3. Create a test tensor and move to GPU (if possible)
try:
    x = torch.randn(3, 3)
    if torch.cuda.is_available():
        x = x.to("cuda")
        print("Test tensor successfully moved to GPU.")
    else:
        print("Test tensor on CPU (no GPU).")
except Exception as e:
    print("Tensor GPU test failed:", e)

# 4. Check for bfloat16 support
if torch.cuda.is_available():
    try:
        bf16_supported = torch.cuda.is_bf16_supported()
        print("bfloat16 supported:", bf16_supported)
    except Exception:
        print("bfloat16 support check not available in this PyTorch version.")

# 5. Check GPU memory
if torch.cuda.is_available():
    try:
        alloc = torch.cuda.memory_allocated()
        reserved = torch.cuda.memory_reserved()
        print("Memory allocated:", alloc)
        print("Memory reserved :", reserved)
    except Exception as e:
        print("GPU memory query failed:", e)

print("\nFinal device being used:", "cuda" if torch.cuda.is_available() else "cpu")


Torch version: 2.9.1+cu130
Python: 3.13.5 | packaged by Anaconda, Inc. | (main, Jun 12 2025, 16:37:03) [MSC v.1929 64 bit (AMD64)]
CUDA available: True
CUDA device count: 1
Current CUDA device: 0
Device name: NVIDIA GeForce RTX 3080
Default tensor type set to CUDA FloatTensor.
Test tensor successfully moved to GPU.
bfloat16 supported: True
Memory allocated: 512
Memory reserved : 2097152

Final device being used: cuda


  _C._set_default_tensor_type(t)


In [3]:
import torch
# from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

model_name_or_id = "OpenDFM/ChemDFM-v1.5-8B"
# tokenizer = LlamaTokenizer.from_pretrained(model_name_or_id)
# model = LlamaForCausalLM.from_pretrained(model_name_or_id, torch_dtype=torch.float16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name_or_id, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True)

input_text = "Can you please give detailed descriptions of the molecule below?\nCl.O=C1c2c(O)cccc2-c2nn(CCNCCO)c3ccc(NCCNCCO)c1c23"
input_text = f"[Round 0]\nHuman: {input_text}\nAssistant:"

inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
generation_config = GenerationConfig(
    do_sample=True,
    top_k=20,
    top_p=0.9,
    temperature=0.9,
    max_new_tokens=1024,
    repetition_penalty=1.05,
    eos_token_id=tokenizer.eos_token_id
)

outputs = model.generate(**inputs, generation_config=generation_config)
generated_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0][len(input_text):]
print(generated_text.strip())

  from .autonotebook import tqdm as notebook_tqdm
`torch_dtype` is deprecated! Use `dtype` instead!
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. 

The molecule is the hydrochloride salt form of losoxantrone, an anthrapyrazole-based antineoplastic antibiotic. Losoxantrone intercalates into DNA, induces single- and double-stranded DNA breaks and inhibits topoisomerase II, thereby inhibiting DNA replication and repair as well as RNA and protein synthesis. Losoxantrone is less cardiotoxic than doxorubicin.
