In [None]:
import os

# Display the configured model root directory (None when HF_MODEL is unset).
os.getenv("HF_MODEL")

In [2]:
import gc

import torch

# Run a full garbage-collection pass first, then ask PyTorch to release
# the unoccupied memory held by its CUDA caching allocator.
gc.collect()
torch.cuda.empty_cache()

In [None]:
import os

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Hub id of the checkpoint; also reused as the sub-path under HF_MODEL.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# HF_MODEL is the root directory under which the local model copy is stored.
hf_home = os.environ.get("HF_MODEL")
if not hf_home:
    raise EnvironmentError("HF_MODEL environment variable is not set")

model_path = os.path.join(hf_home, model_name)

# Create the target directory without a separate exists() check, so there is
# no window between checking and creating and the cell stays safe to re-run.
try:
    os.makedirs(model_path)
except FileExistsError:
    print(f"Directory already exists: {model_path}")
else:
    print(f"Created directory: {model_path}")

# 4-bit quantization settings. They are NOT passed to from_pretrained below,
# so the model is loaded unquantized; add quantization_config=bnb_config to
# the call to enable them.
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    # HF_TOKEN is optional: when unset, None (the from_pretrained default) is
    # passed instead of aborting the cell with a KeyError.
    token=os.environ.get("HF_TOKEN"),
)

In [None]:
# Write the loaded model to model_path.
# from_pt is a from_pretrained() loading option, not a save_pretrained()
# parameter, so it is no longer passed here.
model.save_pretrained(model_path)

In [None]:
import os

# HF_TOKENIZER is the root directory for locally saved tokenizers. Fail with a
# clear message when it is unset, rather than letting os.path.join raise a
# TypeError on None.
hf_tokenizer = os.environ.get("HF_TOKENIZER")
if not hf_tokenizer:
    raise EnvironmentError("HF_TOKENIZER environment variable is not set")

# Mirror the model layout: <HF_TOKENIZER>/<model_name>.
tokenizers_path = os.path.join(hf_tokenizer, model_name)

In [None]:
from transformers import AutoTokenizer

# Load the tokenizer for model_name, using model_path as the cache directory.
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=model_path)

# Save it to tokenizers_path; as the cell's last expression, the return value
# of save_pretrained is displayed.
tokenizer.save_pretrained(tokenizers_path)