In [34]:
!pip install transformers torch optimum

Collecting optimum
  Downloading optimum-1.25.3-py3-none-any.whl.metadata (16 kB)
Downloading optimum-1.25.3-py3-none-any.whl (429 kB)
Installing collected packages: optimum
Successfully installed optimum-1.25.3


In [35]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import os
import shutil

In [37]:
# Model identifier handed to `from_pretrained` when loading the model and tokenizer.
model_name = "distilgpt2"

In [38]:
# Fetch the pretrained weights and the matching tokenizer
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Output folders: one for the model as loaded, one for the float16 copy
orig_dir = "./distilgpt2_original"
float16_dir = "./distilgpt2_float16"

# Start from a clean slate: remove leftovers from any previous run
for stale_dir in (orig_dir, float16_dir):
    if os.path.exists(stale_dir):
        shutil.rmtree(stale_dir)

# Write the unmodified model, then its tokenizer, to the original folder
for artifact in (model, tokenizer):
    artifact.save_pretrained(orig_dir)
 
def get_size(path):
    """Return the combined size, in MB, of every file under ``path``.

    The directory tree rooted at ``path`` is traversed with ``os.walk`` and
    ``os.path.getsize`` is summed over each file found. The byte total is
    then divided by 1024 * 1024, so "MB" here means mebibytes.
    """
    bytes_per_mb = 1024 * 1024
    size_in_bytes = sum(
        os.path.getsize(os.path.join(root, name))
        for root, _dirs, names in os.walk(path)
        for name in names
    )
    return size_in_bytes / bytes_per_mb
 
# Report how much disk space the saved original model occupies
orig_size_mb = get_size(orig_dir)
print(f"Original model size: {orig_size_mb:.2f} MB")

# Show the dtype of the first parameter as loaded
dtype_before = next(model.parameters()).dtype
print("Dtype before float16 conversion:", dtype_before)

# Cast the model's weights to half precision (float16)
model.half()

# Show the dtype of the first parameter again, now after the cast
dtype_after = next(model.parameters()).dtype
print("Dtype after float16 conversion:", dtype_after)

# Write the converted model, then the tokenizer, to the float16 folder
for artifact in (model, tokenizer):
    artifact.save_pretrained(float16_dir)

float16_size_mb = get_size(float16_dir)
print(f"Float16 model size: {float16_size_mb:.2f} MB")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


Original model size: 315.69 MB
Dtype before float16 conversion: torch.float32
Dtype after float16 conversion: torch.float16
Float16 model size: 159.45 MB
