In [None]:
! pip install transformers torch accelerate huggingface-hub huggingface-cli hf-transfer

In [None]:
from transformers import TextStreamer

def count_parameters(model):
    """Print the trainable-parameter count in billions and return it as a whole number.

    Only parameters whose ``requires_grad`` flag is truthy are counted.

    Args:
        model: Object whose ``parameters()`` yields items exposing ``numel()``
            and ``requires_grad``.

    Returns:
        int: The count in billions, truncated by ``int()`` (e.g. 2.5 -> 2).
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()

    # Express the raw element count in units of 10**9 for reporting.
    billions = trainable_total / 10**9
    print(f"Model size: {billions:.3f}B parameters")
    return int(billions)

def generate(model, tokenizer, inputs, max_new_tokens=50):
    """Run ``model.generate`` on ``inputs`` while streaming through a ``TextStreamer``.

    Args:
        model: Object exposing a ``generate(**kwargs)`` method.
        tokenizer: Tokenizer handed to ``TextStreamer``.
        inputs: Mapping unpacked as keyword arguments into ``model.generate``.
        max_new_tokens: Forwarded to ``model.generate`` (default 50).

    Returns:
        None. The value returned by ``model.generate`` is discarded.
    """
    streamer = TextStreamer(tokenizer)
    generation_options = {
        "streamer": streamer,
        "max_new_tokens": max_new_tokens,
    }
    # The result is intentionally dropped; output is consumed via the streamer.
    model.generate(**inputs, **generation_options)

<img src="./assets/model_scores_llama_3_8B.png" width=500>

## Load Untrained Downcycled Model

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import os

# Set the Hugging Face Hub flag that (per its name) enables hf_transfer downloads.
# NOTE(review): transformers is already imported by an earlier cell and again just
# above. If the hub library reads this variable at import time, assigning it here
# is too late -- confirm, and if so move this line before the first transformers import.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# Load the v0 checkpoint (the "untrained" model per the section heading) and the
# tokenizer published under the same name.
model_name = "prince-canuma/Llama-3-6B-v0"
untrained_model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

## Load Pretrained Downcycled Model

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer


# Load the v0.1 checkpoint and the tokenizer published under the same name.
# This rebinds `model_name` and `tokenizer` from the previous loading cell, so the
# later cells use this tokenizer for both `untrained_model` and `model`.
model_name = "prince-canuma/Llama-3-6B-v0.1"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# Prints the model size; as the cell's last expression, the returned whole-billion
# count is displayed as well.
count_parameters(model)

In [None]:
# Tokenize a single prompt, passed as a one-element list, requesting "pt" tensors.
# NOTE(review): "Eifel" looks like a misspelling of "Eiffel"; left unchanged because
# this string is the prompt fed to both models.
inputs = tokenizer(
[
   "The Eifel tower is located in"
], return_tensors = "pt")


In [None]:
# Completion from the v0 ("untrained") checkpoint, using the default max_new_tokens=50.
generate(untrained_model, tokenizer, inputs)

In [None]:
# Same prompt through the v0.1 checkpoint, for comparison with the previous cell.
generate(model, tokenizer, inputs)