In [None]:
! pip install transformers torch accelerate huggingface-hub huggingface-cli hf-transfer

In [None]:
def count_parameters(model):
    """Print and return the number of trainable parameters of ``model``.

    Only parameters with ``requires_grad`` set are counted. The count is
    printed in billions with three decimals.

    Args:
        model: Object exposing ``parameters()`` whose items provide
            ``numel()`` and ``requires_grad``.

    Returns:
        int: The parameter count in billions, truncated to a whole number.
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()

    # Express the total in billions of parameters.
    num_params = trainable_total / 10**9
    print(f"Model size: {num_params:.3f}B parameters")
    return int(num_params)


## Load Reference Model

In [None]:
import os

# hf_transfer has to be enabled *before* `transformers` is imported:
# `transformers` imports `huggingface_hub`, which reads this variable once at
# import time, so setting it afterwards has no effect in the running kernel.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer

# Load the meta-llama/Meta-Llama-3-8B-Instruct model, config and tokenizer
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
model = AutoModelForCausalLM.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# Report the reference model's trainable parameter count (in billions).
count_parameters(model)

In [None]:
# Display the model's repr to inspect its module layout.
model

In [None]:
def extract_model_weights(reference_model, n_layers):
    """Collect the weights for a model built from the first ``n_layers`` decoder layers.

    Walks ``reference_model.named_modules()`` in order and copies every
    ``weight`` / ``bias`` it finds, stopping at the first module whose name
    contains ``model.layers.<i>`` with ``i >= n_layers``. The final norm
    (``reference_model.model.norm``) and ``reference_model.lm_head`` are then
    added explicitly, because the walk stops before reaching them.

    The early stop relies on ``named_modules()`` yielding the decoder layers
    in ascending index order, before the final norm and the LM head.

    Args:
        reference_model: Model exposing ``named_modules()``, ``model.norm``
            and ``lm_head``.
        n_layers: Number of leading decoder layers to keep.

    Returns:
        dict: Maps ``"<module name>.weight"`` / ``"<module name>.bias"`` to
        tensors. All entries are clones except the ``lm_head`` tensors, which
        share storage with ``reference_model``.
    """
    params = {}

    def _store(prefix, module, clone=True):
        # Record whichever of weight/bias the module actually has.
        for attr in ('weight', 'bias'):
            tensor = getattr(module, attr, None)
            if tensor is not None:
                params[f'{prefix}.{attr}'] = tensor.data.clone() if clone else tensor.data

    for name, module in reference_model.named_modules():
        if 'model.layers.' in name:
            # Names look like "model.layers.<index>...": the third dotted
            # component is the decoder layer index.
            layer_index = int(name.split('.')[2])
            if layer_index >= n_layers:
                # Check before storing so nothing from the first excluded
                # layer is copied (this also makes n_layers == 0 work).
                break
        _store(name, module)

    # Use the model that was passed in, not a notebook-level global.
    _store('model.norm', reference_model.model.norm)

    # The LM head tensors are intentionally left un-cloned, as before.
    _store('lm_head', reference_model.lm_head, clone=False)

    return params


In [None]:
# Number of leading decoder layers to carry over into the smaller model.
target_model_n_layers = 24
# Copy the matching weights out of the reference model.
pretrained_weights = extract_model_weights(model, target_model_n_layers)

In [None]:
from transformers import AutoModelForCausalLM, AutoConfig
# Start from the reference model's config and change only the layer count.
config = AutoConfig.from_pretrained(model_name)
config.num_hidden_layers = target_model_n_layers
# Build the smaller architecture from the edited config; the extracted
# weights are loaded into it in a later cell.
target_model = AutoModelForCausalLM.from_config(config)


In [None]:
# Parameter count of the truncated model, in whole billions.
target_model_size = count_parameters(target_model)

In [None]:
# Load the extracted reference weights into the freshly built target model.
target_model.load_state_dict(pretrained_weights)


In [None]:
# Tokenise a single plain-text prompt as PyTorch tensors for a quick smoke test.
inputs = tokenizer(["What is Python"], return_tensors="pt")

# inputs = tokenizer.apply_chat_template(
#     [
#         # {"content":"","role":"system"},
#         {"content":"""Given the question: Read the article and select the best
#          answer. Article: Can you swim? Do you like swimming? Well, how can you
#          learn to swim? I think the best way is to go into the water and learn.
#         I'm afraid you'll never learn to swim just by reading books about
#         Swimming or looking at others swimming. It's the same with the English
#         study. We must practice, practice and practice. Listening and speaking
#         are very important for beginners. We can listen to English programs on radio.
#         You may just understand a few words. It doesn't matter. Just be relaxed,
#         try to catch every word. Somebody may be a good listener, but he is afraid
#         to speak because he's afraid of making mistakes. You know we sometimes
#         make mistakes when we speak Chinese. Don't be afraid. We must be brave.
#         If you really want to learn English well, you must try to speak with
#         everyone as long as he knows English. When there's nobody to talk with,
#         you can talk to yourself in English. It's interesting and also a good
#         way to practice your spoken English. Remember, the more you speak, the
#         fewer mistakes you'll make. Reading and writing are more important for
#         senior school students. First we must choose the books we're interested
#         in. A lot of reading will improve your language sense.
#         This is very important. It's easier said than done. Well, let's do
#         more practice from now on. I'm sure you'll learn English well in this
#         way. ,A, B, C, D,. (10)
#         Question: Which is the best title for the passage?
#         Options:
#             A: How to Learn English.
#             B: Easier Said Than Done.
#             C: Listen First, Speak Second.
#             D: How to learn to Swim.\n
#         The answer is:""","role":"user"}
#     ], add_generation_prompt=True, return_tensors='pt',
# )

In [None]:
from transformers import TextStreamer
# Stream decoded tokens to the cell output while generation is running.
text_streamer = TextStreamer(tokenizer)
# `inputs` is the tokenizer's output mapping, hence the ** unpacking.
_ = target_model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

In [None]:
# Publish the truncated model and its tokenizer under the same Hub repo name.
target_model.push_to_hub("Llama-3-6B-Instruct-v0.1")
tokenizer.push_to_hub("Llama-3-6B-Instruct-v0.1")

# Downcycling by getting the first X layers and last X layers 

Here X = N/2, where N is the target number of layers.

For instance, if the target number of layers is 24, then X = 24/2 = 12.

In [None]:
target_model_n_layers = 24

# Slice the leading and trailing halves of the reference decoder stack.
# `-target_model_n_layers//2` parses as `(-target_model_n_layers)//2`.
head_layers = model.model.layers[:target_model_n_layers//2]
tail_layers = model.model.layers[-target_model_n_layers//2:]

# Copy each half's tensors under "model.layers.<key>" names. The sliced
# halves appear to number their entries from 0 again (TODO confirm), which is
# why the second half's indices are shifted in a later cell.
weights_1 = dict(
    (f'model.layers.{key}', tensor.clone())
    for key, tensor in head_layers.state_dict().items()
)
weights_2 = dict(
    (f'model.layers.{key}', tensor.clone())
    for key, tensor in tail_layers.state_dict().items()
)


In [None]:
# Add the weights that live outside the decoder layers: the token embeddings
# go with the first half, the final norm and the LM head with the second.
backbone_state = model.model.state_dict()
weights_1["model.embed_tokens.weight"] = backbone_state['embed_tokens.weight']
weights_2["model.norm.weight"] = backbone_state['norm.weight']
weights_2["lm_head.weight"] = model.state_dict()['lm_head.weight']

In [None]:
import re
def update_layer_numbers(state_dict, offset=12):
    """Return a copy of ``state_dict`` with decoder-layer indices shifted.

    Every ``model.layers.<i>`` occurrence in a key is rewritten to
    ``model.layers.<i + offset>``. Keys without that pattern are kept as they
    are. Values are passed through untouched and the input mapping is not
    modified.

    Args:
        state_dict: Mapping from parameter names to values.
        offset: Amount added to each layer index. Defaults to 12.

    Returns:
        dict: New mapping with the renumbered keys.
    """
    # Dots are escaped so only literal "model.layers.<n>" segments match.
    pattern = re.compile(r'model\.layers\.(\d+)')

    def _shift(match):
        return f"model.layers.{int(match.group(1)) + offset}"

    return {pattern.sub(_shift, key): value for key, value in state_dict.items()}


# Shift the second half's layer indices so they follow the first half's.
# NOTE: this rebinds weights_2, so re-running the cell shifts the indices again.
weights_2 = update_layer_numbers(weights_2)

In [None]:
from transformers import AutoModelForCausalLM, AutoConfig
# Start from the reference model's config and change only the layer count.
config = AutoConfig.from_pretrained(model_name)
config.num_hidden_layers = target_model_n_layers
# Build a new target model from the edited config; the merged weights are
# loaded into it in the next cell.
target_model = AutoModelForCausalLM.from_config(config)

In [None]:
# Merge both halves into one mapping and load it into the target model.
target_model.load_state_dict({**weights_1, **weights_2})

In [None]:
# Build a single-turn chat prompt (a multiple-choice reading-comprehension
# question) with the tokenizer's chat template, ending with a generation prompt.
inputs = tokenizer.apply_chat_template(
    [
        # {"content":"","role":"system"},
        {"content":"""Given the question: Read the article and select the best
         answer. Article: Can you swim? Do you like swimming? Well, how can you
         learn to swim? I think the best way is to go into the water and learn.
        I'm afraid you'll never learn to swim just by reading books about
        Swimming or looking at others swimming. It's the same with the English
        study. We must practice, practice and practice. Listening and speaking
        are very important for beginners. We can listen to English programs on radio.
        You may just understand a few words. It doesn't matter. Just be relaxed,
        try to catch every word. Somebody may be a good listener, but he is afraid
        to speak because he's afraid of making mistakes. You know we sometimes
        make mistakes when we speak Chinese. Don't be afraid. We must be brave.
        If you really want to learn English well, you must try to speak with
        everyone as long as he knows English. When there's nobody to talk with,
        you can talk to yourself in English. It's interesting and also a good
        way to practice your spoken English. Remember, the more you speak, the
        fewer mistakes you'll make. Reading and writing are more important for
        senior school students. First we must choose the books we're interested
        in. A lot of reading will improve your language sense.
        This is very important. It's easier said than done. Well, let's do
        more practice from now on. I'm sure you'll learn English well in this
        way. ,A, B, C, D,. (10)
        Question: Which is the best title for the passage?
        Options:
            A: How to Learn English.
            B: Easier Said Than Done.
            C: Listen First, Speak Second.
            D: How to learn to Swim.\n
        The answer is:""","role":"user"}
    ], add_generation_prompt=True, return_tensors='pt',
)

In [None]:
from transformers import TextStreamer
# Stream decoded tokens to the cell output while generation is running.
text_streamer = TextStreamer(tokenizer)
# `inputs` is passed positionally here; presumably apply_chat_template returned
# a tensor of token ids rather than a mapping — confirm for the installed
# transformers version.
_ = target_model.generate(inputs, streamer = text_streamer, max_new_tokens = 128)

In [None]:
# Publish the first-half/last-half model and its tokenizer under the same Hub repo name.
target_model.push_to_hub("Llama-3-6B-Instruct-Granite-v0.1")
tokenizer.push_to_hub("Llama-3-6B-Instruct-Granite-v0.1")