# Model Training

In [5]:
# Load the tokenizer for the pretrained checkpoint identified by `model_name`.
from transformers import AutoTokenizer, AutoModelForCausalLM

# Identifier of the checkpoint passed to `from_pretrained` below.
model_name = "m-a-p/OpenCodeInterpreter-DS-6.7B"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# The model weights are intentionally not loaded here (line left commented out).
# NOTE(review): later cells use a variable called `model`, while this line would
# bind `pretrained_model` -- confirm where `model` is actually created.
# pretrained_model = AutoModelForCausalLM.from_pretrained(model_name)

In [8]:
import torch

# Build a small list of random tensors to experiment with.
# Creates 3 tensors, each with shape [1, 10] (the count is the `range` argument).
# torch.randn can be replaced with any other initialization method.
tensor_list = [torch.randn(1, 10) for _ in range(3)]


In [10]:
# Concatenate the entries of `tensor_list` along dim 0 (torch.cat's default axis).
logits = torch.cat(tensor_list)
# Standard deviation over dim 0, then log2 of each value, summed to one scalar.
# Left as the final bare expression so the notebook displays it.
logits.float().std(0).log2().sum()

tensor(-4.4591)

# Parameter manipulation and counting

In [90]:
# Step 1: switch gradients off for every parameter of the model.
for param in model.parameters():
    param.requires_grad_(False)

# Alternative kept for reference: unfreeze layers 0..30 instead of only layer 0.
# for i in range(0, 31):
#     for param in model.model.layers[i].parameters():
#         param.requires_grad = True

# Step 2: switch gradients back on only for the first entry of `model.model.layers`.
for param in model.model.layers[0].parameters():
    param.requires_grad_(True)

In [96]:
# Element count over all parameters of the model.
total_param = sum(weight.numel() for weight in model.parameters())
# Element count over the parameters that still have requires_grad set.
# NOTE(review): `trinable_param` is a misspelling of "trainable_param"; the name is
# kept unchanged here because other cells may refer to it.
trinable_param = sum(
    weight.numel() for weight in model.parameters() if weight.requires_grad
)

# Share of trainable elements, expressed in percent.
percentages = (trinable_param / total_param) * 100

print(f"Percentage of trainable parameters: {percentages:.2f}%")

Percentage of trainable parameters: 3.00%


In [98]:
from prettytable import PrettyTable

def count_parameters(model):
    """Print a table of the trainable parameters and their share of the model.

    Only parameters whose ``requires_grad`` flag is set are listed. After the
    table, the total number of trainable elements and the percentage they
    represent of all parameter elements are printed.

    Args:
        model: Object providing ``named_parameters()`` (yielding
            ``(name, parameter)`` pairs) and ``parameters()``, where each
            parameter has ``numel()`` and ``requires_grad``
            (e.g. a ``torch.nn.Module``).

    Returns:
        None. All results are written to stdout.
    """
    table = PrettyTable(["Modules", "Parameters"])
    trainable_params = 0
    for name, parameter in model.named_parameters():
        # Frozen parameters are neither listed nor counted as trainable.
        if not parameter.requires_grad:
            continue
        params = parameter.numel()
        table.add_row([name, params])
        trainable_params += params
    print(table)
    print(f"Total Trainable Params: {trainable_params}")

    total_param = sum(p.numel() for p in model.parameters())
    # A model without any parameters would otherwise raise ZeroDivisionError;
    # report 0% in that case.
    percentage = trainable_params / total_param * 100 if total_param else 0.0

    # Plain print (implicitly returning None) instead of `return print(...)`.
    print(f"\nPercentage of trainable parameters: {percentage:.2f}%")
    
# Print the trainable-parameter table and percentage for `model` (output goes to stdout).
count_parameters(model)

+------------------------------------------------+------------+
|                    Modules                     | Parameters |
+------------------------------------------------+------------+
|     model.layers.0.self_attn.q_proj.weight     |  16777216  |
|     model.layers.0.self_attn.k_proj.weight     |  16777216  |
|     model.layers.0.self_attn.v_proj.weight     |  16777216  |
|     model.layers.0.self_attn.o_proj.weight     |  16777216  |
|      model.layers.0.mlp.gate_proj.weight       |  45088768  |
|       model.layers.0.mlp.up_proj.weight        |  45088768  |
|      model.layers.0.mlp.down_proj.weight       |  45088768  |
|     model.layers.0.input_layernorm.weight      |    4096    |
| model.layers.0.post_attention_layernorm.weight |    4096    |
+------------------------------------------------+------------+
Total Trainable Params: 202383360

Percentage of trainable parameters: 3.00%
