# Check model training

__Objective:__ establish whether the model's weights actually change between successive training checkpoints.

In [48]:
import os
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification

In [8]:
# Identifier of the training run whose checkpoints are inspected.
EXPERIMENT_ID = 'majority_vote_model_binarized_labels_restricted_data_5'
MODEL_OUTPUT_DIR = f'/data1/moscato/personalised-hate-boundaries-data/models/{EXPERIMENT_ID}/'

# Checkpoint directories are loaded further down as `checkpoint-<number>/`, so
# only entries with exactly that name shape are considered here.
CHECKPOINT_PREFIX = 'checkpoint-'

# `os.listdir` returns entries in arbitrary order: sort the numbers so that
# `checkpoints[0]` is always the earliest checkpoint and `checkpoints[-1]` the
# latest. Entries whose suffix is not a plain number (which would make `int`
# raise, or point at a directory that is not a checkpoint) are skipped.
checkpoints = sorted(
    int(dirname[len(CHECKPOINT_PREFIX):])
    for dirname in os.listdir(MODEL_OUTPUT_DIR)
    if dirname.startswith(CHECKPOINT_PREFIX)
    and dirname[len(CHECKPOINT_PREFIX):].isdigit()
)

checkpoints

[1252, 1565]

In [15]:
def _load_checkpoint(step):
    """Load the classifier stored in the `checkpoint-<step>/` subdirectory.

    `step` is the numeric suffix of the checkpoint directory located inside
    `MODEL_OUTPUT_DIR`.
    """
    checkpoint_dir = os.path.join(MODEL_OUTPUT_DIR, f'checkpoint-{step}/')
    return AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)


# The two models whose weights are compared below.
classifier_checkpoint_0 = _load_checkpoint(checkpoints[0])
classifier_checkpoint_1 = _load_checkpoint(checkpoints[1])

In [51]:
# Compare the two checkpoints parameter by parameter. For every named
# parameter one row is recorded with the sum of its entries in each model
# (a cheap scalar fingerprint) and a flag telling whether the two tensors are
# exactly identical.
parameters_data = []

# Materialise both parameter lists first: `zip` stops at the shorter input, so
# a model with extra (or missing) parameters would otherwise go unnoticed.
named_parameters_0 = list(classifier_checkpoint_0.named_parameters())
named_parameters_1 = list(classifier_checkpoint_1.named_parameters())

if len(named_parameters_0) != len(named_parameters_1):
    raise ValueError(
        'The two models have a different number of parameters: '
        f'{len(named_parameters_0)} vs {len(named_parameters_1)}'
    )

for (p0_name, p0), (p1_name, p1) in zip(named_parameters_0, named_parameters_1):
    # Rows are only meaningful if both models expose the same parameter at the
    # same position.
    if p0_name != p1_name:
        raise ValueError('Parameters considered are different between the two models')

    # Under `no_grad` the results do not track gradients, which is required
    # for the `.numpy()` conversion of the sums.
    with torch.no_grad():
        p0_sum = p0.sum().numpy()
        p1_sum = p1.sum().numpy()

        # `torch.equal` is True only when both tensors have the same size and
        # the same elements, so a shape mismatch yields False rather than a
        # broadcast comparison or an error.
        tensors_equal = torch.equal(p0, p1)

    parameters_data.append({
        'name': p0_name,
        'p0_sum': p0_sum,
        'p1_sum': p1_sum,
        'tensors_equal': tensors_equal,
    })

parameters_data = pd.DataFrame(parameters_data)

In [52]:
# Display the per-parameter comparison table (name, sums, equality flag).
parameters_data

Unnamed: 0,name,p0_sum,p1_sum,tensors_equal
0,deberta.embeddings.word_embeddings.weight,74325.766,74325.77,False
1,deberta.embeddings.LayerNorm.weight,472.65997,472.65997,False
2,deberta.embeddings.LayerNorm.bias,-10.93608,-10.936094,False
3,deberta.encoder.layer.0.attention.self.query_p...,18.024467,18.024088,False
4,deberta.encoder.layer.0.attention.self.query_p...,8.226106,8.22612,False
...,...,...,...,...
197,deberta.encoder.LayerNorm.bias,-18.938795,-18.938017,False
198,pooler.dense.weight,-33.23482,-33.243042,False
199,pooler.dense.bias,0.6090887,0.6094283,False
200,classifier.weight,0.2011265,0.20112672,False


In [53]:
# Mean of the equality flags, i.e. the fraction of parameter tensors that are
# identical in both checkpoints; 0.0 means every tensor differs.
parameters_data['tensors_equal'].mean()

np.float64(0.0)