In [7]:
%matplotlib inline
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd
import argparse
import re
import time
import glob
import joblib
import sys

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn import preprocessing
import torch
from transformers import TrainingArguments, Trainer
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import EarlyStoppingCallback
from transformers.integrations import AzureMLCallback
from transformers import AutoTokenizer, DataCollatorWithPadding
from torchsummary import summary

sys.path.append(os.path.join(os.path.join(os.getcwd(), ".."), 'project'))
from train_transformer import get_model, adjust_tokenizer, compute_metrics, get_encode_labels, tokenize_function, generate_tokenized_dataset, get_datasets, test_model
from utils import *
# from utils import get_valid_runs, get_highest_performing_model, get_dataset


In [2]:
from azureml.core import Run
import azureml
import mlflow
from azureml.core import Workspace, Dataset, Environment

# check core SDK version number
print("Azure ML SDK Version: ", azureml.core.VERSION)
print("MLflow version:", mlflow.version.VERSION)


Azure ML SDK Version:  1.44.0
MLflow version: 1.26.1


In [3]:
ws = Workspace.from_config()
# mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())
print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep='\n')


Workspace name: scsccps-dsai-aide-dev-mlw
Azure region: canadacentral
Subscription id: 105efa68-0ff4-486f-ae3a-86e28a447237
Resource group: scsc-dsai-aide-dev-rg


In [4]:
from azureml.core import Experiment

script_folder = './project'
os.makedirs(script_folder, exist_ok=True)

exp = Experiment(workspace=ws, name='transformer_hp')
# mlflow.set_experiment('transformer_hp')

In [11]:
dir = 'output_120'
model_directory = f'{dir}/outputs/model'
print(f'the output path: [{model_directory}]')

model = AutoModelForSequenceClassification.from_pretrained(model_directory, num_labels=121)
tokenizer = AutoTokenizer.from_pretrained(model_directory)
le=joblib.load(model_directory + '/labelEncoder.joblib')
print('Model objects and their dependencies are loaded')

the output path: [output_120/outputs/model]
Model objects and their dependencies are loaded


In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
model.zero_grad()
print(device)

cuda:0


In [17]:
counter = 0
parameter_target = None
for name, parameter in model.named_parameters():
    if name == 'bert.encoder.layer.2.attention.self.query.weight':
        parameter_target = parameter
        print(name)
        counter += 1
    # print(parameter)

counter

bert.encoder.layer.2.attention.self.query.weight


1

In [18]:
model_base = AutoModelForSequenceClassification.from_pretrained('bert-base-cased', num_labels=121)

counter = 0
parameter_target_base = None
for name, parameter in model_base.named_parameters():
    if name == 'bert.encoder.layer.2.attention.self.query.weight':
        parameter_target_base = parameter
        print(name)
        counter += 1
    # print(parameter)

counter

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

In [26]:
parameter_target.to(torch.device('cpu')) == parameter_target_base.to(torch.device('cpu'))


tensor([[False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        ...,
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False]])

In [27]:
model_base_2 = AutoModelForSequenceClassification.from_pretrained('bert-base-cased', num_labels=121)

counter = 0
parameter_target_base_2 = None
for name, parameter in model_base_2.named_parameters():
    if name == 'bert.encoder.layer.2.attention.self.query.weight':
        parameter_target_base_2 = parameter
        print(name)
        counter += 1
    # print(parameter)

counter

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

bert.encoder.layer.2.attention.self.query.weight


1

In [29]:
parameter_target.to(torch.device('cpu')) == parameter_target_base_2.to(torch.device('cpu'))


tensor([[False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        ...,
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False]])

In [None]:
parameter_target.to(torch.device('cpu')) == parameter_target_base_2.to(torch.device('cpu'))


In [34]:
model_base = AutoModelForSequenceClassification.from_pretrained('bert-base-cased', num_labels=12)

li_shapes = []
counter = 0
parameter_target_base = None
for name, parameter in model_base.named_parameters():
    x = parameter.shape[0]
    y = parameter.shape[0]
    li_shapes.append(x * y)
    # if name == 'bert.encoder.layer.2.attention.self.query.weight':
    #     parameter_target_base = parameter
    #     print(name)
    #     counter += 1
    # print(parameter)

counter

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

0

In [35]:
sum(li_shapes)


1168972596