In [1]:
import numpy as np
from numpy.linalg import pinv,inv
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

from scipy import linalg
from sklearn.linear_model import LogisticRegression

from sklearn.utils.extmath import svd_flip
from sklearn.decomposition import PCA

from tqdm import tqdm
import pickle

import torch
from transformers import AutoModelForTokenClassification

In [2]:
# Read the pickled object stored in 'embeddings.pickle' into `eval_dict`.
# Later cells index it as eval_dict[lang][field], with fields such as
# 'embeddings', 'words', 'refs' and 'langs'.
# NOTE(review): unpickling can execute arbitrary code, so only load this
# file if it comes from a trusted source.
with open('embeddings.pickle', 'rb') as handle:
    eval_dict = pickle.load(handle)

In [3]:
# Language codes handled by this notebook, in the order they are processed.
TRAIN_LANGS = ["en", "zh", "es", "de", "nl"]

# Colour assigned to each language code.
lang_color = dict(en='r', zh='b', es='g', de='yellow', nl='orange')

# Colour assigned to each entity type (tag without its B-/I- prefix).
label_color_small = dict(LOC='r', MISC='g', ORG='b', PER='yellow', O='orange')

# Colour assigned to each full tag: "B-X" and "I-X" both take the colour of
# entity type X. All B- tags come first, then all I- tags, then 'O'.
label_color = {
    f"{prefix}-{entity}": color
    for prefix in ("B", "I")
    for entity, color in label_color_small.items()
    if entity != 'O'
}
label_color['O'] = label_color_small['O']

In [4]:
# Flatten the per-language entries of eval_dict into three lists.
# Languages are appended in TRAIN_LANGS order, so every language occupies
# one contiguous run of positions in each list.
embeddings_arr, words_arr, refs_arr = [], [], []

for lang in tqdm(TRAIN_LANGS):
    lang_data = eval_dict[lang]
    embeddings_arr.extend(lang_data['embeddings'])
    words_arr.extend(lang_data['words'])
    refs_arr.extend(lang_data['refs'])

100%|██████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 184.38it/s]


In [5]:
def get_lang_color(i):
    """Return the colour stored in ``lang_color`` for key ``i``."""
    return lang_color[i]


def get_label_color(i):
    """Return the colour stored in ``label_color`` for key ``i``."""
    return label_color[i]


color_lang_arr = []
lang_starting_index = {}

for lang in TRAIN_LANGS:
    # The list length before appending is the offset at which this
    # language's entries start.
    print(f"{lang} starts from {len(color_lang_arr)}")
    lang_starting_index[lang] = len(color_lang_arr)

    color_lang_arr.extend(
        [get_lang_color(code) for code in eval_dict[lang]['langs']]
    )

# One colour per entry of refs_arr, in the same order.
color_label_arr = [get_label_color(ref) for ref in refs_arr]

en starts from 0
zh starts from 46364
es starts from 202376
de starts from 253577
nl starts from 305273


In [6]:
# Turn the four accumulated sequences into NumPy arrays, rebinding the same
# names (embeddings_arr is not converted here).
color_lang_arr, color_label_arr, words_arr, refs_arr = map(
    np.array,
    (color_lang_arr, color_label_arr, words_arr, refs_arr),
)

In [7]:
# Classifier
# Checkpoint directories; the numeric suffix of each path_N matches the
# classifier_N loaded from it.
path_3 = '/mnt/xtb/knarik/outputs/DG/models/domain_en_de_zh_epoch_50'
path_4 = '/mnt/xtb/knarik/outputs/DG/classifiers/domain_en_de_zh_epoch_50_en_de_zh_es_epoch_50'
path_1 = '/mnt/xtb/knarik/outputs/DG/classifiers/domain_en_de_zh_epoch_50_es_epoch_50'
path_0 = '/mnt/xtb/knarik/outputs/DG/models/domain_es_epoch_50'

# Load each checkpoint in turn (3, 4, 1, 0) with num_labels=9 and keep only
# the model's `.classifier` attribute.
classifier_3, classifier_4, classifier_1, classifier_0 = [
    AutoModelForTokenClassification.from_pretrained(
        checkpoint, output_hidden_states=True, num_labels=9
    ).classifier
    for checkpoint in (path_3, path_4, path_1, path_0)
]


['B-LOC', 'B-MISC', 'B-ORG', 'B-PER', 'I-LOC', 'I-MISC', 'I-ORG', 'I-PER', 'O']

In [8]:
# Rows 0 and 4 of each classifier weight matrix, detached from autograd and
# converted to NumPy.
# NOTE(review): the names assume row 0 is B-LOC and row 4 is I-LOC, which
# matches the label list printed under the previous cell — confirm against
# the model config.

# en_de_zh_es
B_loc_4_w, I_loc_4_w = (
    classifier_4.weight[row].detach().numpy() for row in (0, 4)
)

# es after fine-tune on en_de_zh
B_loc_1_w, I_loc_1_w = (
    classifier_1.weight[row].detach().numpy() for row in (0, 4)
)

In [9]:
# With n = len(B_loc_4_w):
#   R_b_loc[i, j] = (1 / (n * B_loc_4_w[i])) * B_loc_1_w[j]
# By construction B_loc_4_w @ R_b_loc reproduces B_loc_1_w (up to rounding,
# and assuming B_loc_4_w has no zero entries). R_b_loc is an outer product,
# so its rank is at most 1.
R_b_loc = np.outer(
    1 / (len(B_loc_4_w) * B_loc_4_w),
    B_loc_1_w,
)

In [10]:
# Show the dimensions of R_b_loc: np.outer of two length-n vectors is n x n.
R_b_loc.shape

(768, 768)

In [11]:
# With n = len(I_loc_4_w):
#   R_i_loc[i, j] = (1 / (n * I_loc_4_w[i])) * I_loc_1_w[j]
# By construction I_loc_4_w @ R_i_loc reproduces I_loc_1_w (up to rounding,
# and assuming I_loc_4_w has no zero entries). R_i_loc is an outer product,
# so its rank is at most 1.
R_i_loc = np.outer(
    1 / (len(I_loc_4_w) * I_loc_4_w),
    I_loc_1_w,
)

In [12]:
# Display R_b_loc; NumPy abbreviates the interior of large arrays with "...".
R_b_loc

array([[ 0.00058771,  0.00078463, -0.00132883, ...,  0.00127244,
        -0.00022043,  0.00061833],
       [ 0.00132022,  0.00176257, -0.00298505, ...,  0.0028584 ,
        -0.00049517,  0.001389  ],
       [-0.00067242, -0.00089772,  0.00152035, ..., -0.00145585,
         0.0002522 , -0.00070745],
       ...,
       [ 0.00092774,  0.00123858, -0.00209763, ...,  0.00200863,
        -0.00034796,  0.00097607],
       [-0.00146262, -0.00195267,  0.00330701, ..., -0.00316669,
         0.00054857, -0.00153882],
       [ 0.00075276,  0.00100497, -0.001702  , ...,  0.00162979,
        -0.00028233,  0.00079198]], dtype=float32)

In [13]:
# Display R_i_loc; NumPy abbreviates the interior of large arrays with "...".
R_i_loc

array([[0.00090264, 0.0006426 , 0.00054664, ..., 0.00027378, 0.0003834 ,
        0.00046891],
       [0.00148788, 0.00105923, 0.00090105, ..., 0.00045128, 0.00063198,
        0.00077293],
       [0.00168553, 0.00119994, 0.00102075, ..., 0.00051123, 0.00071593,
        0.00087561],
       ...,
       [0.00397137, 0.00282726, 0.00240505, ..., 0.00120454, 0.00168685,
        0.00206307],
       [0.00277359, 0.00197455, 0.00167968, ..., 0.00084125, 0.00117809,
        0.00144084],
       [0.00501347, 0.00356914, 0.00303614, ..., 0.00152061, 0.00212949,
        0.00260443]], dtype=float32)

In [14]:
# Rows 3 and 7 of each classifier weight matrix, detached from autograd and
# converted to NumPy.
# NOTE(review): the names assume row 3 is B-PER and row 7 is I-PER, which
# matches the label list printed earlier in the notebook — confirm against
# the model config.

# en_de_zh_es
B_per_4_w, I_per_4_w = (
    classifier_4.weight[row].detach().numpy() for row in (3, 7)
)

# es after fine-tune on en_de_zh
B_per_1_w, I_per_1_w = (
    classifier_1.weight[row].detach().numpy() for row in (3, 7)
)

In [15]:
# Same construction as for the LOC rows, applied to the PER rows:
#   R[i, j] = (1 / (n * w4[i])) * w1[j],  n = len(w4)
# so that w4 @ R reproduces w1 (up to rounding, assuming w4 has no zeros).
R_b_per = np.outer(
    1 / (len(B_per_4_w) * B_per_4_w),
    B_per_1_w,
)
R_i_per = np.outer(
    1 / (len(I_per_4_w) * I_per_4_w),
    I_per_1_w,
)

In [16]:
# Display R_b_per; NumPy abbreviates the interior of large arrays with "...".
R_b_per

array([[ 0.0018879 , -0.00545456, -0.00420899, ..., -0.00245884,
         0.00352651, -0.00324994],
       [-0.00049065,  0.0014176 ,  0.00109388, ...,  0.00063903,
        -0.00091651,  0.00084463],
       [-0.00061961,  0.00179019,  0.00138139, ...,  0.00080699,
        -0.0011574 ,  0.00106663],
       ...,
       [-0.00077876,  0.00225   ,  0.00173621, ...,  0.00101427,
        -0.00145468,  0.0013406 ],
       [ 0.00068991, -0.0019933 , -0.00153812, ..., -0.00089855,
         0.00128872, -0.00118765],
       [-0.00063919,  0.00184676,  0.00142505, ...,  0.00083249,
        -0.00119398,  0.00110034]], dtype=float32)

In [17]:
# Display R_i_per; NumPy abbreviates the interior of large arrays with "...".
R_i_per

array([[ 0.00056008,  0.00267074, -0.00246963, ...,  0.00041602,
         0.00159905,  0.00076995],
       [ 0.00036435,  0.00173741, -0.00160658, ...,  0.00027064,
         0.00104024,  0.00050088],
       [-0.00020799, -0.00099177,  0.0009171 , ..., -0.00015449,
        -0.0005938 , -0.00028592],
       ...,
       [ 0.0042955 ,  0.02048292, -0.01894057, ...,  0.00319065,
         0.01226372,  0.00590503],
       [ 0.00032748,  0.00156156, -0.00144398, ...,  0.00024325,
         0.00093495,  0.00045018],
       [ 0.00064464,  0.00307395, -0.00284249, ...,  0.00047883,
         0.00184047,  0.00088619]], dtype=float32)

In [18]:
# Determinant of R_b_per. R_b_per is an outer product of two vectors, so its
# rank is at most 1; for any size above 1x1 the matrix is singular and a
# determinant of 0 is the expected result (it has no inverse).
np.linalg.det(R_b_per)

0.0

In [19]:
# Same outer-product construction, but between two rows of the SAME
# classifier (classifier_4) rather than between two classifiers:
#   B_loc_4_w @ R_b_loc_per reproduces B_per_4_w
#   B_per_4_w @ R_b_per_loc reproduces B_loc_4_w
# (up to rounding, assuming the divided vector has no zero entries).
R_b_loc_per = np.outer(
    1 / (len(B_loc_4_w) * B_loc_4_w),
    B_per_4_w,
)
R_b_per_loc = np.outer(
    1 / (len(B_per_4_w) * B_per_4_w),
    B_loc_4_w,
)

In [20]:
# Determinants of both matrices. Each one is an outer product of two vectors
# (rank at most 1), so for any size above 1x1 both are singular and 0 is the
# expected value.
np.linalg.det(R_b_loc_per), np.linalg.det(R_b_per_loc)

(0.0, 0.0)