In [86]:
# ==============imports===================
import pandas as pd
import os

import matplotlib.pyplot as plt
plt.ioff()
import matplotlib.ticker as mtick
import seaborn as sns

In [87]:
# ==================args=====================

# Apply seaborn's default theme before any figure is created.
sns.set_theme()

# Model name; used to build the input file paths and the output PDF path.
model = 'bloom-1b7'

# Checkpoint steps, kept as strings (not referenced by the cells shown below).
checkpoints = [str(step) for step in (10000, 50000, 100000, 150000, 200000, 250000)]
# Alternative schedule: 1000, 10000, 100000, 200000, 300000, 400000, 500000, 600000

# Layer name used in the input file names; plotted alongside 'last-layer'.
layer = 'inter-layer-17'

In [88]:
# ======================read cos similarities==============================
# Draw, on the left-hand axis, the cosine similarity per checkpoint for the
# last layer and for the layer selected in `layer`. For each of the two, one
# curve is the 'code' row and the other is the mean of every remaining row.

fig, ax1 = plt.subplots(figsize=(5.5, 5))
# Turn interactive mode off for this figure.
plt.ioff()

# Axis cosmetics do not depend on the layer, so set them once, outside the loop.
ax1.set_xlabel('global steps')
ax1.set_ylabel('Cosine Similarities')
ax1.tick_params(axis='y', grid_alpha=0.5)

for layer_name in ['last-layer', layer]:

    # to_dict() yields {column header: {row label: value}}; the column headers
    # are checkpoint steps (plus an optional 'best' column that is skipped).
    cos_dict = pd.read_csv(f'csv_files/{model}_{layer_name}_cos-similarity.csv', index_col=0).to_dict()

    avg_cos_dict = {}
    code_cos_dict = {}

    for ckpt, cos_similarity in cos_dict.items():
        if ckpt == 'best':
            continue
        step = int(ckpt)
        code_cos_dict[step] = cos_similarity['code']
        # Average over every row except 'code' without mutating the loaded data.
        other_values = [value for row, value in cos_similarity.items() if row != 'code']
        # A column holding only the 'code' row gives NaN instead of a ZeroDivisionError.
        avg_cos_dict[step] = sum(other_values) / len(other_values) if other_values else float('nan')

    # plotting
    # Sort by step so each line runs left to right whatever the CSV column
    # order is, and hand matplotlib real lists rather than dict views.
    ckpts = sorted(avg_cos_dict)
    cos_similarities = [avg_cos_dict[step] for step in ckpts]
    code_cos_similarities = [code_cos_dict[step] for step in ckpts]

    if layer_name == 'last-layer':
        ax1.plot(ckpts, cos_similarities, 'g--', label='nl-last-layer')
        ax1.plot(ckpts, code_cos_similarities, 'm--', label='code-last-layer')
    else:
        ax1.plot(ckpts, cos_similarities, 'g:', label='nl')
        ax1.plot(ckpts, code_cos_similarities, 'm:', label='code')

In [89]:
# ===========================read overlap ratios============================
# Plot the average overlap rate per checkpoint on a second y-axis that shares
# ax1's x-axis.
# NOTE: every re-run of this cell adds another twin axis to the same figure.
ax2 = ax1.twinx()

avg_ovlp_dict = {}
with open(f'csv_files/{model}_{layer}_avg-ovlp-rate-by-ckpt.txt', 'r') as f:
    for line in f:
        # Tolerate blank lines (e.g. a trailing newline at the end of the file).
        if not line.strip():
            continue
        # Each record is "<checkpoint>,<rate>". The name `fields` is used on
        # purpose: rebinding `list` would shadow the builtin for the whole kernel.
        fields = line.split(',')
        print(fields)
        if fields[0] == 'best':
            continue
        avg_ovlp_dict[int(fields[0])] = float(fields[1])

# Plot after the file is closed; sorting keeps the line monotonic in x and an
# empty file simply yields an empty line instead of an unpacking error.
ckpts = sorted(avg_ovlp_dict)
avg_ovlps = [avg_ovlp_dict[step] for step in ckpts]
ax2.plot(ckpts, avg_ovlps, 'r-', label='avg')

['best', '0.1342079772079772\n']
['10000', '0.04760805860805861\n']
['50000', '0.06011884411884413\n']
['100000', '0.0765050875050875\n']
['150000', '0.08755718355718356\n']
['200000', '0.09470288970288969\n']
['250000', '0.10913268213268212\n']


In [90]:
# Legends are anchored outside the plotting area, one on each side.
ax1.legend(loc='center left', bbox_to_anchor=(-0.6,0.5))
ax1.set_ylabel('Cosine Similarities')

ax2.legend(loc='center right', bbox_to_anchor=(1.45,0.5))
ax2.set_ylabel('overlap rates')

# Show the overlap rates (fractions of 1) as percentages. The twin axis is
# addressed explicitly rather than through the figure's "current axes".
ax2.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1))

# The seaborn theme is already applied in the args cell, before the figure is
# created, so it is not set again here.
out_path = f'experiments/scatterplots/{model}/checkpoints_{layer}_cos-similarity.pdf'
# Create the per-model output directory on first use so the save cannot fail
# with FileNotFoundError.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
# Save this specific figure object instead of whatever pyplot considers current.
fig.savefig(out_path, bbox_inches='tight')