In [1]:
base_path = "results_updated"


In [14]:
!tar -zxf {base_path}.tar.gz

In [2]:
import os, json
from collections import defaultdict

SCORES = []

params = defaultdict(set)
for base_folder, folders, files in os.walk(base_path):
    if files:
        date, rest = base_folder.split('/')[-1].split('-')
        time, time_suf, lang, smth, cat = rest.split('_')
        for file in files:
            dat = json.load(open(f"{base_folder}/{file}"))

            for k, v in dat["params"].items():
                if type(v) is str: params[k].add(v)
            xs = list(dat["results"]["test_score"].keys())
            ys = [el[0] for el in dat["results"]["test_score"].values()]
            score = dat["params"].copy()
            score["coord"] = xs
            score["score"] = ys
            SCORES.append(score)


In [3]:
len(SCORES)

2046

In [4]:
from itertools import product
from matplotlib import pyplot as plt
from datetime import datetime
import numpy as np
from collections import defaultdict
import random


CATEGORICAL_T = "categorical"
CONTINUOUS_T = "continuous"


In [5]:
task2type = {}
for probing_type in params["probing_type"]:
    categorical_ = input(f"is \"{probing_type}\" a categorical probing task? [Y/N]: ")
    if categorical_.lower().startswith('y'):
        type_ = CATEGORICAL_T
    else:
        type_ = CONTINUOUS_T
    task2type[probing_type] = type_

is "layer" a categorical probing task? [Y/N]:  N


In [6]:
axes2type = defaultdict(lambda: "continuous")
from itertools import cycle
colors = cycle(["#b6e1fc","#ccccff","#ede1ed","#d8ebd1"])

lev2col = defaultdict(lambda: next(colors))


CNT = 0
CHARTS = []
POOR_COORDS = []
GOOD_COORDS = set()

facet_values = defaultdict(set)
for score in SCORES:
    for facet_type in score:
        facet_value = score[facet_type]
        if type(facet_value) is dict: continue
        if type(facet_value) is list: facet_value=tuple(facet_value)
        facet_values[facet_type].add(facet_value)

def barchart(x_ax, y_ax, scores, reference_ordering=None):
    """takes the axes labels, exps results and (optional) ordering
    returns the properly ordered <x,y> points
    """
    xy_pairs = [(res[x_ax], res[y_ax]) for res in scores]
    
    if reference_ordering is None and axes2type[x_ax] == CATEGORICAL_T:
        xy_pairs = sorted(xy_pairs, key=lambda tup: tup[1])
    elif reference_ordering:
        xy_di = dict(xy_pairs)
        xy_pairs = [(x, xy_di[x]) for x in reference_ordering]
    if xy_pairs:
        xs, ys = zip(*xy_pairs)
    else: xs, ys = [], []
    return np.array(xs).flatten(), np.array(ys).flatten()

def matches_restriction(score, restriction):
    return all(score[k] == v for k,v in restriction.items())

def histogram_html(x_ax, y_ax, facet_coords):
    
    global SCORES, CHARTS, CNT, POOR_COORDS
    scores_to_plot = [score for score in SCORES if matches_restriction(score, facet_coords)]
    xs, ys = barchart(x_ax, y_ax, scores_to_plot)

    if len(xs):
        CHARTS.append({"coord": facet_coords.copy(), "xs": xs, "ys": ys})
        GOOD_COORDS.add(str(facet_coords))
    else:
        POOR_COORDS.append(facet_coords)
    CNT +=1
    if CNT % 100 == 0:
        print(CNT, end = ' ')
        if CNT % 1000 == 0:
            print()
    return ''

    
    fig, ax = plt.subplots(1, 1, figsize=(4,4))
    ax.bar(xs, ys)
    fig.suptitle(''.join(f"{k}={v}" for k,v in facet_coords.items()))
    ax.set_xlabel(x_ax)
    ax.set_ylabel(SCORES[0]["metric_name"])
    figfile = f"{str(datetime.now().timestamp()).replace('.','')}.jpg"
    fig.savefig(figfile)
    html_code = f'<img src="{figfile}"/>'
    return html_code

def process_facet_level(facet_level, next_levels, x_ax, y_ax, facet_coords=None):
    global SCORES
    if facet_coords is None: facet_coords = {}
    codes = []
    for facet_value in facet_values[facet_level]:
        facet_coords[facet_level] = facet_value
        if next_levels:
            nested_html = process_facet_level(next_levels[0], next_levels[1:], x_ax, y_ax, facet_coords)
        else:
            nested_html = histogram_html(x_ax, y_ax, facet_coords)
        codes.append(f"<div id='{facet_level}-{facet_value}' style='border:1px solid black;display: grid;  grid-gap: 5px; background-color: {lev2col[facet_level]}; margin:20px'><h2>{facet_level+'='+facet_value}</h2>{nested_html}</div>")
    return ''.join(codes)




x_ax = "coord"
y_ax = "score"
facet_hierarchy = ["task_category", "task_language"]

res = process_facet_level(facet_hierarchy[0], facet_hierarchy[1:], x_ax, y_ax)
print(res, file=open("facets.html", 'w'))


100 200 300 400 500 600 700 800 900 1000 
1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 
2100 2200 2300 2400 2500 2600 2700 2800 2900 3000 
3100 3200 3300 3400 3500 3600 3700 3800 3900 4000 
4100 4200 4300 4400 4500 4600 4700 4800 4900 5000 
5100 5200 5300 5400 5500 5600 5700 5800 5900 6000 
6100 6200 6300 6400 6500 6600 6700 6800 6900 7000 
7100 7200 7300 7400 7500 7600 7700 7800 7900 8000 
8100 8200 8300 8400 8500 8600 8700 8800 8900 

In [7]:
print(len(CHARTS))

1240


In [8]:
print(len(GOOD_COORDS))

1240


In [10]:
from itertools import product
from similaritymeasures import frechet_dist
from scipy.stats import pearsonr
from tqdm.auto import tqdm
from langcodes import Language


import pandas as pd
lang2wals = pd.read_csv("Languages.csv").set_index("iso_codes")

already_scored = {}
lang2lat = {}
lang2long = {}
for left_c, right_c in tqdm(product(CHARTS, CHARTS), total=(len(CHARTS)*len(CHARTS))):
    if str(left_c['coord']) == str(right_c['coord']): continue
    if (str(left_c['coord']), str(right_c['coord'])) in already_scored: continue
    if (str(right_c['coord']), str(left_c['coord'])) in already_scored: continue
    if len(left_c['ys']) != len(right_c['ys']): continue
    
    res_di = {}
    for col, value in left_c['coord'].items(): res_di[f"left:{col}"] = value
    for col, value in right_c['coord'].items(): res_di[f"right:{col}"] = value
    
    bad_row=False
    res_di_update = {}
    for colname, value in res_di.items():
        if "task_language" in colname:
            res_di_update[colname.replace("task_language", "language")] = Language.get(value).to_alpha3()
            latitude = lang2lat.setdefault(value, lang2wals.latitude.get(Language.get(value).to_alpha3(), None))
            if "iso_codes" in str(latitude): latitude=None
            if lang2lat[value] is None: bad_row=True; break;
            longitude = lang2long.setdefault(value, lang2wals.longitude.get(Language.get(value).to_alpha3(), None))
            if "iso_codes" in str(longitude): longitude=None
            res_di_update[colname.replace("task_language", "latitude")] = latitude
            res_di_update[colname.replace("task_language", "longitude")] = longitude
    res_di.update(res_di_update)
    if bad_row: continue

    res_di["frechet"] = frechet_dist(left_c['ys'], right_c['ys'])
    res_di["pearson"] = pearsonr(left_c['ys'], right_c['ys'])
    res_di["pearson_v"] = res_di["pearson"][0]
    already_scored[(str(left_c['coord']), str(right_c['coord']))] = res_di
    

  0%|          | 0/1537600 [00:00<?, ?it/s]



In [11]:
len(already_scored.values())

129743

In [12]:
import pandas as pd

already_scored_df = pd.DataFrame(already_scored.values()).dropna('rows', subset=["pearson_v", "left:latitude", "left:longitude", "right:language", "right:latitude", "right:longitude"])
already_scored_df

  already_scored_df = pd.DataFrame(already_scored.values()).dropna('rows', subset=["pearson_v", "left:latitude", "left:longitude", "right:language", "right:latitude", "right:longitude"])


Unnamed: 0,left:task_category,left:task_language,right:task_category,right:task_language,left:language,left:latitude,left:longitude,right:language,right:latitude,right:longitude,frechet,pearson,pearson_v
0,Subcat,af,Subcat,quc,afr,-31.0,22.000000,quc,15.00,-91.416667,0.199777,"(0.29024383916911617, 0.36011379668087046)",0.290244
1,Subcat,af,SubGender,pl,afr,-31.0,22.000000,pol,52.00,20.000000,0.119375,"(-0.3240244743300963, 0.3041924978003351)",-0.324024
2,Subcat,af,PunctType,ca,afr,-31.0,22.000000,cat,41.75,2.000000,0.334786,"(0.5596106141617089, 0.05849629123835554)",0.559611
6,Subcat,af,Gender,br,afr,-31.0,22.000000,bre,48.00,-3.000000,0.097577,"(0.24798474927430902, 0.4370752777159013)",0.247985
11,Subcat,af,Gender,bg,afr,-31.0,22.000000,bul,42.50,25.000000,0.271278,"(0.005573970297842179, 0.9862833756961876)",0.005574
...,...,...,...,...,...,...,...,...,...,...,...,...,...
129738,Nomzr,gub,Person[subj],quc,gub,-5.0,-46.000000,quc,15.00,-91.416667,0.222419,"(-0.23492678934579586, 0.46234544303991776)",-0.234927
129739,Nomzr,gub,Person[subj],mdf,gub,-5.0,-46.000000,mdf,54.00,44.000000,0.132370,"(0.4418688088561764, 0.15037966448837928)",0.441869
129740,Person[subj],myv,Person[subj],quc,myv,53.0,45.500000,quc,15.00,-91.416667,0.168760,"(-0.121568735894544, 0.7066443636344626)",-0.121569
129741,Person[subj],myv,Person[subj],mdf,myv,53.0,45.500000,mdf,54.00,44.000000,0.044675,"(0.3272227654697816, 0.2991655029594745)",0.327223


In [24]:
already_scored_df.to_csv("already_scored.csv", index=False)

In [13]:
!nohup python3 app.py &

OSError: Background processes not supported.