# Case Study 2

In this notebook we identify the *prospective* models on the GLUE, SuperGLUE and VALUE leaderboards.

In [1]:
import pandas as pd
import numpy as np
import sys
from pprint import pprint

import warnings
# Install a global filter that silences every RuntimeWarning so such
# messages do not clutter the cell outputs.
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Add the parent directory to the module search path; this has to run before
# the `votenrank` imports that follow.
# NOTE(review): presumably the `votenrank` package lives one level above this
# notebook — confirm against the repository layout.
sys.path.append("..")

from votenrank import Leaderboard
from votenrank.data_processing import preprocess_glue, preprocess_sglue, preprocess_value

## GLUE

In [2]:
# Read the GLUE leaderboard CSV and run it through the GLUE preprocessing,
# which returns the table together with the weights handed to Leaderboard.
glue_table = pd.read_csv("../tables/leaderboards - GLUE.csv")
glue, glue_weights = preprocess_glue(glue_table)

# Only rows without any missing value are used to build the leaderboard.
# NOTE(review): the saved output of this cell shows a pandas
# SettingWithCopyWarning. Its origin is not visible from this notebook (it may
# come from preprocess_glue or from Leaderboard writing into the frame returned
# by dropna()) — confirm and fix at the source.
glue_complete = glue.dropna()
not_nan_glue_lb = Leaderboard(glue_complete, weights=glue_weights)

# The split is a mapping with a "feasible" and an "infeasible" list of models.
split = not_nan_glue_lb.split_models_by_feasibility()

prospective = split["feasible"]
print(f"Prospective models ({len(prospective)}):")
pprint(prospective)
print()

non_prospective = split["infeasible"]
print(f"Non-prospective models ({len(non_prospective)}):")
pprint(non_prospective)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


  0%|          | 0/97 [00:00<?, ?it/s]

Prospective models (9):
['ERNIE',
 'StructBERT + CLEVER',
 'DeBERTa / TuringNLRv4',
 'DeBERTa + CLEVER',
 'MacALBERT + DKM',
 'ALBERT + DAAF + NAS',
 'T5',
 'MT-DNN-SMART',
 'GLUE Human Baselines']

Non-prospective models (88):
['NEZHA-Large',
 'Funnel-Transformer (Ensemble B10-10-10H1024)',
 'ELECTRA-Large + Standard Tricks',
 'FreeLB-RoBERTa (ensemble)',
 'HIRE-RoBERTa',
 'RoBERTa',
 'MT-DNN-ensemble',
 'ELECTRA-Large-NewSCL(single)',
 'Bort (Alexa AI)',
 'ConvBERT base',
 'Snorkel MeTaL',
 'XLM (English only)',
 'ConvBERT-base-paddle-v1.1',
 'SemBERT',
 'mpnet-base-paddle',
 'SpanBERT (single-task training)',
 'distilRoBERTa+GAL (6-layer transformer single model)',
 'BERT + BAM',
 'Span-Extractive BERT on STILTs',
 'LV-BERT-base',
 'BERT on STILTs',
 '1',
 'RobustRoBERTa',
 'WARP with RoBERTa',
 'CombinedKD-TinyRoBERTa (6 layer 82M parameters, MATE-KD + AnnealingKD)',
 'segaBERT-large',
 "u-PMLM-R (Huawei Noah's Ark Lab)",
 'AMBERT-BASE',
 'Routed BERTs',
 'CERT',
 'BERT: 24-layers,

## SuperGLUE

In [3]:
# Read the SuperGLUE leaderboard CSV and run it through the SuperGLUE
# preprocessing, which returns the table together with the weights handed to
# Leaderboard.
sglue_table = pd.read_csv("../tables/leaderboards - SuperGLUE.csv")
sglue, sglue_weights = preprocess_sglue(sglue_table)

# Only rows without any missing value are used to build the leaderboard.
sglue_complete = sglue.dropna()
not_nan_sglue_lb = Leaderboard(sglue_complete, weights=sglue_weights)

# The split is a mapping with a "feasible" and an "infeasible" list of models.
split = not_nan_sglue_lb.split_models_by_feasibility()

prospective = split["feasible"]
print(f"Prospective models ({len(prospective)}):")
pprint(prospective)
print()

non_prospective = split["infeasible"]
print(f"Non-prospective models ({len(non_prospective)}):")
pprint(non_prospective)

  0%|          | 0/22 [00:00<?, ?it/s]

Prospective models (10):
['ERNIE 3.0',
 'T5 + UDG, Single Model (Google Brain)',
 'DeBERTa / TuringNLRv4',
 'SuperGLUE Human Baselines',
 'T5',
 'AILabs Team, Transformers',
 'ADAPET (ALBERT) - few-shot',
 'Most Frequent Class',
 'CBoW',
 'WARP (ALBERT-XXL-V2) - Few-Shot (32 Examples)']

Non-prospective models (12):
['NEZHA-Plus',
 'PAI Albert',
 'RoBERTa-iCETS',
 'RoBERTa (ensemble)',
 'RoBERTa-mtl-adv',
 'RoBERTa',
 'iPET (ALBERT) - Few-Shot (32 Examples)',
 'Bort (Alexa AI)',
 'BERT-mtl',
 'GPT-3 few-shot - OpenAI',
 'BERT++',
 'BERT']


## VALUE

In [4]:
# Read the VALUE leaderboard CSV and run it through the VALUE preprocessing.
# Unlike the GLUE and SuperGLUE cells, no weights are returned or passed on.
value_table = pd.read_csv("../tables/leaderboards - VALUE.csv")
value = preprocess_value(value_table)

# Only rows without any missing value are used to build the leaderboard.
value_complete = value.dropna()
not_nan_value_lb = Leaderboard(value_complete)

# The split is a mapping with a "feasible" and an "infeasible" list of models.
split = not_nan_value_lb.split_models_by_feasibility()

prospective = split["feasible"]
print(f"Prospective models ({len(prospective)}):")
pprint(prospective)
print()

non_prospective = split["infeasible"]
print(f"Non-prospective models ({len(non_prospective)}):")
pprint(non_prospective)

  0%|          | 0/6 [00:00<?, ?it/s]

Prospective models (3):
['craig.starr', 'DuKG', 'HERO 1']

Non-prospective models (3):
['HERO 2', 'HERO 3', 'HERO 4']
