# Stats in Paper

In [28]:
# imports
import ipdb, os, re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report
import collections

## IBM

In [2]:
# Train/test TSV files of the IBM NLP-TDMS few-shot setup ("paperVersion").
# Both paths start with "~" and are handed to pandas unchanged in the next cell.
IBM_train_csv = "~/Research/task-dataset-metric-nli-extraction/data/ibm/exp/few-shot-setup/NLP-TDMS/paperVersion/train.tsv"
IBM_test_csv = "~/Research/task-dataset-metric-nli-extraction/data/ibm/exp/few-shot-setup/NLP-TDMS/paperVersion/test.tsv"

In [3]:
# Load the IBM splits. read_table uses a tab separator by default; the four
# column names are supplied explicitly through `names`.
train_IBM = pd.read_table(
    IBM_train_csv,
    names=["label", "title", "TDM", "Context"],
)
test_IBM = pd.read_table(
    IBM_test_csv,
    names=["label", "title", "TDM", "Context"],
)

In [61]:
def get_stats(path_to_df):
    """Print summary statistics of the positive TDM labels and return their frequencies.

    Only rows whose ``label`` equals ``True`` are considered. Every ``TDM``
    value is expected to be a ``;``-separated ``task; dataset; metric``
    string. A value without any ``;`` (for example ``unknown``) is counted as
    an unknown label. Fields beyond the third are ignored, and a value with
    exactly two fields is counted as malformed and skipped instead of raising.

    Parameters
    ----------
    path_to_df : pandas.DataFrame
        Despite its name this is the loaded frame, not a path. It must
        provide the ``label``, ``title`` and ``TDM`` columns.

    Returns
    -------
    list of (str, int)
        ``("task#dataset#metric", occurrences)`` pairs sorted by ascending
        number of occurrences (ties keep first-seen order).
    """
    # Build the row filters once instead of recomputing them for every print.
    # `== True` is kept (rather than plain truthiness) so that non-boolean
    # label columns are filtered exactly as before.
    is_positive = path_to_df.label == True  # noqa: E712
    positives = path_to_df[is_positive]
    known_positives = path_to_df[is_positive & (path_to_df.TDM != 'unknown')]

    TDM = set()
    Uniq_task = set()
    Uniq_dataset = set()
    Uniq_metric = set()
    unknown_count = 0
    malformed_count = 0
    TDM_count = defaultdict(int)

    for contrib in positives.TDM.tolist():
        fields = [field.strip() for field in contrib.split(';')]

        if len(fields) == 1:
            # No separator at all: treated as an "unknown" label.
            unknown_count += 1
            continue
        if len(fields) < 3:
            # Two fields cannot form a task/dataset/metric triple; skip the
            # label rather than crash on tuple unpacking.
            malformed_count += 1
            continue

        # Any field after the third (e.g. produced by a trailing ';') is dropped.
        t, d, m = fields[:3]
        leaderboard = f"{t}#{d}#{m}"

        TDM.add(leaderboard)
        TDM_count[leaderboard] += 1
        Uniq_task.add(t)
        Uniq_dataset.add(d)
        Uniq_metric.add(m)

    # Number of known positive rows per paper title.
    tdm_per_paper = defaultdict(int)
    for paper in known_positives.title.tolist():
        tdm_per_paper[paper] += 1

    per_paper_counts = list(tdm_per_paper.values())
    if per_paper_counts:
        avg_per_paper = round(np.mean(per_paper_counts), 2)
        max_per_paper = round(np.max(per_paper_counts), 2)
        min_per_paper = round(np.min(per_paper_counts), 2)
    else:
        # np.max / np.min raise on an empty sequence; report zeros instead.
        avg_per_paper = max_per_paper = min_per_paper = 0

    print(f"Number of papers: {len(set(positives.title.tolist()))}")
    print(f"Unknown count: {unknown_count}")
    print(f"Total leaderboards: {len(known_positives.title.tolist())}")
    print(f"Avg leaderboard per paper: {avg_per_paper}")
    print(f"Distinc leaderboard: {len(TDM)}")
    print(f"Distinct taks: {len(Uniq_task)}")
    print(f"Distinc datasets: {len(Uniq_dataset)}")
    print(f"Distinc metrics: {len(Uniq_metric)}")
    print(f"Max leaderboard per paper: {max_per_paper}")
    print(f"Min leaderboard per paper: {min_per_paper}")
    if malformed_count:
        # Only shown when something was skipped, so regular output is unchanged.
        print(f"Malformed TDM labels skipped: {malformed_count}")

    oder_TDM_count = sorted(TDM_count.items(), key=lambda item: item[1])
    print(f"Least frequent leaderboard :{oder_TDM_count[:3]}")
    print(f"Most frequent leaderboard :{oder_TDM_count[-3:]}")

    return oder_TDM_count

### Train

In [5]:
# Preview the first rows of the IBM training frame.
train_IBM.head()

Unnamed: 0,label,title,TDM,Context
0,True,D16-1036.pdf,unknown,Multi-view Response Selection for Human-Comput...
1,False,D16-1036.pdf,question answering; SQuAD; F1,Multi-view Response Selection for Human-Comput...
2,False,D16-1036.pdf,relation prediction; FB15K-237; H@1,Multi-view Response Selection for Human-Comput...
3,False,D16-1036.pdf,word sense disambiguation; SemEval 2013; F1,Multi-view Response Selection for Human-Comput...
4,False,D16-1036.pdf,language modeling; 1B Words / Google Billion W...,Multi-view Response Selection for Human-Comput...


In [6]:
# Number of distinct values in the `title` column (one PDF file name per paper).
train_IBM.title.nunique()

170

In [7]:
# NOTE: despite the variable name, get_stats returns the list of
# (leaderboard, count) pairs sorted by ascending count, not a per-paper average.
avg_tdm_per_paper = get_stats(train_IBM)

Number of papers: 170
Unknown count: 46
Total leaderboards: 327
Avg leaderboard per paper: 2.64
Distinc leaderboard: 78
Distinct taks: 18
Distinc datasets: 44
Distinc metrics: 31
Max leaderboard per paper: 10
Min leaderboard per paper: 1


### Test

In [8]:
# Preview the first rows of the IBM test frame.
test_IBM.head()

Unnamed: 0,label,title,TDM,Context
0,True,1803.11175.pdf,sentiment analysis; SUBJ; Accuracy,Universal Sentence Encoder We present models f...
1,True,1803.11175.pdf,text classification; TREC; Error,Universal Sentence Encoder We present models f...
2,False,1803.11175.pdf,question answering; SQuAD; F1,Universal Sentence Encoder We present models f...
3,False,1803.11175.pdf,relation prediction; FB15K-237; H@1,Universal Sentence Encoder We present models f...
4,False,1803.11175.pdf,word sense disambiguation; SemEval 2013; F1,Universal Sentence Encoder We present models f...


In [9]:
# NOTE: `metric` receives get_stats' return value, i.e. the list of
# (leaderboard, count) pairs sorted by ascending count for the IBM test split.
metric = get_stats(test_IBM)

Number of papers: 167
Unknown count: 45
Total leaderboards: 294
Avg leaderboard per paper: 2.41
Distinc leaderboard: 78
Distinct taks: 18
Distinc datasets: 44
Distinc metrics: 31
Max leaderboard per paper: 7
Min leaderboard per paper: 1


In [10]:
# Make sure that every TDM label occurring in test also occurs in train.
# The train labels are collected into a set once, so each membership test is
# a hash lookup instead of rebuilding and scanning the whole list per row.
train_tdm_labels = set(train_IBM.TDM.to_list())
count = [tdm for tdm in test_IBM.TDM.to_list() if tdm not in train_tdm_labels]
print(count)

[]


In [12]:
# Reverse check: every TDM label occurring in train should also occur in test.
# The test labels are collected into a set once, so each membership test is
# a hash lookup instead of rebuilding and scanning the whole list per row.
test_tdm_labels = set(test_IBM.TDM.to_list())
count = []
for tdm in train_IBM.TDM.to_list():
    if tdm not in test_tdm_labels:
        print(tdm)
        count.append(tdm)
print(count)

[]


### Our dataset

In [53]:
# Train/dev TSV pair of the dataset configuration to analyse. Exactly one pair
# is active; the commented-out pairs are alternative configurations (other
# "unk" sizes and folds, or the IBM files) that can be swapped in by hand.
# NOTE(review): the active paths are absolute and user-specific, so this cell
# only runs on a machine where that directory exists.
# New_train_csv = "~/Research/task-dataset-metric-nli-extraction/data/pwc_ibm_150_5_10_800/twofoldwithunk/fold1/train.tsv"
# New_test_csv = "~/Research/task-dataset-metric-nli-extraction/data/pwc_ibm_150_5_10_800/twofoldwithunk/fold1/dev.tsv"

New_train_csv = "/nfs/home/kabenamualus/Research/task-dataset-metric-nli-extraction/data/pwc_ibm_150_5_10_10000/10Neg10000unk/twofoldwithunk/fold2/train.tsv"
New_test_csv = "/nfs/home/kabenamualus/Research/task-dataset-metric-nli-extraction/data/pwc_ibm_150_5_10_10000/10Neg10000unk/twofoldwithunk/fold2/dev.tsv"

# New_train_csv = "/nfs/home/kabenamualus/Research/task-dataset-metric-nli-extraction/data/pwc_ibm_150_5_10_5000/10Neg5000unk/twofoldwithunk/fold2/train.tsv"
# New_test_csv = "/nfs/home/kabenamualus/Research/task-dataset-metric-nli-extraction/data/pwc_ibm_150_5_10_5000/10Neg5000unk/twofoldwithunk/fold2/dev.tsv"

# New_train_csv = "/nfs/home/kabenamualus/Research/task-dataset-metric-nli-extraction/data/pwc_ibm_150_5_10_1000/10Neg1000unk/twofoldwithunk/fold2/train.tsv"
# New_test_csv = "/nfs/home/kabenamualus/Research/task-dataset-metric-nli-extraction/data/pwc_ibm_150_5_10_1000/10Neg1000unk/twofoldwithunk/fold2/dev.tsv"

# New_train_csv = "/nfs/home/kabenamualus/Research/task-dataset-metric-nli-extraction/data/pwc_ibm_150_5_10_500/10Neg500unk/twofoldwithunk/fold1/train.tsv"
# New_test_csv = "/nfs/home/kabenamualus/Research/task-dataset-metric-nli-extraction/data/pwc_ibm_150_5_10_500/10Neg500unk/twofoldwithunk/fold1/dev.tsv"

# New_train_csv = IBM_train_csv
# New_test_csv = IBM_test_csv

In [54]:
# Load the selected train/dev splits. read_table uses a tab separator by
# default; the four column names are supplied explicitly through `names`.
train_New = pd.read_table(
    New_train_csv,
    names=["label", "title", "TDM", "Context"],
)
test_New = pd.read_table(
    New_test_csv,
    names=["label", "title", "TDM", "Context"],
)

In [55]:
# Row count after removing fully duplicated rows; it equals len(train_New)
# exactly when the frame contains no duplicate rows.
len(train_New.drop_duplicates())

50207

In [56]:
# Total number of rows in the training frame (positive and negative examples).
len(train_New)

50207

In [62]:
# Print the statistics of the training split and keep the returned list of
# (leaderboard, count) pairs, sorted by ascending count.
TDM_count = get_stats(train_New)

Number of papers: 3753
Unknown count: 920
Total leaderboards: 11757
Avg leaderboard per paper: 4.15
Distinc leaderboard: 1820
Distinct taks: 291
Distinc datasets: 912
Distinc metrics: 553
Max leaderboard per paper: 58
Min leaderboard per paper: 1
Least frequent leaderboard :[('Word Sense Disambiguation#WiC-TSV#Task 1 Accuracy: all', 1), ('Word Sense Disambiguation#WiC-TSV#Task 1 Accuracy: domain specific', 1), ('Word Sense Disambiguation#WiC-TSV#Task 1 Accuracy: general purpose', 1)]
Most frequent leaderboard :[('Image Classification#CIFAR-10#Percentage correct', 51), ('Object Detection#COCO test-dev#box AP', 57), ('Image Classification#ImageNet#Top 1 Accuracy', 93)]


In [None]:
# TDM_count is sorted by ascending count, so this shows the 30 least frequent
# leaderboards of the training split.
TDM_count[:30]

In [58]:
# Fold-averaged statistics of the training split. The operands are typed in by
# hand: the second value of each pair matches the get_stats(train_New) output
# recorded above for fold2; the first is presumably the fold1 run — TODO confirm.
# NOTE: Python's round() sends an exact .5 to the nearest even integer
# (e.g. 288.5 -> 288, 921.5 -> 922).
print("Train")
print("======")
print(f"Number of papers: {round((3753 + 3753)/2)}")
print(f"Avg Unknown count: {round((923 + 920)/2)}")
print(f"Avg Total leaderboards: {round((11690 + 11757)/2)}")
print(f"Avg leaderboard per paper: {round((4.13 + 4.15)/2, 1)}")
print(f"Avg Distinc leaderboard: {round((1791 + 1820)/2)}")
print(f"Avg Distinct taks: {round((286 + 291)/2)}")
print(f"Avg Distinc datasets: {round((905 + 912)/2)}")
print(f"Avg Distinc metrics: {round((547 + 553)/2)}")

Train
Number of papers: 3753
Avg Unknown count: 922
Avg Total leaderboards: 11724
Avg leaderboard per paper: 4.1
Avg Distinc leaderboard: 1806
Avg Distinct taks: 288
Avg Distinc datasets: 908
Avg Distinc metrics: 550


In [59]:
# Print the statistics of the dev split and keep the returned list of
# (leaderboard, count) pairs, sorted by ascending count.
TDM_count_test = get_stats(test_New)

Number of papers: 1608
Unknown count: 381
Total leaderboards: 5027
Avg leaderboard per paper: 4.1
Distinc leaderboard: 1541
Distinct taks: 250
Distinc datasets: 790
Distinc metrics: 466
Max leaderboard per paper: 58
Min leaderboard per paper: 1
Least frequent leaderboard :[('Visual Object Tracking#GOT-10k#Average Overlap', 1), ('Visual Object Tracking#GOT-10k#Success Rate 0.5', 1), ('Image Clustering#Extended Yale-B#Accuracy', 1)]
Most frequent leaderboard :[('Image Classification#CIFAR-100#Percentage correct', 30), ('Image Classification#CIFAR-10#Percentage correct', 40), ('Image Classification#ImageNet#Top 1 Accuracy', 45)]


In [18]:
# Fold-averaged statistics of the dev split. The operands are typed in by
# hand: the second value of each pair matches the get_stats(test_New) output
# recorded above for fold2; the first is presumably the fold1 run — TODO confirm.
# NOTE: Python's round() sends an exact .5 to the nearest even integer.
print("Test")
print("======")
print(f"Number of papers: {round((1608 + 1608)/2)}")
print(f"Avg Unknown count: {round((378 + 381)/2)}")
print(f"Avg Total leaderboards: {round((5094 + 5027)/2)}")
print(f"Avg leaderboard per paper: {round((4.14 + 4.1)/2, 1)}")
print(f"Avg Distinc leaderboard: {round((1556 + 1541)/2)}")
print(f"Avg Distinct taks: {round((254 + 250)/2)}")
print(f"Avg Distinc datasets: {round((806 + 790)/2)}")
print(f"Avg Distinc metrics: {round((472 + 466)/2)}")

Test
Number of papers: 1608
Avg Unknown count: 380
Avg Total leaderboards: 5060
Avg leaderboard per paper: 4.1
Avg Distinc leaderboard: 1548
Avg Distinct taks: 252
Avg Distinc datasets: 798
Avg Distinc metrics: 469


In [17]:
# TDM labels occurring in the dev split but not in the train split.
# The train labels are collected into a set once, so each membership test is
# a hash lookup instead of rebuilding and scanning the whole list per row.
train_new_tdm_labels = set(train_New.TDM.to_list())
count = [tdm for tdm in test_New.TDM.to_list() if tdm not in train_new_tdm_labels]
print(count)

[]


In [18]:
# TDM labels occurring in the train split but not in the dev split.
# The dev labels are collected into a set once, so each membership test is
# a hash lookup instead of rebuilding and scanning the whole list per row.
test_new_tdm_labels = set(test_New.TDM.to_list())
count = [tdm for tdm in train_New.TDM.to_list() if tdm not in test_new_tdm_labels]
print(len(count))

0
