### Data Table of Metrics and their counts per year

In [1]:
# importing libraries 
import numpy as np
import pandas as pd

import warnings
# Ignore warnings of category FutureWarning so that library deprecation
# notices do not clutter the cell outputs below.
# NOTE(review): this also hides notices about upcoming pandas behaviour
# changes that may affect the aggregations in this notebook.
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
# Load the competitions table from disk and preview its first rows.

comps = pd.read_csv(filepath_or_buffer='Competitions.csv')
comps.head(n=5)

Unnamed: 0,Id,Slug,Title,Subtitle,HostSegmentTitle,ForumId,OrganizationId,EnabledDate,DeadlineDate,ProhibitNewEntrantsDeadlineDate,...,CanQualifyTiers,TotalTeams,TotalCompetitors,TotalSubmissions,ValidationSetName,ValidationSetValue,EnableSubmissionModelHashes,EnableSubmissionModelAttachments,HostName,CompetitionTypeId
0,2408,Eurovision2010,Forecast Eurovision Voting,This competition requires contestants to forec...,Featured,2,,04/07/2010 07:57:43,05/25/2010 18:00:00,,...,False,22,25,22,,,False,False,,1
1,2435,hivprogression,Predict HIV Progression,This contest requires competitors to predict t...,Featured,1,,04/27/2010 21:29:09,08/02/2010 12:32:00,,...,True,107,116,855,,,False,False,,1
2,2438,worldcup2010,World Cup 2010 - Take on the Quants,Quants at Goldman Sachs and JP Morgan have mod...,Featured,3094129,,06/03/2010 08:08:08,06/11/2010 13:29:00,,...,False,0,0,0,,,False,False,,1
3,2439,informs2010,INFORMS Data Mining Contest 2010,The goal of this contest is to predict short t...,Featured,4,,06/21/2010 21:53:25,10/10/2010 02:28:00,,...,True,145,153,1483,,,False,False,,1
4,2442,worldcupconf,World Cup 2010 - Confidence Challenge,The Confidence Challenge requires competitors ...,Featured,3,,06/03/2010 08:08:08,06/11/2010 13:28:00,,...,False,63,64,63,,,False,False,,1


In [3]:
# Dimensions of the loaded table as (number of rows, number of columns).
comps.shape

(5669, 42)

In [8]:
# Keep only the columns that describe the evaluation metric, plus the ones
# giving the competition's name, start date and type.

evaluation = ['EvaluationAlgorithmAbbreviation',
              'EvaluationAlgorithmName',
              'EvaluationAlgorithmDescription',]

compt = ['Title', 'EnabledDate', 'HostSegmentTitle']

# Work on a copy so that adding helper columns never touches `comps`.
df = comps[compt + evaluation].copy()

# Calendar year taken from the competition's EnabledDate.
df['year'] = pd.to_datetime(df.EnabledDate).dt.year
# Constant 1 per row: summing this column counts competitions.
df['comps'] = 1

# Boolean row masks; the cells below reuse them.
time_select = df.year >= 2015
competition_type_select = df.HostSegmentTitle.isin(['Featured', 'Research'])


# Pivot table: one row per evaluation metric, one column per year, each cell
# holding the number of selected competitions that used that metric.
# `margins=True` adds an 'All' row and an 'All' column with the totals.
(pd.pivot_table(df[time_select & competition_type_select],
                values='comps',
                index=['EvaluationAlgorithmAbbreviation'],
                columns=['year'],
                fill_value=0.0,
                # The string alias is the form pandas recommends; passing
                # np.sum is deprecated in recent releases.
                aggfunc='sum',
                margins=True)
    # Rank metrics by their total number of competitions.
    .sort_values(by='All', ascending=False)
    # Remove the grand-total row by label instead of by position, so the
    # result does not depend on where the sort places it among ties.
    .drop(index='All')
    .head(20))

year,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,All
EvaluationAlgorithmAbbreviation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AUC,4,4,1,3,3,2,3,1,0,0,21
LogLoss,2,2,5,2,3,2,0,2,0,0,18
MAP@{K},1,3,0,4,1,0,2,3,1,0,15
CategorizationAccuracy,1,0,4,0,1,2,0,1,0,0,9
FScoreBetaMicro,1,0,1,2,1,2,1,1,0,0,9
MulticlassLoss,2,3,2,0,1,0,0,1,0,0,9
RMSLE,2,1,3,1,1,0,0,0,0,0,8
RMSE,1,1,0,3,0,0,2,0,0,0,7
QuadraticWeightedKappa,3,0,0,1,2,1,0,0,0,0,7
MeanBestErrorAtK,0,0,2,2,1,1,0,1,0,0,7


In [9]:
# List the selected competitions (title and year) in which the metric of
# your choice has been adopted.

metric = 'AUC'
metric_select = df['EvaluationAlgorithmAbbreviation'].eq(metric)
rows_with_metric = time_select & competition_type_select & metric_select
print(df.loc[rows_with_metric, ['Title', 'year']])

                                                  Title  year
235                          West Nile Virus Prediction  2015
261                       Springleaf Marketing Response  2015
263                                       Truly Native?  2015
280                           Homesite Quote Conversion  2015
311                     Santander Customer Satisfaction  2016
332                       Avito Duplicate Ads Detection  2016
346                   Predicting Red Hat Business Value  2016
368   Melbourne University AES/MathWorks/NIH Seizure...  2016
567       WSDM - KKBox's Music Recommendation Challenge  2017
782    TalkingData AdTracking Fraud Detection Challenge  2018
855                            Home Credit Default Risk  2018
1037          Santander Customer Transaction Prediction  2019
1078                       Microsoft Malware Prediction  2018
1666                              Instant Gratification  2019
1669                           IEEE-CIS Fraud Detection  2019
2499   J

In [10]:
# For every year, compute the number of selected competitions and the share
# of them whose evaluation metric is used by exactly one selected competition.

selected_comps = df[time_select & competition_type_select]

counts = selected_comps.groupby('EvaluationAlgorithmAbbreviation')

# Aggregate only the numeric columns that are needed. Summing the whole frame
# would also try to aggregate the text columns, which older pandas dropped
# silently and newer pandas no longer does.
total_comps_per_year = selected_comps.groupby('year')[['comps']].sum()

# Per metric: number of competitions and the sum of their years. For a metric
# used exactly once, that sum is simply the year of its only competition,
# so it can serve as the grouping key below.
comps_per_metric = counts[['year', 'comps']].sum()
single_metrics_per_year = (comps_per_metric[comps_per_metric.comps == 1]
                            .groupby('year')[['comps']].sum())

# A year without any single-use metric has a share of 0 rather than NaN.
single_share_per_year = (
    single_metrics_per_year.reindex(total_comps_per_year.index, fill_value=0)
    / total_comps_per_year
)

# NOTE: 'pct_comps' holds a fraction between 0 and 1, not a percentage.
table = (total_comps_per_year.rename(columns={'comps': 'n_comps'})
            .join(single_share_per_year.rename(columns={'comps': 'pct_comps'})))

print(table)

      n_comps  pct_comps
year                    
2015       28   0.142857
2016       19   0.157895
2017       34   0.117647
2018       35   0.200000
2019       36   0.277778
2020       44   0.272727
2021       31   0.322581
2022       38   0.421053
2023       35   0.685714
2024        2   1.000000


In [12]:
# Names of the metrics used by exactly one of the selected competitions.
# Only the 'comps' counter is aggregated, and only once, instead of summing
# every column of the grouped frame twice.
n_comps_by_metric = counts['comps'].sum()
print(n_comps_by_metric[n_comps_by_metric == 1].index.values)

['34817366' '35896185' '36031993' '37085174' '38195349' '38445762'
 '39078087' '39243534' '39243586' '39244032' '39244492' '40581166'
 '41308515' '42009344' '42595776' '42603795' '43391374' '45372968'
 '47852869' '48030576' 'AHD@{Type}' 'AI4CodeKendallTau'
 'AmexGiniAndPercentageCaptureX' 'BenetechMixedMatch'
 'CSIROObjectDetectionFBeta' 'CVPRAutoDrivingAveragePrecision'
 'CernWeightedAuc' 'DFLEventDetectionAP' 'Dice3DHausdorff' 'DiceFBeta'
 'FScore_1 (deprecated)' 'GroupMeanLogMAE' 'ImageMatchingChallengeMaa'
 'ImageNetObjectLocalization' 'IndoorLocalization'
 'IntersectionOverUnionObjectSegmentationBeta'
 'IntersectionOverUnionObjectSegmentationWithClassification'
 'IntersectionOverUnionObjectSegmentationWithF1' 'JPXSharpe'
 'JaccardDSTLParallel' 'JaccardFbeta' 'JaneStreetPnl'
 'JigsawAgreementWithAnnotators' 'JigsawBiasAUC' 'LaplaceLogLikelihood'
 'LevenshteinMean' 'Lyft3DObjectDetectionAP' 'M5_WRMSSE' 'MASpearmanR'
 'MCSpearmanR' 'MSE' 'MeanAngularError' 'MeanColumnwiseLogLoss'
 'M