## Task complexity correlates with extracted features

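We noticed that the features extracted for a task correlate with its complexity. Using the measured intrinsic dimension of each task as the complexity measure, the sum of the sample-averaged feature vector shows a moderate positive Pearson correlation with it (r ≈ 0.47, p ≈ 3.7e-5).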

In [1]:
# Initialise the interactive MML API (prints the banner and
# "Interactive MML API initialized." shown in this cell's output).
# NOTE(review): init() runs before the mml_tf imports below - presumably
# intentional; confirm before regrouping the imports.
import mml.interactive
mml.interactive.init()
from mml_tf.representations import FullFeatureRepresentations, AveragedFeatureRepresentations
from mml_tf.tasks import task_infos, all_tasks
from scipy.stats import pearsonr

 _____ ______   _____ ______   ___
|\   _ \  _   \|\   _ \  _   \|\  \
\ \  \\\__\ \  \ \  \\\__\ \  \ \  \
 \ \  \\|__| \  \ \  \\|__| \  \ \  \
  \ \  \    \ \  \ \  \    \ \  \ \  \____
   \ \__\    \ \__\ \__\    \ \__\ \_______\
    \|__|     \|__|\|__|     \|__|\|_______|
         ____  _  _    __  _  _  ____  _  _
        (  _ \( \/ )  (  )( \/ )/ ___)( \/ )
         ) _ ( )  /    )( / \/ \\___ \ )  /
        (____/(__/    (__)\_)(_/(____/(__/
Interactive MML API initialized.


  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)


In [8]:
# Load the extracted feature representations of the tasks.
# NOTE(review): where load_representations() reads from is not visible here - confirm.
full_rep = FullFeatureRepresentations()
full_rep.load_representations()
# Build the averaged representation on top of the full features: per the original
# author's note, the features are averaged across all samples, leaving one value
# per feature dimension. `avg_rep.mapping[task]` is read by the next cell.
avg_rep = AveragedFeatureRepresentations(full_features=full_rep)
avg_rep.load_representations()

In [9]:
# Collect two numbers per task, in the order given by `all_tasks`:
# - the measured intrinsic dimension, following "The Intrinsic Dimension of Images and
#   Its Impact on Learning" by Phillip Pope, Chen Zhu, Ahmed Abdelkader, Micah Goldblum,
#   Tom Goldstein (ICLR 2021, spotlight)
# - the sum over the feature dimension of the sample-averaged extracted features
dims, norms = [], []
for task in all_tasks:
    intrinsic_dim = task_infos.dimensions[task]
    dims.append(intrinsic_dim)
    # reduce the averaged feature vector to a single Python scalar
    feature_sum = avg_rep.mapping[task].sum().item()
    norms.append(feature_sum)

In [10]:
# Pearson correlation between each task's intrinsic dimension and its summed
# average feature; left as the bare last expression so the notebook displays it.
pearsonr(x=dims, y=norms)

PearsonRResult(statistic=0.4692465603413244, pvalue=3.6609766883292303e-05)