In [4]:
import requests
import os
import json

import pandas as pd
from pathlib import Path
from typing import Tuple, List
import random
import torch


def get_test_images_and_classes_vid(dataset_path: Path, session_token: str, data_type: str='sample') -> Tuple[List[str],List[str]]:
    """
    Helper method to dynamically get the test labels and give us the possible classes that can be submitted
    for the current dataset
    
    Params
    ------
    
    dataset_path : Path
        The path to the `development` dataset downloads
    
    session_token : str
        Your current session token so that we can look up the current session metadata
    
    data_type: str
        Indicates whether you are using the `sample` or `full` dataset. 
    Returns
    -------
    
    Tuple[List[str], List[str]]
        The list of test image ids needed to submit a prediction and the list of class names that you can predict against
    """
    # Then we can just reference our current metadata to get our dataset name and use that in the path
    headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': os.environ.get('GOVTEAM_SECRET')}
    r = requests.get(f"{url}/session_status", headers=headers)
    current_dataset = r.json()['Session_Status']['current_dataset']
    current_dataset_name = current_dataset['name']
    current_dataset_classes = current_dataset['classes']

    test_meta = pd.read_feather(dataset_path.joinpath(f"{current_dataset_name}/labels_{data_type}/meta_test.feather"))
    test_ids = test_meta['id'].tolist()
    return test_ids, current_dataset_classes



def generate_random_predictions_on_test_set(test_imgs: List[str], current_dataset_classes: List[str]) -> pd.DataFrame:
    """
    Generates a prediction dataframe for image classification based on random sampling from our available classes
    """
    rand_lbls = [str(random.choice(current_dataset_classes)) for _ in range(len(test_imgs))]
    df = pd.DataFrame({'id': test_imgs, 'class': rand_lbls})
    return df


def get_random_labels_from_train_dataset_vid(dataset_path: Path, session_token: str, n: int=None, data_type: str='sample') -> List[str]:
    """
    Helper function to get random `n` video segment ids from our train dataset to request labels for
    from the api
    
    Params
    ------
    
    dataset_path : Path
        The path to the `development` dataset downloads
    
    session_token : str
        Your current session token so that we can look up the current session metadata
    data_type: str
        Indicates whether you are using the `sample` or `full` size dataset
    
    Returns
    -------
    
    List[str]
        A list of n unique image ids for the current session dataset
        
    """
    headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': os.environ.get('GOVTEAM_SECRET')}
    r = requests.get(f"{url}/session_status", headers=headers)
    current_dataset = r.json()['Session_Status']['current_dataset']
    current_dataset_name = current_dataset['name']
    budget_left = r.json()['Session_Status']['budget_left_until_checkpoint'] 
    if not n:
        n = budget_left
        
        print(f"budget_left is {budget_left}")
      
    meta_train_path = dataset_path.joinpath(f"{current_dataset_name}/labels_{data_type}/meta_train.feather")
    meta_train = pd.read_feather(meta_train_path)
    random_ids = meta_train['id'].sample(n=n).tolist()
    return random_ids



secret = 'a5aed2a8-db80-4b22-bf72-11f2d0765572'# my-group-secret (Brown)
gov_secret = 'mock-secret'
url = 'https://api-dev.lollllz.com'

headers = {'user_secret': secret, 'govteam_secret': gov_secret}

# This is a convenience for development purposes, IN EVAL ALWAYS USE `full`
data_type = 'sample' # can either be `sample` or `full`

r = requests.post(f"{url}/auth/create_session", json={'session_name': 'reproduce_error', 'data_type': data_type, 'task_id': 'problem_test_video_classification'}, headers=headers)
r.json()
session_token = r.json()['session_token']


headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': gov_secret}

r = requests.get(f"{url}/seed_labels", headers=headers)


DATASETS_PATH = Path.home().joinpath('/lwll/external')
test_ids, current_dataset_classes = get_test_images_and_classes_vid(dataset_path=DATASETS_PATH, session_token=session_token, data_type='sample')
df = generate_random_predictions_on_test_set(test_ids, current_dataset_classes)


headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': gov_secret}

r = requests.post(f"{url}/submit_predictions", json={'predictions': df.to_dict()}, headers=headers)
r.json()


for i in range(3):
    print(i+1)
    headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': os.environ.get('GOVTEAM_SECRET')}
    r = requests.get(f"{url}/seed_labels", headers=headers)
    r = requests.post(f"{url}/submit_predictions", json={'predictions': df.to_dict()}, headers=headers)
print(json.dumps(r.json(), indent=4))

images_to_be_labeled = get_random_labels_from_train_dataset_vid(DATASETS_PATH, session_token)

#images_to_be_labeled = ['426597.jpg', '444854.jpg', '293426.jpg', '394143.jpg', '475104.jpg', '466700.jpg', '434541.jpg', '403310.jpg', '463886.jpg', '419271.jpg', '499764.jpg', '383900.jpg', '473090.jpg', '351779.jpg', '397958.jpg', '351810.jpg', '322990.jpg', '393386.jpg', '482671.jpg', '505782.jpg', '353192.jpg', '405677.jpg', '327915.jpg', '312268.jpg', '411502.jpg', '463282.jpg', '488973.jpg', '330913.jpg', '418090.jpg', '450558.jpg', '406777.jpg', '381549.jpg', '487051.jpg', '463913.jpg', '453865.jpg', '478428.jpg', '394477.jpg', '305002.jpg', '485933.jpg', '254062.jpg', '322078.jpg', '431623.jpg', '171340.jpg', '469255.jpg', '412119.jpg', '392331.jpg', '409253.jpg', '305048.jpg', '378614.jpg', '492679.jpg', '384504.jpg', '390812.jpg', '352550.jpg', '485559.jpg', '455513.jpg', '321446.jpg', '317903.jpg', '408780.jpg', '473579.jpg', '467482.jpg', '334252.jpg', '403188.jpg', '486968.jpg', '400517.jpg', '343077.jpg', '485851.jpg', '412094.jpg', '484619.jpg', '381466.jpg', '506106.jpg', '485837.jpg', '501087.jpg', '398532.jpg', '454979.jpg', '321680.jpg', '503566.jpg', '426574.jpg']

query = {
    'example_ids': images_to_be_labeled
}

r = requests.post(f"{url}/query_labels", json=query, headers=headers)
len(r.json()['Labels'])

1
2
3
{
    "Session_Status": {
        "active": "In Progress",
        "budget_left_until_checkpoint": 77,
        "budget_used": 408,
        "current_dataset": {
            "classes": [
                "push",
                "cartwheel",
                "dribble",
                "pullup",
                "jump",
                "shoot_gun",
                "smile",
                "dive",
                "shake_hands",
                "draw_sword",
                "fencing",
                "climb",
                "clap",
                "kick_ball",
                "sword_exercise",
                "pour",
                "punch",
                "walk",
                "sit",
                "brush_hair",
                "shoot_ball",
                "ride_horse",
                "throw",
                "pick",
                "smoke",
                "pushup",
                "wave",
                "golf",
                "kick",
                "hit",
                "hug

FileNotFoundError: [Errno 2] Failed to open local file '/lwll/external/hmdb/labels_sample/meta_train.feather'. Detail: [errno 2] No such file or directory

In [6]:
data_type = 'sample'

DATASETS_PATH = Path.home().joinpath('/lwll/external')
meta_train_path = DATASETS_PATH.joinpath(f"hmdb/labels_{data_type}/meta_test.feather")
meta_train = pd.read_feather(meta_train_path)

In [7]:
meta_train

Unnamed: 0,id,video_id,start_frame,end_frame
0,5503,5503,514892,514971
1,5855,5855,546636,546694
2,6188,6188,577992,578139
3,5894,5894,549850,549930
4,6485,6485,604761,605087
...,...,...,...,...
95,6138,6138,573674,573751
96,5513,5513,515867,516215
97,6521,6521,608567,608590
98,6095,6095,569428,569506


{'tasks': ['06023f86-a66b-4b2c-8b8b-951f5edd0f22',
  '4d924004-347e-4043-8dd2-f4f8b0f52efd',
  '6d5e1f85-5d8f-4cc9-8184-299db03713f4',
  '7c103ece-fd8e-483b-bede-b55e5ea57fe9',
  '923cbe9c-1e4f-4a05-a156-dc972ef5edf5',
  'b01a6738-0b85-46c2-9318-16c3e2ef0f6d',
  'bbfadb2c-c7c3-4596-b548-3dd01a6d1d2c',
  'd48f8a99-ba12-4df8-a74a-d06413b0f1ba',
  'problem_test_image_classification',
  'problem_test_obj_detection',
  'problem_test_video_classification']}

In [9]:
len(r.json()['Labels'])

51

{'Session_Status': {'active': 'In Progress',
  'budget_left_until_checkpoint': 51,
  'budget_used': 51,
  'current_dataset': {'classes': ['shoot_ball',
    'somersault',
    'stand',
    'smile',
    'pour',
    'climb_stairs',
    'flic_flac',
    'situp',
    'golf',
    'pick',
    'draw_sword',
    'smoke',
    'clap',
    'walk',
    'dribble',
    'talk',
    'pushup',
    'fall_floor',
    'catch',
    'sword',
    'kick_ball',
    'cartwheel',
    'punch',
    'sword_exercise',
    'shoot_bow',
    'brush_hair',
    'push',
    'wave',
    'eat',
    'hug',
    'swing_baseball',
    'ride_horse',
    'throw',
    'run',
    'sit',
    'pullup',
    'dive',
    'turn',
    'climb',
    'chew',
    'handstand',
    'hit',
    'laugh',
    'kiss',
    'drink',
    'ride_bike',
    'shake_hands',
    'kick',
    'fencing',
    'jump',
    'shoot_gun'],
   'dataset_type': 'video_classification',
   'license_citation': 'Kuehne, Hildegard, Hueihan Jhuang, Estíbaliz Garrote, Tomaso Pog

In [23]:
len(images_to_be_labeled)

77

{'Labels': [{'class': 'run',
   'end_frame': 463335,
   'id': 4942,
   'start_frame': 463256,
   'video_id': 4942},
  {'class': 'draw_sword',
   'end_frame': 473661,
   'id': 5061,
   'start_frame': 473564,
   'video_id': 5061},
  {'class': 'ride_horse',
   'end_frame': 383912,
   'id': 4112,
   'start_frame': 383730,
   'video_id': 4112},
  {'class': 'pour',
   'end_frame': 305109,
   'id': 3273,
   'start_frame': 304785,
   'video_id': 3273},
  {'class': 'pullup',
   'end_frame': 419317,
   'id': 4497,
   'start_frame': 419239,
   'video_id': 4497},
  {'class': 'laugh',
   'end_frame': 487224,
   'id': 5206,
   'start_frame': 486865,
   'video_id': 5206},
  {'class': 'throw',
   'end_frame': 381552,
   'id': 4086,
   'start_frame': 381398,
   'video_id': 4086},
  {'class': 'eat',
   'end_frame': 492716,
   'id': 5268,
   'start_frame': 492615,
   'video_id': 5268},
  {'class': 'pour',
   'end_frame': 317909,
   'id': 3398,
   'start_frame': 317694,
   'video_id': 3398},
  {'class': '

In [26]:
len(r.json()['Labels'])

76

In [8]:
video = True

In [9]:
headers = {'user_secret': secret,
           'govteam_secret': gov_secret,
           'session_token': session_token}

r = requests.get(url + "/seed_labels", headers=headers)
labels = r.json()['Labels']

if video:
    seed_labels = []
    dictionary_clips = {}
    for clip in labels:
        action_frames = [str(i)+'.jpg' for i in range(clip['start_frame'], clip['end_frame'])]
        dictionary_clips[clip["id"]] = action_frames
        seed_labels.append([clip["class"], clip["id"]])
    #return seed_labels, dictionary_clips

else:
    seed_labels = []
    for image in labels:
        seed_labels.append([image["class"], image["id"]])
    #return seed_labels

In [11]:
dictionary_clips.keys()

dict_keys([881, 1366, 93, 561, 294, 679, 1064, 1428, 360, 145, 209, 1697, 110, 1637, 1783, 893, 1726, 1689, 1424, 46, 1321, 1283, 564, 2017, 581, 818, 443, 259, 1343, 1066, 540, 1759, 142, 287, 1665, 1396, 983, 198, 750, 542, 647, 89, 1363, 103, 2036, 1854, 716, 484, 216, 118, 370])

In [13]:
dictionary_clips[581]

['50991.jpg',
 '50992.jpg',
 '50993.jpg',
 '50994.jpg',
 '50995.jpg',
 '50996.jpg',
 '50997.jpg',
 '50998.jpg',
 '50999.jpg',
 '51000.jpg',
 '51001.jpg',
 '51002.jpg',
 '51003.jpg',
 '51004.jpg',
 '51005.jpg',
 '51006.jpg',
 '51007.jpg',
 '51008.jpg',
 '51009.jpg',
 '51010.jpg',
 '51011.jpg',
 '51012.jpg',
 '51013.jpg',
 '51014.jpg',
 '51015.jpg',
 '51016.jpg',
 '51017.jpg',
 '51018.jpg',
 '51019.jpg',
 '51020.jpg',
 '51021.jpg',
 '51022.jpg',
 '51023.jpg',
 '51024.jpg',
 '51025.jpg',
 '51026.jpg',
 '51027.jpg',
 '51028.jpg',
 '51029.jpg',
 '51030.jpg',
 '51031.jpg',
 '51032.jpg',
 '51033.jpg',
 '51034.jpg',
 '51035.jpg',
 '51036.jpg',
 '51037.jpg',
 '51038.jpg',
 '51039.jpg',
 '51040.jpg',
 '51041.jpg',
 '51042.jpg',
 '51043.jpg',
 '51044.jpg',
 '51045.jpg',
 '51046.jpg',
 '51047.jpg',
 '51048.jpg',
 '51049.jpg',
 '51050.jpg',
 '51051.jpg',
 '51052.jpg',
 '51053.jpg',
 '51054.jpg',
 '51055.jpg',
 '51056.jpg',
 '51057.jpg',
 '51058.jpg',
 '51059.jpg',
 '51060.jpg',
 '51061.jpg',
 '5106

In [16]:
def get_test_images_and_classes_vid(dataset_path: Path, session_token: str, data_type: str='sample') -> Tuple[List[str],List[str]]:
    """
    Helper method to dynamically get the test labels and give us the possible classes that can be submitted
    for the current dataset
    
    Params
    ------
    
    dataset_path : Path
        The path to the `development` dataset downloads
    
    session_token : str
        Your current session token so that we can look up the current session metadata
    
    data_type: str
        Indicates whether you are using the `sample` or `full` dataset. 
    Returns
    -------
    
    Tuple[List[str], List[str]]
        The list of test image ids needed to submit a prediction and the list of class names that you can predict against
    """
    # Then we can just reference our current metadata to get our dataset name and use that in the path
    headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': os.environ.get('GOVTEAM_SECRET')}
    r = requests.get(f"{url}/session_status", headers=headers)
    current_dataset = r.json()['Session_Status']['current_dataset']
    current_dataset_name = current_dataset['name']
    current_dataset_classes = current_dataset['classes']

    test_meta = pd.read_feather(dataset_path.joinpath(f"{current_dataset_name}/{current_dataset_name}_{data_type}/meta_test.feather"))
    test_ids = test_meta['id'].tolist()
    return test_meta, test_ids, current_dataset_classes

In [21]:
def get_test_images_and_classes_vid(dataset_path: Path, session_token: str, data_type: str='sample') -> Tuple[List[str],List[str]]:
    """
    Helper method to dynamically get the test labels and give us the possible classes that can be submitted
    for the current dataset
    
    Params
    ------
    
    dataset_path : Path
        The path to the `development` dataset downloads
    
    session_token : str
        Your current session token so that we can look up the current session metadata
    
    data_type: str
        Indicates whether you are using the `sample` or `full` dataset. 
    Returns
    -------
    
    Tuple[List[str], List[str]]
        The list of test image ids needed to submit a prediction and the list of class names that you can predict against
    """
    # Then we can just reference our current metadata to get our dataset name and use that in the path
    headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': os.environ.get('GOVTEAM_SECRET')}
    r = requests.get(f"{url}/session_status", headers=headers)
    current_dataset = r.json()['Session_Status']['current_dataset']
    current_dataset_name = current_dataset['name']
    current_dataset_classes = current_dataset['classes']

    test_meta = pd.read_feather(dataset_path.joinpath(f"{current_dataset_name}/labels_{data_type}/meta_test.feather"))
    test_ids = test_meta['id'].tolist()
    return test_meta, test_ids, current_dataset_classes

In [22]:
DATASETS_PATH = Path.home().joinpath('/lwll/external')#/hmdb/hmdb_sample/test/')

In [18]:
DATASETS_PATH

PosixPath('/lwll/external')

In [19]:
os.path.isdir(DATASETS_PATH) 

True

In [23]:
 test_meta, test_ids, current_dataset_classes = get_test_images_and_classes_vid(dataset_path=DATASETS_PATH, session_token=session_token, data_type='sample')

In [32]:
image_paths = []
dictionary_clips = {}
for clip in test_meta.iterrows():
    row = clip[1]
    action_frames = [str(i)+'.jpg' for i in range(row['start_frame'], row['end_frame'])]
    dictionary_clips[row["id"]] = action_frames
    image_paths.append('ciao' + str(row["id"]))

In [33]:
image_paths

['ciao5503',
 'ciao5855',
 'ciao6188',
 'ciao5894',
 'ciao6485',
 'ciao6222',
 'ciao5968',
 'ciao5959',
 'ciao5627',
 'ciao6505',
 'ciao6098',
 'ciao6434',
 'ciao5794',
 'ciao6754',
 'ciao5958',
 'ciao6148',
 'ciao5460',
 'ciao5700',
 'ciao5616',
 'ciao6184',
 'ciao6490',
 'ciao5737',
 'ciao6083',
 'ciao6725',
 'ciao6445',
 'ciao6595',
 'ciao6481',
 'ciao5472',
 'ciao5415',
 'ciao5727',
 'ciao6179',
 'ciao6156',
 'ciao6538',
 'ciao6530',
 'ciao6092',
 'ciao5613',
 'ciao6482',
 'ciao6389',
 'ciao5715',
 'ciao5660',
 'ciao6537',
 'ciao6370',
 'ciao5579',
 'ciao5500',
 'ciao6693',
 'ciao6304',
 'ciao6726',
 'ciao6518',
 'ciao6554',
 'ciao6475',
 'ciao5431',
 'ciao5820',
 'ciao5492',
 'ciao6105',
 'ciao5753',
 'ciao6477',
 'ciao6580',
 'ciao5679',
 'ciao6560',
 'ciao5742',
 'ciao6178',
 'ciao5589',
 'ciao6303',
 'ciao6429',
 'ciao6450',
 'ciao5961',
 'ciao6609',
 'ciao5844',
 'ciao6392',
 'ciao6311',
 'ciao5527',
 'ciao6495',
 'ciao6588',
 'ciao5840',
 'ciao6514',
 'ciao5876',
 'ciao5787',

In [122]:
def get_random_labels_from_train_dataset(dataset_path: Path, session_token: str, n: int=None, data_type: str='sample') -> List[str]:
    """
    Helper function to get a random `n` image ids from our train dataset to request labels for
    from the api
    
    Params
    ------
    
    dataset_path : Path
        The path to the `development` dataset downloads
    
    session_token : str
        Your current session token so that we can look up the current session metadata
    data_type: str
        Indicates whether you are using the `sample` or `full` size dataset
    
    Returns
    -------
    
    List[str]
        A list of n unique image ids for the current session dataset
        
    """
    headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': os.environ.get('GOVTEAM_SECRET')}
    r = requests.get(f"{url}/session_status", headers=headers)
    current_dataset = r.json()['Session_Status']['current_dataset']
    current_dataset_name = current_dataset['name']
    budget_left = r.json()['Session_Status']['budget_left_until_checkpoint']
    print(f"budget_left is {budget_left}")
    if not n:
        n = budget_left
    
    train_imgs_dir = dataset_path.joinpath(f"{current_dataset_name}/{current_dataset_name}_{data_type}/train")
    train_imgs = [f.name for f in train_imgs_dir.iterdir() if f.is_file()]
    print(f"train_imgs: {train_imgs[0:4]}")
    
    random_ids = random.sample(train_imgs, k=n)
    return random_ids
    

In [123]:
images_to_be_labeled = get_random_labels_from_train_dataset(DATASETS_PATH, session_token, n=2)

budget_left is 51
train_imgs: ['10304.jpg', '10305.jpg', '10306.jpg', '10307.jpg']


In [128]:
images_to_be_labeled

['290066.jpg', '506621.jpg']

In [129]:
query = {
    'example_ids': images_to_be_labeled
}

In [133]:
headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': gov_secret}
r = requests.post(f"{url}/query_labels", json=query, headers=headers)
r.json()

{'Labels': [{'class': 'ride_bike',
   'end_frame': 506636,
   'id': 5411,
   'start_frame': 506558,
   'video_id': 5411},
  {'class': 'stand',
   'end_frame': 290102,
   'id': 3122,
   'start_frame': 290056,
   'video_id': 3122}],
 'Session_Status': {'active': 'In Progress',
  'budget_left_until_checkpoint': 49,
  'budget_used': 2,
  'current_dataset': {'classes': ['shoot_ball',
    'somersault',
    'stand',
    'smile',
    'pour',
    'climb_stairs',
    'flic_flac',
    'situp',
    'golf',
    'pick',
    'draw_sword',
    'smoke',
    'clap',
    'walk',
    'dribble',
    'talk',
    'pushup',
    'fall_floor',
    'catch',
    'sword',
    'kick_ball',
    'cartwheel',
    'punch',
    'sword_exercise',
    'shoot_bow',
    'brush_hair',
    'push',
    'wave',
    'eat',
    'hug',
    'swing_baseball',
    'ride_horse',
    'throw',
    'run',
    'sit',
    'pullup',
    'dive',
    'turn',
    'climb',
    'chew',
    'handstand',
    'hit',
    'laugh',
    'kiss',
    'dr

In [132]:
url

'https://api-dev.lollllz.com'

In [5]:
headers = {'user_secret': secret, 'govteam_secret': gov_secret}
r = requests.get(f"{url}/task_metadata/problem_test_video_classification", headers=headers)
print(json.dumps(r.json(), indent=2))

{
  "task_metadata": {
    "adaptation_dataset": "hmdb",
    "adaptation_evaluation_metrics": [
      "accuracy"
    ],
    "adaptation_label_budget_full": [
      51,
      102,
      204,
      408,
      779,
      1486,
      2836,
      5412
    ],
    "adaptation_label_budget_sample": [
      51,
      102,
      204,
      408,
      485,
      577,
      686,
      816
    ],
    "base_dataset": "hmdb",
    "base_evaluation_metrics": [
      "accuracy"
    ],
    "base_label_budget_full": [
      51,
      102,
      204,
      408,
      779,
      1486,
      2836,
      5412
    ],
    "base_label_budget_sample": [
      51,
      102,
      204,
      408,
      485,
      577,
      686,
      816
    ],
    "problem_type": "video_classification",
    "task_id": "problem_test_video_classification",
    "whitelist": [
      "imagenet_1k"
    ]
  }
}


In [6]:
r.json()['task_metadata']['whitelist']

['imagenet_1k']

In [22]:
headers = {'user_secret': secret, 'govteam_secret': gov_secret}
r = requests.get(f"{url}/dataset_metadata/google_open_image", headers=headers)
print(json.dumps(r.json(), indent=2))

{
  "dataset_metadata": {
    "classes": [
      "Cat furniture",
      "Cheese",
      "Kettle",
      "Bicycle",
      "Chest of drawers",
      "Shorts",
      "Desk",
      "Fox",
      "Plant",
      "Footwear",
      "Bathroom accessory",
      "Flute",
      "Light switch",
      "Sea lion",
      "Whisk",
      "Flowerpot",
      "Treadmill",
      "Bicycle helmet",
      "Cheetah",
      "Glove",
      "Blue jay",
      "Grape",
      "Tent",
      "Microphone",
      "Clothing",
      "Moths and butterflies",
      "Radish",
      "Pancake",
      "Owl",
      "Furniture",
      "Banana",
      "Helmet",
      "Digital clock",
      "Strawberry",
      "Measuring cup",
      "Girl",
      "Candy",
      "Bat (Animal)",
      "Jaguar (Animal)",
      "Scissors",
      "Ipod",
      "Canoe",
      "Pencil sharpener",
      "Willow",
      "Human head",
      "Submarine sandwich",
      "Waste container",
      "Axe",
      "Spoon",
      "Lavender (Plant)",
      "Bell pepper",

In [8]:
from typing import List
def get_task_subset_by_type(subset_type: str, url: str) -> List[str]:
    """
    Helper function that returns the task ids in a list that match a specified
    problem type
    
    Params
    ------
    
    subset_type : str
        The task_type subset you want to get back
    """
    headers = {'user_secret': secret, 'govteam_secret': gov_secret}
    tasks = requests.get(f"{url}/list_tasks", headers=headers)
    task_list = tasks.json()['tasks']
    subset_tasks = []
    for _task in task_list:
        r = requests.get(f"{url}/task_metadata/{_task}", headers=headers)
        task_metadata = r.json()
        try:
            if task_metadata['task_metadata']['problem_type'] == subset_type:
                subset_tasks.append(_task)
        except Exception as e:
            print(_task)
            print(e)
    return subset_tasks

In [9]:
video_classification_tasks = get_task_subset_by_type('video_classification', url)
img_classification_tasks = get_task_subset_by_type('image_classification', url)

In [10]:
video_classification_tasks

['problem_test_video_classification']

In [26]:
img_classification_tasks

['6d5e1f85-5d8f-4cc9-8184-299db03713f4',
 'b01a6738-0b85-46c2-9318-16c3e2ef0f6d',
 'bbfadb2c-c7c3-4596-b548-3dd01a6d1d2c',
 'problem_test_image_classification']

In [12]:
headers = {'user_secret': secret, 'govteam_secret': gov_secret}

# This is a convenience for development purposes, IN EVAL ALWAYS USE `full`
data_type = 'sample' # can either be `sample` or `full`

# Option to customize the session name 
r = requests.post(f"{url}/auth/create_session", json={'session_name': 'testing', 'data_type': data_type, 
                                                      'task_id': 'problem_test_video_classification'},
                  headers=headers)
r.json()

{'session_token': 'rP42SIPvvFPYTAe56lbW'}

In [13]:
session_token = r.json()['session_token']

In [14]:
headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': gov_secret}

r = requests.get(f"{url}/session_status", headers=headers)
r.json()

{'Session_Status': {'active': 'In Progress',
  'budget_left_until_checkpoint': 51,
  'budget_used': 0,
  'current_dataset': {'classes': ['shoot_ball',
    'somersault',
    'stand',
    'smile',
    'pour',
    'climb_stairs',
    'flic_flac',
    'situp',
    'golf',
    'pick',
    'draw_sword',
    'smoke',
    'clap',
    'walk',
    'dribble',
    'talk',
    'pushup',
    'fall_floor',
    'catch',
    'sword',
    'kick_ball',
    'cartwheel',
    'punch',
    'sword_exercise',
    'shoot_bow',
    'brush_hair',
    'push',
    'wave',
    'eat',
    'hug',
    'swing_baseball',
    'ride_horse',
    'throw',
    'run',
    'sit',
    'pullup',
    'dive',
    'turn',
    'climb',
    'chew',
    'handstand',
    'hit',
    'laugh',
    'kiss',
    'drink',
    'ride_bike',
    'shake_hands',
    'kick',
    'fencing',
    'jump',
    'shoot_gun'],
   'dataset_type': 'video_classification',
   'license_citation': 'Kuehne, Hildegard, Hueihan Jhuang, Estíbaliz Garrote, Tomaso Pogg

In [32]:
#This shows the active sessions for your team
headers_session = {'user_secret': secret, 'govteam_secret': gov_secret}

r = requests.get(f"{url}/list_active_sessions", headers=headers_session)
active_sessions = r.json()
active_sessions

{'active_sessions': ['07HRMjQx2FhKg5ePG9ES',
  '0axbLnk9q8Edbe8Uto6B',
  '2hiyDBNkIvjSxfsAJiWE',
  '31IT4UtOCge9Z4588186',
  '3LnrRHLKTcv39xguM7SM',
  '3TuOcOxeOlqBRWntguk3',
  '44RHjL8nBADJqHRwdyaF',
  '4l47oy8FJiYfvN4bOb8U',
  '5Jca2i4tNkZtW0XJQX6y',
  '5WmYxgcTdXUZbGxOQVHT',
  '5gM8ximMNgsogk9md5NX',
  '5x1qcLiikkWPtdKO9BpM',
  '6UmGVewydWxxgVBScAQp',
  '828JXK1wA7BFLYjWDLo8',
  '8UtrWLtmswwDQvv5VhaK',
  '8zjJi4Xoaqf41er56neT',
  '9R0iWvrberOt3DiCUdcw',
  'Ba3Co8fSrhH1x8TuygA4',
  'BtRQMbs75CRyd044cIRo',
  'CHJuyAawICW1xeC9uOXQ',
  'Dqa1NuHE4ik7rp7Ns1nj',
  'EafoOLXGie384F4slQIO',
  'EipSnGK0q8WeFdyyBtH8',
  'FEve9SZoGkKwkgD17RUs',
  'Gj5N3bfuX6TXVZ7eKaZy',
  'J81dKtsVxLqsQbpvhouP',
  'JjwONCZK0GnI3nQgwRMg',
  'K5xPNejwR7FQInPFdwi3',
  'LY7rsV9S0bohJQQWQfIQ',
  'MnCVhHzptNsQs8w3X3w4',
  'NHcd7GvqPKQvUe9TGrGU',
  'NMFa4DG6i7cautR3aZvX',
  'Ota4oh4wSha0ZxOzHWJl',
  'TOxPWTXjFqeaYduMWz7Z',
  'UIM9qlksZjJwAJDYUtsl',
  'UdsuSn03PL07nKGPQaec',
  'UpCLBHcG3EEHSPl9cDvV',
  'aTDsEM7mz5uUoAH6

In [15]:
headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': gov_secret}

r = requests.get(f"{url}/seed_labels", headers=headers)
r.json()

{'Labels': [{'class': 'brush_hair',
   'end_frame': 80711,
   'id': 881,
   'start_frame': 80326,
   'video_id': 881},
  {'class': 'cartwheel',
   'end_frame': 125969,
   'id': 1366,
   'start_frame': 125892,
   'video_id': 1366},
  {'class': 'catch',
   'end_frame': 8277,
   'id': 93,
   'start_frame': 8242,
   'video_id': 93},
  {'class': 'chew',
   'end_frame': 49439,
   'id': 561,
   'start_frame': 49341,
   'video_id': 561},
  {'class': 'clap',
   'end_frame': 26696,
   'id': 294,
   'start_frame': 26653,
   'video_id': 294},
  {'class': 'climb',
   'end_frame': 60370,
   'id': 679,
   'start_frame': 60278,
   'video_id': 679},
  {'class': 'climb_stairs',
   'end_frame': 98319,
   'id': 1064,
   'start_frame': 98245,
   'video_id': 1064},
  {'class': 'dive',
   'end_frame': 131732,
   'id': 1428,
   'start_frame': 131702,
   'video_id': 1428},
  {'class': 'draw_sword',
   'end_frame': 32416,
   'id': 360,
   'start_frame': 32363,
   'video_id': 360},
  {'class': 'dribble',
   'end

In [16]:
headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': gov_secret}

r = requests.get(f"{url}/session_status", headers=headers)
r.json()

{'Session_Status': {'active': 'In Progress',
  'budget_left_until_checkpoint': 0,
  'budget_used': 51,
  'current_dataset': {'classes': ['shoot_ball',
    'somersault',
    'stand',
    'smile',
    'pour',
    'climb_stairs',
    'flic_flac',
    'situp',
    'golf',
    'pick',
    'draw_sword',
    'smoke',
    'clap',
    'walk',
    'dribble',
    'talk',
    'pushup',
    'fall_floor',
    'catch',
    'sword',
    'kick_ball',
    'cartwheel',
    'punch',
    'sword_exercise',
    'shoot_bow',
    'brush_hair',
    'push',
    'wave',
    'eat',
    'hug',
    'swing_baseball',
    'ride_horse',
    'throw',
    'run',
    'sit',
    'pullup',
    'dive',
    'turn',
    'climb',
    'chew',
    'handstand',
    'hit',
    'laugh',
    'kiss',
    'drink',
    'ride_bike',
    'shake_hands',
    'kick',
    'fencing',
    'jump',
    'shoot_gun'],
   'dataset_type': 'video_classification',
   'license_citation': 'Kuehne, Hildegard, Hueihan Jhuang, Estíbaliz Garrote, Tomaso Pogg

In [39]:
r.json()['Session_Status']['budget_left_until_checkpoint']

0

In [17]:
def get_test_images_and_classes(dataset_path: Path, session_token: str, data_type: str='sample') -> Tuple[List[str],List[str]]:
    """
    Helper method to dynamically get the test labels and give us the possible classes that can be submitted
    for the current dataset
    
    Params
    ------
    
    dataset_path : Path
        The path to the `development` dataset downloads
    
    session_token : str
        Your current session token so that we can look up the current session metadata
    
    data_type: str
        Indicates whether you are using the `sample` or `full` dataset. 
    Returns
    -------
    
    Tuple[List[str], List[str]]
        The list of test image ids needed to submit a prediction and the list of class names that you can predict against
    """
    # Then we can just reference our current metadata to get our dataset name and use that in the path
    headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': os.environ.get('GOVTEAM_SECRET')}
    r = requests.get(f"{url}/session_status", headers=headers)
    current_dataset = r.json()['Session_Status']['current_dataset']
    current_dataset_name = current_dataset['name']
    current_dataset_classes = current_dataset['classes']

    test_imgs_dir = dataset_path.joinpath(f"{current_dataset_name}/{current_dataset_name}_{data_type}/test")
    test_imgs = [f.name for f in test_imgs_dir.iterdir() if f.is_file()]
    return test_imgs, current_dataset_classes

def generate_random_predictions_on_test_set(test_imgs: List[str], current_dataset_classes: List[str]) -> pd.DataFrame:
    """
    Generates a prediction dataframe for image classification based on random sampling from our available classes
    """
    rand_lbls = [str(random.choice(current_dataset_classes)) for _ in range(len(test_imgs))]
    df = pd.DataFrame({'id': test_imgs, 'class': rand_lbls})
    return df

In [18]:
DATASETS_PATH = Path.home().joinpath('lwll_datasets/development')

In [19]:
test_imgs, current_dataset_classes = get_test_images_and_classes(DATASETS_PATH, session_token)

FileNotFoundError: [Errno 2] No such file or directory: '/home/ubuntu/lwll_datasets/development/hmdb/hmdb_sample/test'

In [52]:
df = generate_random_predictions_on_test_set(test_imgs, current_dataset_classes)

In [53]:
df

Unnamed: 0,id,class
0,4046.png,8
1,1169.png,1
2,3505.png,3
3,9271.png,1
4,1753.png,1
...,...,...
995,8127.png,4
996,3822.png,4
997,4874.png,4
998,3425.png,2


In [56]:
headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': gov_secret}

r = requests.post(f"{url}/submit_predictions", json={'predictions': df.to_dict()}, headers=headers)
r.json()

{'Session_Status': {'active': 'In Progress',
  'budget_left_until_checkpoint': 10,
  'budget_used': 10,
  'current_dataset': {'classes': ['0',
    '1',
    '2',
    '3',
    '4',
    '5',
    '6',
    '7',
    '8',
    '9'],
   'dataset_type': 'image_classification',
   'license_citation': '[LeCun et al., 1998a] Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. "Gradient-based learning applied to document recognition." Proceedings of the IEEE, 86(11):2278-2324, November 1998. http://yann.lecun.com/exdb/publis/index.html#lecun-98',
   'license_link': 'http://yann.lecun.com/exdb/mnist/',
   'license_requirements': 'None',
   'name': 'mnist',
   'number_of_channels': 1,
   'number_of_classes': 10,
   'number_of_samples_test': 1000,
   'number_of_samples_train': 160,
   'uid': 'mnist'},
  'current_label_budget_stages': [10, 20, 40, 80, 95, 113, 135, 160],
  'date_created': 1615924043000,
  'date_last_interacted': 1615924481881,
  'pair_stage': 'base',
  'session_name': 'testing',
  'task_id'

In [57]:
headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': os.environ.get('GOVTEAM_SECRET')}

r = requests.get(f"{url}/seed_labels", headers=headers)
r.json()

{'Labels': [{'class': '0', 'id': '33665.png'},
  {'class': '1', 'id': '6371.png'},
  {'class': '2', 'id': '45055.png'},
  {'class': '3', 'id': '40914.png'},
  {'class': '4', 'id': '6733.png'},
  {'class': '5', 'id': '16400.png'},
  {'class': '6', 'id': '14405.png'},
  {'class': '7', 'id': '55078.png'},
  {'class': '8', 'id': '17808.png'},
  {'class': '9', 'id': '10944.png'}]}

In [58]:
headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': os.environ.get('GOVTEAM_SECRET')}

r = requests.get(f"{url}/session_status", headers=headers)
r.json()

{'Session_Status': {'active': 'In Progress',
  'budget_left_until_checkpoint': 0,
  'budget_used': 20,
  'current_dataset': {'classes': ['0',
    '1',
    '2',
    '3',
    '4',
    '5',
    '6',
    '7',
    '8',
    '9'],
   'dataset_type': 'image_classification',
   'license_citation': '[LeCun et al., 1998a] Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. "Gradient-based learning applied to document recognition." Proceedings of the IEEE, 86(11):2278-2324, November 1998. http://yann.lecun.com/exdb/publis/index.html#lecun-98',
   'license_link': 'http://yann.lecun.com/exdb/mnist/',
   'license_requirements': 'None',
   'name': 'mnist',
   'number_of_channels': 1,
   'number_of_classes': 10,
   'number_of_samples_test': 1000,
   'number_of_samples_train': 160,
   'uid': 'mnist'},
  'current_label_budget_stages': [10, 20, 40, 80, 95, 113, 135, 160],
  'date_created': 1615924043000,
  'date_last_interacted': 1615925773325,
  'pair_stage': 'base',
  'session_name': 'testing',
  'task_id':

In [59]:
test_imgs, current_dataset_classes = get_test_images_and_classes(DATASETS_PATH, session_token)
df = generate_random_predictions_on_test_set(test_imgs, current_dataset_classes)

In [60]:
headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': os.environ.get('GOVTEAM_SECRET')}

r = requests.post(f"{url}/submit_predictions", json={'predictions': df.to_dict()}, headers=headers)
r.json()

{'Session_Status': {'active': 'In Progress',
  'budget_left_until_checkpoint': 20,
  'budget_used': 20,
  'current_dataset': {'classes': ['0',
    '1',
    '2',
    '3',
    '4',
    '5',
    '6',
    '7',
    '8',
    '9'],
   'dataset_type': 'image_classification',
   'license_citation': '[LeCun et al., 1998a] Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. "Gradient-based learning applied to document recognition." Proceedings of the IEEE, 86(11):2278-2324, November 1998. http://yann.lecun.com/exdb/publis/index.html#lecun-98',
   'license_link': 'http://yann.lecun.com/exdb/mnist/',
   'license_requirements': 'None',
   'name': 'mnist',
   'number_of_channels': 1,
   'number_of_classes': 10,
   'number_of_samples_test': 1000,
   'number_of_samples_train': 160,
   'uid': 'mnist'},
  'current_label_budget_stages': [10, 20, 40, 80, 95, 113, 135, 160],
  'date_created': 1615924043000,
  'date_last_interacted': 1615925773325,
  'pair_stage': 'base',
  'session_name': 'testing',
  'task_id'

In [61]:
headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': os.environ.get('GOVTEAM_SECRET')}

r = requests.get(f"{url}/seed_labels", headers=headers)
r.json()

{'Labels': [{'class': '0', 'id': '49011.png'},
  {'class': '0', 'id': '40303.png'},
  {'class': '1', 'id': '30274.png'},
  {'class': '1', 'id': '9251.png'},
  {'class': '2', 'id': '38652.png'},
  {'class': '2', 'id': '1374.png'},
  {'class': '3', 'id': '5412.png'},
  {'class': '3', 'id': '49882.png'},
  {'class': '4', 'id': '32895.png'},
  {'class': '4', 'id': '45778.png'},
  {'class': '5', 'id': '41243.png'},
  {'class': '5', 'id': '31863.png'},
  {'class': '6', 'id': '2300.png'},
  {'class': '6', 'id': '38643.png'},
  {'class': '7', 'id': '58671.png'},
  {'class': '7', 'id': '9538.png'},
  {'class': '8', 'id': '22166.png'},
  {'class': '8', 'id': '43944.png'},
  {'class': '9', 'id': '31275.png'},
  {'class': '9', 'id': '20434.png'}]}

In [62]:
headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': os.environ.get('GOVTEAM_SECRET')}

r = requests.get(f"{url}/session_status", headers=headers)
r.json()

{'Session_Status': {'active': 'In Progress',
  'budget_left_until_checkpoint': 0,
  'budget_used': 40,
  'current_dataset': {'classes': ['0',
    '1',
    '2',
    '3',
    '4',
    '5',
    '6',
    '7',
    '8',
    '9'],
   'dataset_type': 'image_classification',
   'license_citation': '[LeCun et al., 1998a] Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. "Gradient-based learning applied to document recognition." Proceedings of the IEEE, 86(11):2278-2324, November 1998. http://yann.lecun.com/exdb/publis/index.html#lecun-98',
   'license_link': 'http://yann.lecun.com/exdb/mnist/',
   'license_requirements': 'None',
   'name': 'mnist',
   'number_of_channels': 1,
   'number_of_classes': 10,
   'number_of_samples_test': 1000,
   'number_of_samples_train': 160,
   'uid': 'mnist'},
  'current_label_budget_stages': [10, 20, 40, 80, 95, 113, 135, 160],
  'date_created': 1615924043000,
  'date_last_interacted': 1615925885014,
  'pair_stage': 'base',
  'session_name': 'testing',
  'task_id':

In [63]:
test_imgs, current_dataset_classes = get_test_images_and_classes(DATASETS_PATH, session_token)
df = generate_random_predictions_on_test_set(test_imgs, current_dataset_classes)

In [64]:
headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': os.environ.get('GOVTEAM_SECRET')}

r = requests.post(f"{url}/submit_predictions", json={'predictions': df.to_dict()}, headers=headers)
r.json()

{'Session_Status': {'active': 'In Progress',
  'budget_left_until_checkpoint': 40,
  'budget_used': 40,
  'current_dataset': {'classes': ['0',
    '1',
    '2',
    '3',
    '4',
    '5',
    '6',
    '7',
    '8',
    '9'],
   'dataset_type': 'image_classification',
   'license_citation': '[LeCun et al., 1998a] Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. "Gradient-based learning applied to document recognition." Proceedings of the IEEE, 86(11):2278-2324, November 1998. http://yann.lecun.com/exdb/publis/index.html#lecun-98',
   'license_link': 'http://yann.lecun.com/exdb/mnist/',
   'license_requirements': 'None',
   'name': 'mnist',
   'number_of_channels': 1,
   'number_of_classes': 10,
   'number_of_samples_test': 1000,
   'number_of_samples_train': 160,
   'uid': 'mnist'},
  'current_label_budget_stages': [10, 20, 40, 80, 95, 113, 135, 160],
  'date_created': 1615924043000,
  'date_last_interacted': 1615925885014,
  'pair_stage': 'base',
  'session_name': 'testing',
  'task_id'

In [65]:
def get_random_labels_from_train_dataset(dataset_path: Path, session_token: str, n: int=None, data_type: str='sample') -> List[str]:
    """
    Helper function to get a random `n` image ids from our train dataset to request labels for
    from the api
    
    Params
    ------
    
    dataset_path : Path
        The path to the `development` dataset downloads
    
    session_token : str
        Your current session token so that we can look up the current session metadata
    data_type: str
        Indicates whether you are using the `sample` or `full` size dataset
    
    Returns
    -------
    
    List[str]
        A list of n unique image ids for the current session dataset
        
    """
    headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': gov_secret}
    r = requests.get(f"{url}/session_status", headers=headers)
    current_dataset = r.json()['Session_Status']['current_dataset']
    current_dataset_name = current_dataset['name']
    budget_left = r.json()['Session_Status']['budget_left_until_checkpoint']
    print(f"budget_left is {budget_left}")
    if not n:
        n = budget_left
    
    train_imgs_dir = dataset_path.joinpath(f"{current_dataset_name}/{current_dataset_name}_{data_type}/train")
    train_imgs = [f.name for f in train_imgs_dir.iterdir() if f.is_file()]
    print(f"train_imgs: {train_imgs[0:4]}")
    
    random_ids = random.sample(train_imgs, k=n)
    return random_ids

In [66]:
images_to_be_labeled = get_random_labels_from_train_dataset(DATASETS_PATH, session_token, n=10)
images_to_be_labeled

budget_left is 40
train_imgs: ['5238.png', '42470.png', '2944.png', '16089.png']


['14160.png',
 '12840.png',
 '15889.png',
 '22010.png',
 '39328.png',
 '52250.png',
 '38606.png',
 '6371.png',
 '49069.png',
 '45703.png']

In [67]:
headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': gov_secret}

query = {
    'example_ids': images_to_be_labeled
}

r = requests.post(f"{url}/query_labels", json=query, headers=headers)
r.json()

{'Labels': [{'class': '1', 'id': '6371.png'},
  {'class': '2', 'id': '22010.png'},
  {'class': '1', 'id': '39328.png'},
  {'class': '7', 'id': '49069.png'},
  {'class': '8', 'id': '38606.png'},
  {'class': '1', 'id': '45703.png'},
  {'class': '0', 'id': '14160.png'},
  {'class': '8', 'id': '52250.png'},
  {'class': '9', 'id': '12840.png'},
  {'class': '3', 'id': '15889.png'}],
 'Session_Status': {'active': 'In Progress',
  'budget_left_until_checkpoint': 30,
  'budget_used': 50,
  'current_dataset': {'classes': ['0',
    '1',
    '2',
    '3',
    '4',
    '5',
    '6',
    '7',
    '8',
    '9'],
   'dataset_type': 'image_classification',
   'license_citation': '[LeCun et al., 1998a] Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. "Gradient-based learning applied to document recognition." Proceedings of the IEEE, 86(11):2278-2324, November 1998. http://yann.lecun.com/exdb/publis/index.html#lecun-98',
   'license_link': 'http://yann.lecun.com/exdb/mnist/',
   'license_requirements': 'N

In [68]:
images_to_be_labeled = get_random_labels_from_train_dataset(DATASETS_PATH, session_token, n=30)
images_to_be_labeled

budget_left is 30
train_imgs: ['5238.png', '42470.png', '2944.png', '16089.png']


['35102.png',
 '18742.png',
 '29804.png',
 '3182.png',
 '29720.png',
 '13624.png',
 '18451.png',
 '1380.png',
 '5775.png',
 '15654.png',
 '29423.png',
 '44395.png',
 '59668.png',
 '46864.png',
 '52098.png',
 '2557.png',
 '4922.png',
 '41956.png',
 '33443.png',
 '58374.png',
 '49105.png',
 '58451.png',
 '42697.png',
 '39245.png',
 '36073.png',
 '5684.png',
 '23929.png',
 '57242.png',
 '50632.png',
 '34989.png']

In [69]:
headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': gov_secret}

query = {
    'example_ids': images_to_be_labeled
}

r = requests.post(f"{url}/query_labels", json=query, headers=headers)
r.json()

{'Labels': [{'class': '8', 'id': '46864.png'},
  {'class': '3', 'id': '52098.png'},
  {'class': '0', 'id': '34989.png'},
  {'class': '0', 'id': '2557.png'},
  {'class': '1', 'id': '18451.png'},
  {'class': '9', 'id': '59668.png'},
  {'class': '4', 'id': '39245.png'},
  {'class': '2', 'id': '44395.png'},
  {'class': '3', 'id': '23929.png'},
  {'class': '1', 'id': '58451.png'},
  {'class': '4', 'id': '49105.png'},
  {'class': '4', 'id': '1380.png'},
  {'class': '6', 'id': '5684.png'},
  {'class': '7', 'id': '35102.png'},
  {'class': '4', 'id': '42697.png'},
  {'class': '7', 'id': '29423.png'},
  {'class': '5', 'id': '29804.png'},
  {'class': '9', 'id': '33443.png'},
  {'class': '5', 'id': '57242.png'},
  {'class': '8', 'id': '58374.png'},
  {'class': '1', 'id': '50632.png'},
  {'class': '2', 'id': '29720.png'},
  {'class': '4', 'id': '3182.png'},
  {'class': '8', 'id': '5775.png'},
  {'class': '3', 'id': '18742.png'},
  {'class': '6', 'id': '4922.png'},
  {'class': '1', 'id': '41956.png'

In [71]:
df = generate_random_predictions_on_test_set(test_imgs, current_dataset_classes)
headers = {'user_secret': secret, 'session_token': session_token, 'govteam_secret': gov_secret}

r = requests.post(f"{url}/submit_predictions", json={'predictions': df.to_dict()}, headers=headers)
r.json()

{'Session_Status': {'active': 'In Progress',
  'budget_left_until_checkpoint': 18,
  'budget_used': 95,
  'current_dataset': {'classes': ['0',
    '1',
    '2',
    '3',
    '4',
    '5',
    '6',
    '7',
    '8',
    '9'],
   'dataset_type': 'image_classification',
   'license_citation': '[LeCun et al., 1998a] Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. "Gradient-based learning applied to document recognition." Proceedings of the IEEE, 86(11):2278-2324, November 1998. http://yann.lecun.com/exdb/publis/index.html#lecun-98',
   'license_link': 'http://yann.lecun.com/exdb/mnist/',
   'license_requirements': 'None',
   'name': 'mnist',
   'number_of_channels': 1,
   'number_of_classes': 10,
   'number_of_samples_test': 1000,
   'number_of_samples_train': 160,
   'uid': 'mnist'},
  'current_label_budget_stages': [10, 20, 40, 80, 95, 113, 135, 160],
  'date_created': 1615924043000,
  'date_last_interacted': 1615926541255,
  'pair_stage': 'base',
  'session_name': 'testing',
  'task_id'

In [16]:
class JPL:
    """
    A class to interact with JPL-like APIs.
    """
    def __init__(self, api_url, team_secret, gov_team_secret, dataset_type):
        """
        Create a new JPL object.
        """

        self.team_secret = team_secret
        self.gov_team_secret = gov_team_secret
        self.url = api_url 
        self.session_token = ''
        self.data_type = dataset_type


    def get_available_tasks(self, problem_type):
        """
        Get all available tasks.
        :return: A list of tasks (problems)
        """
        headers = {'user_secret': self.team_secret,
                   'govteam_secret': self.gov_team_secret
                   }
        r = requests.get(self.url + "/list_tasks", headers=headers)
        task_list = r.json()['tasks']
        #print(task_list)

        subset_tasks = []
        for _task in task_list:
            r = requests.get(self.url+"/task_metadata/"+_task, headers=headers)
            task_metadata = r.json()
            #print(task_metadata)
            if task_metadata['task_metadata']['problem_type'] == problem_type:
                subset_tasks.append(_task)
        return subset_tasks


In [17]:
jpl = JPL(url, secret, gov_secret, dataset_type='sample')

In [18]:
jpl.get_available_tasks('video_classification')

['problem_test_video_classification']