In [1]:
import os
from glob import glob
import pandas as pd
from tqdm import tqdm
import torch
import numpy as np
import shutil


## Visual (Behavior) Pipeline

### Extract Frame Data

In [2]:
from deepjuice.procedural import pandas_query as pdq
from deepjuice.procedural import pandas_query
from deepjuice.model_zoo.options import get_deepjuice_model

from deepjuice.procedural.datasets import get_data_loader
from deepjuice.extraction import FeatureExtractor
from deepjuice.procedural.cv_ops import CVIndexer
from deepjuice.alignment import TorchRidgeGCV
from deepjuice.reduction import compute_srp

from deepjuice.procedural import pandas_query as pdq
from deepjuice.alignment import compute_score

  from .autonotebook import tqdm as notebook_tqdm


### Build the Benchmark

In [5]:
def get_nearest_multiple(a, b):
    """Return the multiple of ``b`` closest to ``a``, preferring an even result.

    The plain nearest multiple ``round(a / b) * b`` is returned when it is
    even. When it is odd, the result is moved one step of ``b`` to whichever
    neighbouring multiple lies closer to ``a``.

    Args:
        a: Value to approximate (e.g. a requested batch size).
        b: Step whose multiples are allowed. Must be non-zero; the comparison
            below is written for a positive ``b``.

    Returns:
        The selected multiple of ``b``. Note that a value of ``a`` below
        ``b / 2`` still rounds to 0, as before.

    Raises:
        ZeroDivisionError: If ``b`` is 0.
    """
    # Built-in round() resolves exact .5 ties to the even integer.
    nearest_multiple = round(a / b) * b

    if nearest_multiple % 2 != 0:
        lower, upper = nearest_multiple - b, nearest_multiple + b
        # Compare the two neighbouring multiples with each other. The earlier
        # version compared the odd multiple itself against the lower neighbour,
        # which made it step up almost unconditionally (64, 5 -> 70, not 60).
        # Ties still go up. Stepping down onto 0 is also avoided, which keeps
        # the previous result for inputs between b / 2 and b.
        if (upper - a) <= (a - lower) or lower == 0:
            nearest_multiple = upper
        else:
            nearest_multiple = lower

    return nearest_multiple # integer space

In [6]:
# Input videos, the directory handed to the benchmark builder for frames,
# and the annotation table describing the events.
source_video_dir = '../data/raw/videos'
frame_dir = '../data/interim/frame_set/video_frames'
event_data = '../data/raw/annotations/annotations.csv'

# When True, an existing frame directory is deleted before the benchmark
# is built, so everything under it is regenerated.
fresh_start = True

if fresh_start and os.path.exists(frame_dir):
    shutil.rmtree(frame_dir)

# Five key-frame positions per video (presumably frame indices -- confirm
# against visual_events_benchmark); no target index is selected.
key_frames = [0, 22, 45, 67, 89]
target_index = None

benchmark = visual_events_benchmark(
    event_data, source_video_dir, frame_dir, key_frames, target_index
)

Processing Event Videos: 100%|██████████| 250/250 [01:40<00:00,  2.50it/s]


In [7]:
def run_visual_event_pipeline(model_uid, benchmark, device, **kwargs):
    """Score each layer of a model against the benchmark's annotation targets.

    Feature maps are pulled from the model for every benchmark image, reduced
    with a sparse random projection (SRP), and used as predictors in a ridge
    regression that is cross-validated (10 iterations of 5 folds) over the
    first 200 rows of the response data. The out-of-fold predictions are
    scored against the true responses with ``compute_score(..., 'pearsonr')``.

    Args:
        model_uid: Identifier handed to ``get_deepjuice_model``.
        benchmark: Mapping with the keys ``'response_data'`` (a DataFrame with
            one column per target), ``'image_paths'`` and ``'group_indices'``
            (a dict; the length of its first value is the number of frames
            averaged per stimulus).
        device: Torch device used for the SRP, the regression and the scoring.
        **kwargs: Optional settings.
            batch_size: Requested DataLoader batch size. It is only consulted
                when two or more frames are averaged, and is then adjusted to
                a multiple of the group size.
            print_shapes: If truthy, print train/test shapes for every fold.

    Returns:
        pandas.DataFrame with one row per (layer, cv iteration, target) and
        the columns ``model_uid``, ``model_layer``, ``model_layer_index``,
        ``target``, ``cv_iter``, ``score``, ``encoding_model``, ``method``
        and ``frame_set``.

    Raises:
        ValueError: If the first group in ``benchmark['group_indices']`` is
            empty, since no frame set can be derived from it.
    """
    model, preprocess = get_deepjuice_model(model_uid)

    response_data = benchmark['response_data']
    image_paths = benchmark['image_paths']
    group_index = benchmark['group_indices']

    target_names = response_data.columns.tolist()
    n_inputs = len(response_data) # unique total

    # Number of leading rows that take part in cross-validation; rows after
    # this point are not used by the scoring below.
    n_cv_inputs = 200

    dataloader = get_data_loader(image_paths, preprocess)
    extractor_desc = 'Global Progress (Extractor Batch)'

    method_info = {'regression': {'encoding_model': 'RidgeCV'},
                   'cvfunction': {'method': '10-iter-5-fold'}}

    sample_group_index = list(group_index.values())[0]
    average_over_nmany = len(sample_group_index)

    if average_over_nmany < 1:
        # Previously this case fell through and failed later with an
        # UnboundLocalError on tensor_fn.
        raise ValueError("benchmark['group_indices'] contains an empty group; "
                         "at least one frame per stimulus is required")

    # Single-frame default: no tensor post-processing.
    tensor_fn = None
    stimulus_info = {'frame_set': 'middle_frame'}

    if average_over_nmany >= 2:
        skip = average_over_nmany

        stimulus_info['frame_set'] = f'average_of_{skip}'

        def tensor_fn(tensor):
            return moving_grouped_average(tensor, skip)

        # NOTE(review): a batch_size kwarg is ignored in the single-frame
        # case above -- confirm whether that is intended.
        batch_size = kwargs.pop('batch_size', None) or dataloader.batch_size

        # Keep the batch size a multiple of the group size, and never let a
        # small request round down to zero.
        batch_size = max(get_nearest_multiple(batch_size, skip), skip)

        dataloader = get_data_loader(image_paths, preprocess,
                                     batch_size = batch_size)

    method_info['stimulus_set'] = stimulus_info.copy()

    # Flatten the nested method metadata once; it is identical for every row.
    method_fields = {}
    for info in method_info.values():
        method_fields.update(info)

    extractor = FeatureExtractor(model, dataloader,
                                 tensor_fn=tensor_fn,
                                 n_inputs=n_inputs,
                                 initial_report=False)

    extractor.modify_settings(flatten=True, batch_progress=True)

    cv_indexer = CVIndexer(n_cv_inputs, iterations=10, random_state=0,
                           iterable_format='list')

    cv_iter_idx = cv_indexer.kfold_split(kfolds=5) # get kfolds

    # One copy of the SRP matrix stays where the extractor put it (used by
    # the CPU fallback below); the clone lives on the requested device.
    global_srp_matrix = extractor.get_global_srp_matrix()
    global_srp_on_gpu = global_srp_matrix.clone().to(device)

    y_actual = torch.from_numpy(response_data.to_numpy())
    y_actual = y_actual.to(torch.float32).to(device)
    y_cvsplit = y_actual[:n_cv_inputs]

    regression = TorchRidgeGCV(alphas=np.logspace(-1,5,7).tolist(),
                               device=device, scale_X=True)

    shape_report = kwargs.pop('print_shapes', False) # for debug

    scoresheet_list = [] # fill with results from each feature map

    for feature_maps in tqdm(extractor, desc=extractor_desc):

        feature_map_iterator = tqdm(feature_maps.items(), desc='Social Event Annotation (Layer)')

        # NOTE(review): layer_index restarts for every extractor batch, so
        # model_layer_index would repeat if the extractor yields more than
        # one batch of layers -- confirm.
        for layer_index, (model_layer, feature_map) in enumerate(feature_map_iterator):
            feature_map_info = {'model_uid': model_uid, 'model_layer': model_layer,
                                'model_layer_index': layer_index+1}

            try: # to run the sparse random projection on the requested device ...
                srp_kwargs = {'device': device, 'srp_matrix': global_srp_on_gpu}
                feature_map = compute_srp(feature_map, **srp_kwargs)

            except Exception: # deliberate best-effort fallback: run SRP with CPU
                feature_map = feature_map.to('cpu')
                clean_and_sweep() # the CUDA cache
                srp_kwargs = {'device': 'cpu', 'srp_matrix': global_srp_matrix}
                feature_map = compute_srp(feature_map, **srp_kwargs)

            feature_map = feature_map.squeeze().to(torch.float32).to(device)

            for cv_iter, kfold_split_idx in enumerate(cv_iter_idx):
                # Filled fold by fold with the held-out predictions.
                y_pred = torch.ones(y_cvsplit.shape, device=device)

                for cv_split_idx in kfold_split_idx.values():
                    X, y = {}, {} # fill with split + cv_idx
                    for split, cv_idx in cv_split_idx.items():
                        X[split] = feature_map[cv_idx, :]
                        y[split] = y_cvsplit[cv_idx]

                    if shape_report:
                        print(list(X['train'].shape), list(X['test'].shape),
                              list(y['train'].shape), list(y['test'].shape))

                    regression.fit(X['train'], y['train'])

                    y_pred[cv_split_idx['test']] = regression.predict(X['test'])

                for score_type in ['pearsonr']:
                    y_true = y_cvsplit.clone()

                    scores = compute_score(y_true, y_pred, score_type)

                    for target_index, target_name in enumerate(target_names):
                        scoresheet_list.append({**feature_map_info,
                                                'target': target_name,
                                                'cv_iter': cv_iter,
                                                'score': scores[target_index].item(),
                                                **method_fields})

    return pd.DataFrame(scoresheet_list)

In [8]:
# Model to evaluate. Use the first CUDA device when one is present and fall
# back to the CPU otherwise, so the cell also runs on machines without a GPU.
model_uid = 'torchvision_alexnet_imagenet1k_v1'
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
results = run_visual_event_pipeline(model_uid, benchmark, device)

Extracting sample feature_maps with torchinfo.
Constructing global SRP matrix for Extractor.


Global Progress (Extractor Batch):   0%|          | 0/1 [00:00<?, ?it/s]
Feature Extraction (DataLoader):   0%|          | 0/18 [00:00<?, ?it/s][A
Feature Extraction (DataLoader):   6%|▌         | 1/18 [00:00<00:15,  1.13it/s][A
Feature Extraction (DataLoader):  11%|█         | 2/18 [00:01<00:13,  1.18it/s][A
Feature Extraction (DataLoader):  17%|█▋        | 3/18 [00:02<00:12,  1.19it/s][A
Feature Extraction (DataLoader):  22%|██▏       | 4/18 [00:03<00:11,  1.20it/s][A
Feature Extraction (DataLoader):  28%|██▊       | 5/18 [00:04<00:10,  1.20it/s][A
Feature Extraction (DataLoader):  33%|███▎      | 6/18 [00:05<00:10,  1.19it/s][A
Feature Extraction (DataLoader):  39%|███▉      | 7/18 [00:05<00:09,  1.19it/s][A
Feature Extraction (DataLoader):  44%|████▍     | 8/18 [00:06<00:08,  1.19it/s][A
Feature Extraction (DataLoader):  50%|█████     | 9/18 [00:07<00:07,  1.20it/s][A
Feature Extraction (DataLoader):  56%|█████▌    | 10/18 [00:08<00:06,  1.19it/s][A
Feature Extraction (D

In [9]:
# Mean score per target for the 'Linear-2-15' layer (averaged over the
# cross-validation iterations), listed from lowest to highest.
(
    results
    .query(pdq('model_layer', 'Linear-2-15'))
    .groupby(['model_uid', 'model_layer', 'target'])['score']
    .mean()
    .reset_index()
    .sort_values(by='score')
)

Unnamed: 0,model_uid,model_layer,target,score
8,torchvision_alexnet_imagenet1k_v1,Linear-2-15,joint_action,-0.093655
3,torchvision_alexnet_imagenet1k_v1,Linear-2-15,cooperation,0.094823
6,torchvision_alexnet_imagenet1k_v1,Linear-2-15,facingness,0.099575
1,torchvision_alexnet_imagenet1k_v1,Linear-2-15,arousal,0.130174
4,torchvision_alexnet_imagenet1k_v1,Linear-2-15,dominance,0.133808
2,torchvision_alexnet_imagenet1k_v1,Linear-2-15,communication,0.229059
9,torchvision_alexnet_imagenet1k_v1,Linear-2-15,transitivity,0.22954
10,torchvision_alexnet_imagenet1k_v1,Linear-2-15,valence,0.23993
7,torchvision_alexnet_imagenet1k_v1,Linear-2-15,intimacy,0.456491
0,torchvision_alexnet_imagenet1k_v1,Linear-2-15,agent_distance,0.538364
