# Extracting Features with ViT
- https://huggingface.co/docs/transformers/model_doc/vit#vision-transformer-vit
- https://arxiv.org/abs/2010.11929

In [3]:
import os
import torch
import csv

import pandas as pd
import numpy as np
import torchvision.transforms as transforms

from PIL import Image
from transformers import ViTImageProcessor, ViTFeatureExtractor, ViTModel
from sklearn.model_selection import train_test_split, GridSearchCV

2023-04-24 20:17:18.277200: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
BASE = '/Users/brinkley97/Documents/development/'
CLASS_PATH = 'classes/csci_535_multimodal_probabilistic_learning/'
DATASET_PATH = 'datasets/'

In [5]:
processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')
model = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')

In [44]:
def extract_features(frames_in_crema_d):
    """
    Parameters:
    faces_in_specific_folder_path -- py str (of all faces in a specific folder)
    faces_file_names -- py list (of all the file names in a specific folder)
    
    Return:
    extracted_features -- py
    """
    
    extracted_features_per_video = torch.empty((0, 197, 768))
    for path_to_specific_face in frames_in_crema_d:
        
        specific_frame = Image.open(path_to_specific_face)
        # print(np.shape(specific_frame))

        '''
        Start ViT
        '''

        inputs = processor(images=specific_frame, return_tensors="pt")
        # print(inputs)
        outputs = model(**inputs)

        # last_hidden_states == representation (1 and 2 with GradCam)
        last_hidden_states = outputs.last_hidden_state
        extracted_features_per_video = torch.vstack((extracted_features_per_video, last_hidden_states))
        
        
    return extracted_features_per_video

# CREMA-D

In [7]:
# CREMA_D_PATH = BASE + CLASS_PATH + DATASET_PATH + 'git_lfs/CREMA-D/sample_VideoFlash2/all_faces/'
# CREMA_D_PATH

'/Users/brinkley97/Documents/development/classes/csci_535_multimodal_probabilistic_learning/datasets/git_lfs/CREMA-D/sample_VideoFlash2/all_faces/'

In [10]:
# specific_emotion_folder = os.listdir(CREMA_D_PATH)
# specific_emotion_folder

['.DS_Store',
 '1001_DFA_HAP_XX_frames_to_cropped_face',
 '1001_DFA_NEU_XX_frames_to_cropped_face',
 '1001_DFA_SAD_XX_frames_to_cropped_face',
 '1001_DFA_DIS_XX_frames_to_cropped_face',
 '1001_DFA_ANG_XX_frames_to_cropped_face',
 '1001_DFA_FEA_XX_frames_to_cropped_face']

In [26]:
# specific_emotion_path = CREMA_D_PATH + specific_emotion_folder[2]
# specific_emotion_with_frames = os.listdir(specific_emotion_path)
# # specific_emotion_with_frames

In [32]:
# path_for_specific_emotion_with_frames = specific_emotion_path + "/" + specific_emotion_with_frames[7]
# path_for_specific_emotion_with_frames

'/Users/brinkley97/Documents/development/classes/csci_535_multimodal_probabilistic_learning/datasets/git_lfs/CREMA-D/sample_VideoFlash2/all_faces/1001_DFA_NEU_XX_frames_to_cropped_face/frame50.jpg'

In [None]:
# extract_features(faces_in_specific_folder_path, faces_file_names)

In [1]:
# folder_of_fe_crema_d, fe_crema_d = get_video_path_for_feature_extraction(CREMA_D_PATH)

In [35]:
def get_video_path_for_feature_extraction(path_to_faces):
# def get_video_path_for_feature_extraction(path_to_faces, sub_set_path):
    """
    
    Parameters:
    path_to_faces -- str (of a single path to all saved cropped faces)
    sub_set_path -- str (of specific sub set of the entire dataset to use)
    
    Function calls: 
    extract_features
    
    Return
    folder, features -- tuple (of the folder the extracted features are from and the extracted features)
    """
    list_folders_with_faces_name = []
    store_features_from_faces = []
    
    count_number_files_in_dir = 0 
    face_folder_files = os.listdir(path_to_faces)
    
    build_file_path = []
    
    # for face_folder_file_idx in range(len(sub_set_path)):
        # specific_face_folder = sub_set_path[face_folder_file_idx]
        # print(specific_face_folder)
        
    for face_folder_file_idx in range(len(face_folder_files)):
        specific_face_folder = face_folder_files[face_folder_file_idx]
        path_to_faces_in_specific_folder = path_to_faces + specific_face_folder + "/"
        
        folder_exists = os.path.isdir(path_to_faces_in_specific_folder)
        
        if folder_exists == True:
            list_folders_with_faces_name.append(specific_face_folder)
            all_faces_in_specific_folder = os.listdir(path_to_faces_in_specific_folder)
            # print(specific_face_folder, all_faces_in_specific_folder)
            
            for faces_file_names_idx in range(len(all_faces_in_specific_folder)):
                path_to_specific_face = path_to_faces_in_specific_folder + all_faces_in_specific_folder[faces_file_names_idx]
                # print(path_to_specific_face)
                build_file_path.append(path_to_specific_face)

        else:
            pass
        
    return build_file_path

In [36]:
CREMA_D_PATH = BASE + CLASS_PATH + DATASET_PATH + 'git_lfs/CREMA-D/sample_VideoFlash2/all_faces/'
CREMA_D_PATH

'/Users/brinkley97/Documents/development/classes/csci_535_multimodal_probabilistic_learning/datasets/git_lfs/CREMA-D/sample_VideoFlash2/all_faces/'

In [37]:
path_crema_d = get_video_path_for_feature_extraction(CREMA_D_PATH)
# path_crema_d

In [39]:
crema_d_paths_to_image_df = pd.DataFrame(path_crema_d, columns=['CREMA-D Paths'])
crema_d_paths_to_image_df

Unnamed: 0,CREMA-D Paths
0,/Users/brinkley97/Documents/development/classe...
1,/Users/brinkley97/Documents/development/classe...
2,/Users/brinkley97/Documents/development/classe...
3,/Users/brinkley97/Documents/development/classe...
4,/Users/brinkley97/Documents/development/classe...
...,...
374,/Users/brinkley97/Documents/development/classe...
375,/Users/brinkley97/Documents/development/classe...
376,/Users/brinkley97/Documents/development/classe...
377,/Users/brinkley97/Documents/development/classe...


In [42]:
frames_in_crema_d = list(crema_d_paths_to_image_df['CREMA-D Paths'])

In [45]:
# frames_in_crema_d

In [46]:
extract_features(frames_in_crema_d)

tensor([[[ 0.1296,  0.1780, -0.1627,  ..., -0.4221,  0.1744, -0.0066],
         [-0.0081,  0.0847, -0.3951,  ..., -0.3195,  0.1200, -0.0123],
         [-0.0699,  0.1132, -0.4551,  ..., -0.3730,  0.2579,  0.0280],
         ...,
         [-0.0357,  0.0704, -0.1042,  ..., -0.1797,  0.3018,  0.0598],
         [-0.0190,  0.1394, -0.1630,  ..., -0.2395,  0.2587,  0.1013],
         [-0.1109,  0.1001, -0.1008,  ..., -0.3130,  0.2538,  0.0512]],

        [[ 0.1527,  0.1561, -0.1705,  ..., -0.3990,  0.1612, -0.0234],
         [ 0.1020,  0.0921, -0.3429,  ..., -0.2634,  0.1970,  0.0429],
         [-0.0990, -0.0488, -0.3809,  ..., -0.3617,  0.1873, -0.0080],
         ...,
         [-0.0459,  0.0547, -0.2063,  ..., -0.1403,  0.2510, -0.0671],
         [ 0.0177,  0.1767, -0.2055,  ..., -0.1880,  0.2718,  0.0913],
         [-0.0985,  0.0987, -0.1426,  ..., -0.2908,  0.2797, -0.0893]],

        [[ 0.1380,  0.1623, -0.1639,  ..., -0.4085,  0.1629, -0.0145],
         [ 0.0948,  0.1092, -0.3303,  ..., -0

In [90]:
face_folder_files = os.listdir(CREMA_D_PATH)
# print(face_folder_files)

number_files_in_folder = len(next(os.walk(CREMA_D_PATH))[1])
print(number_files_in_folder)

sub_set_1 = face_folder_files[0:4]
# print(len(sub_set_1), sub_set_1)

6


In [None]:
# folder_of_fe_crema_d_1, fe_crema_d_1 = get_video_path_for_feature_extraction(CREMA_D_PATH, sub_set_1)

In [None]:
# fe_crema_d_1

In [None]:
sub_set_2 = face_folder_files[4:number_files_in_folder]
# print(len(sub_set_2), sub_set_2)

In [None]:
folder_of_fe_crema_d_2, fe_crema_d_2 = get_video_path_for_feature_extraction(CREMA_D_PATH, sub_set_2)

In [None]:
fe_crema_d_2

In [None]:
# the folders of cropped faces to extract features from; #frames in single folder should match #features extracted
# size of extracted features (all should be 196, 768) 
# fe_crema_d[5].shape

In [None]:
def create_storage_for_file_and_extracted_features(folder_names, features_extracted):
    '''
    Parameters:
    folder_names -- py list (of file names, so 1 file per face)
    features_extracted -- py list (features from corresponding file)
    
    '''
    
    file_name_with_features_extracted_dict = {}

    for folder_name_idx in range(len(folder_names)):
        speficic_folder = folder_names[folder_name_idx]
        file_name_with_features_extracted_dict[speficic_folder] = features_extracted[folder_name_idx]

    return file_name_with_features_extracted_dict

In [None]:
file_with_fe_crema_d = create_storage_for_file_and_extracted_features(folder_of_fe_crema_d, fe_crema_d)
# file_with_fe_crema_d

In [None]:
def save_dictionary(dictionary_to_save, save_crema_d_dict_path):
    '''
    Parameters:
    dictionary_to_save -- py dic (of folder_names with corresponding features_extracted)
    
    '''
    save_location = save_crema_d_dict_path + 'crema_d_extracted_features.csv'
    # print(save_location)
    
    write_to_csv = csv.writer(open(save_location, "w"))

    # loop over dictionary keys and values
    for key, val in dictionary_to_save.items():

        # write every key and value to file
        write_to_csv.writerow([key, val])


In [None]:
save_crema_d_dict_path = BASE + CLASS_PATH + DATASET_PATH + 'git_lfs/CREMA-D/sample_VideoFlash/' 
# save_crema_d_dict_path

In [None]:
save_dictionary(file_with_fe_crema_d, save_crema_d_dict_path)

In [None]:
# DISREGAURD

In [None]:
def pool_images_per_emotion(faces_in_specific_folder_path, faces_file_names):
    '''
    Parameters:
    faces_in_specific_folder_path -- 
    faces_file_names -- 
    
    Return:
    '''
    
    array_of_images = []
    for faces_file_names_idx in range(len(faces_file_names)):
        path_to_specific_face = faces_in_specific_folder_path + faces_file_names[faces_file_names_idx]
        
        specific_frame = Image.open(path_to_specific_face)
        frame_as_array = np.asarray(specific_frame)
        # print(np.shape(frame_as_array))
        
        pooled_frames = np.mean(frame_as_array, axis=1)
        # print(np.shape(pooled_frames))
        
        array_to_image = Image.fromarray(pooled_frames, 'RGB')
        # print(type(array_to_image))
        array_of_images.append(array_to_image)
        
    return array_of_images