In [2]:
import pathlib
import os
import pandas as pd
import cv2
import numpy as np
from tqdm import tqdm

In [3]:
CK_images = pathlib.Path("C:/Users/cdr03/Documents/Thesis/dataset/CK+/extended-cohn-kanade-images/cohn-kanade-images")
CK_emotion_labels = pathlib.Path("C:/Users/cdr03/Documents/Thesis/dataset/CK+/Emotion_labels/Emotion")
CK_landmarks = pathlib.Path("C:/Users/cdr03/Documents/Thesis/dataset/CK+/Landmarks/Landmarks")

In [4]:
def list_files_recursive(path, format = ".txt"):
    txt_files = set()  # create an empty set to store unique txt file names
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(format):
                file_path = os.path.join(root, file)
                txt_files.add(file_path)  # add the file path to the set
        for dir in dirs:
            dir_path = os.path.join(root, dir)
            list_files_recursive(dir_path)  # recursively call the function on each subdirectory
    return list(txt_files)  # return a list of unique txt file names


Images

In [5]:
CK_images_list = list_files_recursive(CK_images, ".png")
CK_images_list

['C:\\Users\\cdr03\\Documents\\Thesis\\dataset\\CK+\\extended-cohn-kanade-images\\cohn-kanade-images\\S106\\005\\S106_005_00000029.png',
 'C:\\Users\\cdr03\\Documents\\Thesis\\dataset\\CK+\\extended-cohn-kanade-images\\cohn-kanade-images\\S112\\002\\S112_002_00000013.png',
 'C:\\Users\\cdr03\\Documents\\Thesis\\dataset\\CK+\\extended-cohn-kanade-images\\cohn-kanade-images\\S034\\003\\S034_003_00000016.png',
 'C:\\Users\\cdr03\\Documents\\Thesis\\dataset\\CK+\\extended-cohn-kanade-images\\cohn-kanade-images\\S022\\001\\S022_001_00000007.png',
 'C:\\Users\\cdr03\\Documents\\Thesis\\dataset\\CK+\\extended-cohn-kanade-images\\cohn-kanade-images\\S050\\002\\S050_002_00000002.png',
 'C:\\Users\\cdr03\\Documents\\Thesis\\dataset\\CK+\\extended-cohn-kanade-images\\cohn-kanade-images\\S058\\001\\S058_001_00000017.png',
 'C:\\Users\\cdr03\\Documents\\Thesis\\dataset\\CK+\\extended-cohn-kanade-images\\cohn-kanade-images\\S111\\002\\S111_002_00000014.png',
 'C:\\Users\\cdr03\\Documents\\Thesis\\da

In [6]:
def image_to_array(file_path):
    # Read image using imread() function from opencv
    img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
    # Convert image to numpy array using np.array() function
    img_array = np.array(img , dtype="float32")/255
    # Return the numpy array
    return img_array

In [7]:
def detect_and_crop_face(image_path, resize=None):
    # Load the input image
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # Load the Haar Cascade classifier for face detection
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    # Detect faces in the image
    faces = face_cascade.detectMultiScale(image, scaleFactor=1.1, minNeighbors=5)

    # Crop the image to contain the detected face (if any)
    if len(faces) > 0:
        (x, y, w, h) = faces[0]
        face_image = image[y:y+h, x:x+w]
        if resize is not None:
            face_image = cv2.resize(face_image, resize)
            face_image = image_to_array(face_image)
            not_detected_faces = None
    else:
        face_image = None
        not_detected_faces = image
        print("face wasn't detected")
    return face_image, not_detected_faces

In [8]:
def image_to_array(img):
    array = np.array(img , dtype="float32")/255
    return array

In [37]:
size = (224,224)
def readImages_CK(file_list):
    df = pd.DataFrame(columns=['Name','Data'])
    df_face_undetected = pd.DataFrame(columns=['Name','Data'])
    for file_path in tqdm(file_list, desc="Processing Images"):
        data, non_faces = detect_and_crop_face(file_path, size)
        file_name = os.path.basename(file_path)
        clean_name = file_name.rsplit('.', 1)[0]
        temp_df = pd.DataFrame({'Name': [clean_name], 'Data':[data]})
        df = pd.concat([df, temp_df], ignore_index=True)
        temp_df_undetected = pd.DataFrame({'Name': [file_name], 'Data':[non_faces]})
        df_face_undetected = pd.concat([df_face_undetected, temp_df_undetected], ignore_index=True)        
    return df, df_face_undetected

In [39]:
CK_dataset, undetected = readImages_CK(CK_images_list)

Processing Images:   0%|          | 53/10708 [00:04<15:44, 11.29it/s]


KeyboardInterrupt: 

In [40]:
CK_dataset['Data'].iloc[1].shape

(224, 224)

In [41]:
cv2.imshow("test",CK_dataset['Data'].iloc[1])
cv2.waitKey(0)
cv2.destroyAllWindows()

In [42]:
CK_clean = CK_dataset['Name'].str.split("_", expand=True)
CK_clean['Data'] = CK_dataset['Data']
CK_clean.columns = ['Subject', 'Number', 'Code','Image']


In [43]:
CK_clean.to_pickle(f"./CK_Data_{size[0]}")

Emotions

In [44]:
def txt_to_dataframe(file_list):
    df = pd.DataFrame(columns=['Name', 'Value'])
    for file_path in file_list:
        with open(file_path, 'r') as f:
            content = f.read()
        file_name = os.path.basename(file_path)
        clean_name = '_'.join(file_name.split('_')[:3])
        temp_label = pd.DataFrame({'Name': [clean_name], 'Value': [int(float(content))]})
        df = pd.concat([df, temp_label], ignore_index=True)
    return df


In [45]:
CK_emotion_labels_set = list_files_recursive(CK_emotion_labels, ".txt")

In [46]:
CK_emotion_labels_df = txt_to_dataframe(CK_emotion_labels_set)

In [47]:
CK_emotion_labels_df

Unnamed: 0,Name,Value
0,S999_003_00000055,4
1,S064_004_00000014,6
2,S086_002_00000015,5
3,S505_006_00000019,6
4,S055_006_00000008,4
...,...,...
322,S065_003_00000022,7
323,S057_001_00000019,7
324,S064_003_00000025,5
325,S054_004_00000024,3


In [48]:
CK_clean_emotion_labels = CK_emotion_labels_df['Name'].str.split("_", expand=True)
CK_clean_emotion_labels.columns = ['Subject', 'Number', "Code"]
CK_clean_emotion_labels["Emotion"] = CK_emotion_labels_df['Value']

Create dataset just with the last frame

In [49]:
CK_emotion_Last_Frame = pd.merge(CK_clean,CK_clean_emotion_labels, how="inner", on=['Subject','Number','Code'])

In [50]:
CK_emotion_Last_Frame

Unnamed: 0,Subject,Number,Code,Image,Emotion
0,S117,006,00000010,"[[0.24705882, 0.24313726, 0.24705882, 0.247058...",1
1,S058,006,00000018,"[[0.12156863, 0.11764706, 0.11764706, 0.113725...",3
2,S075,006,00000025,"[[0.3764706, 0.38039216, 0.38039216, 0.3843137...",5
3,S129,012,00000011,"[[0.23137255, 0.23529412, 0.23529412, 0.227450...",5
4,S070,003,00000017,"[[0.12156863, 0.12941177, 0.12941177, 0.129411...",5
...,...,...,...,...,...
322,S116,001,00000014,"[[0.25490198, 0.26666668, 0.27450982, 0.274509...",7
323,S096,004,00000011,"[[0.38039216, 0.38039216, 0.3764706, 0.3843137...",5
324,S029,001,00000019,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",1
325,S059,001,00000018,"[[0.42352942, 0.42352942, 0.41960785, 0.419607...",7


In [51]:
CK_emotion_Last_Frame.to_pickle(f"./CK_Data_{size[0]}_Emotion_LF")

FACS

In [52]:
CK_FACS_labels =  pathlib.Path("C:/Users/cdr03/Documents/Thesis/dataset/CK+/FACS_labels/FACS")

In [53]:
def readFACSfile(file_list):
    df = pd.DataFrame(columns=['Name', 'Value'])
    for file_path in file_list:
        with open(file_path, 'r') as f:
            content = f.read()
        file_name = os.path.basename(file_path)
        clean_name = '_'.join(file_name.split('_')[:3])
        temp_label = pd.DataFrame({'Name': [clean_name], 'Value': content})
        df = pd.concat([df, temp_label], ignore_index=True)
    return df

In [54]:
def readFACSfile(file_list):
    df = pd.DataFrame(columns=['Name', 'AU', 'Intensity'])
    for file_path in file_list:
        action_units = []
        intensity = []
        with open(file_path, 'r') as f:
            for line in f.readlines():
                data = line.strip("\n")
                data = data.split("   ")
                action_units.append(int(float(data[1])))
                intensity.append(int(float(data[2])))
        file_name = os.path.basename(file_path)
        clean_name = '_'.join(file_name.split('_')[:3])
        temp_label = pd.DataFrame({'Name':[clean_name], 'AU':[action_units], "Intensity": [intensity]})
        df = pd.concat([df, temp_label], ignore_index=True)
    return df

In [55]:
CK_FACS_set = list_files_recursive(CK_FACS_labels, ".txt")
CK_FACS_df = readFACSfile(CK_FACS_set)

In [56]:
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(CK_FACS_df)

                  Name                                     AU  \
0    S136_001_00000019                   [1, 2, 4, 5, 25, 27]   
1    S133_002_00000020                         [4, 7, 15, 17]   
2    S080_008_00000009                            [9, 10, 24]   
3    S103_003_00000024                                   [12]   
4    S135_004_00000016                               [25, 27]   
5    S128_011_00000016                            [6, 12, 25]   
6    S138_008_00000009                           [14, 15, 24]   
7    S035_002_00000010                                [4, 17]   
8    S112_005_00000017                     [6, 7, 23, 24, 39]   
9    S088_002_00000017               [1, 4, 6, 7, 11, 20, 25]   
10   S118_005_00000026                            [4, 15, 17]   
11   S101_003_00000025                  [1, 4, 7, 17, 23, 20]   
12   S131_003_00000024                      [1, 4, 7, 15, 17]   
13   S111_002_00000015                            [6, 12, 25]   
14   S010_005_00000016   

In [57]:
CK_clean_FACS_labels = CK_FACS_df['Name'].str.split("_", expand=True)
CK_clean_FACS_labels.columns = ['Subject', 'Number', "Code"]
CK_clean_FACS_labels["AU"] = CK_FACS_df['AU']
CK_clean_FACS_labels["Intensity"] = CK_FACS_df['Intensity']

In [58]:
CK_FACS_Last_Frame = pd.merge(CK_clean,CK_clean_FACS_labels, how="inner", on=['Subject','Number','Code'])

In [59]:
CK_FACS_Last_Frame.shape

(589, 6)

In [61]:
test.columns

Index(['Subject', 'Number', 'Code', 'AU_x', 'Intensity_x', 'Image', 'AU_y',
       'Intensity_y'],
      dtype='object')

In [62]:
empty_values = np.where(pd.isnull(test['AU_y']))

In [63]:
test.iloc[empty_values]

Unnamed: 0,Subject,Number,Code,AU_x,Intensity_x,Image,AU_y,Intensity_y
27,S116,2,15,[12],[2],,,
263,S109,2,8,"[25, 26]","[0, 0]",,,
368,S117,2,20,"[1, 4, 11, 15, 17, 39]","[0, 0, 0, 0, 0, 0]",,,
457,S113,2,34,"[1, 4, 7, 11, 20, 25]","[0, 0, 2, 0, 1, 0]",,,


In [64]:
CK_clean_FACS_labels.iloc[empty_values]

Unnamed: 0,Subject,Number,Code,AU,Intensity
27,S116,2,15,[12],[2]
263,S109,2,8,"[25, 26]","[0, 0]"
368,S117,2,20,"[1, 4, 11, 15, 17, 39]","[0, 0, 0, 0, 0, 0]"
457,S113,2,34,"[1, 4, 7, 11, 20, 25]","[0, 0, 2, 0, 1, 0]"


In [None]:
CK_FACS_Last_Frame_Data = pd.merge(CK_clean_FACS_labels,CK_FACS_Last_Frame, how="inner", on=['Subject','Number','Code'])

In [65]:
CK_FACS_Last_Frame.to_pickle(f"./CK_Data_{size[0]}_FACS_LF")

## Create training, validation and test datasets

In [52]:
from sklearn.model_selection import train_test_split

def create_stratified_datasets(X, y, test_size=0.1, val_size=0.1, random_state=None):
    """
    Creates three stratified datasets - train, validation, and test - from the input data X and target y.
    
    Parameters:
        X (array-like): The input data.
        y (array-like): The target variable.
        test_size (float): The proportion of the data to include in the test set.
        val_size (float): The proportion of the remaining data to include in the validation set.
        random_state (int): Seed for the random number generator.
    
    Returns:
        A tuple containing the train, validation, and test datasets, each as a tuple of input and target variables.
    """
    
    # split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)
    
    # split remaining data into validation and train sets
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=val_size/(1-test_size), random_state=random_state, stratify=y_train)
    
    return (X_train, y_train), (X_val, y_val), (X_test, y_test)

Emotion

In [47]:
CK_emotion = pd.read_pickle("./CK_Data_96_Emotion_LF")

In [48]:
CK_emotion = CK_emotion[CK_emotion.Emotion !=2 ]

In [54]:
CK_emotion.Emotion = CK_emotion.Emotion.replace(7,2)

In [60]:
CK_emotion_target = CK_emotion['Emotion'].astype('float32').dtypes

In [61]:
CK_data_target = CK_emotion['Image'].astype('float32').dtypes

ValueError: setting an array element with a sequence.

In [56]:
CK_emotion_target = CK_emotion['Emotion']
CK_data_target = CK_emotion['Image']
(X_train, y_train), (X_val, y_val), (X_test, y_test) = create_stratified_datasets(CK_data_target,CK_emotion_target)

print(f'Distribution in training set: \n{y_train.value_counts().sort_index() / len(y_train)}\n\n'+
      f'Distribution in validation set: \n{y_val.value_counts().sort_index() / len(y_val)}\n\n'+
      f'Distribution in testing set: \n{y_test.value_counts().sort_index() / len(y_test)}')

Distribution in training set: 
1    0.145749
2    0.271255
3    0.190283
4    0.080972
5    0.222672
6    0.089069
Name: Emotion, dtype: float64

Distribution in validation set: 
1    0.129032
2    0.258065
3    0.193548
4    0.096774
5    0.225806
6    0.096774
Name: Emotion, dtype: float64

Distribution in testing set: 
1    0.161290
2    0.258065
3    0.193548
4    0.064516
5    0.225806
6    0.096774
Name: Emotion, dtype: float64


In [57]:
print(f'Distribution in training set: \n{len(y_train)}\n\n'+
      f'Distribution in validation set: \n{len(y_val)}\n\n'+
      f'Distribution in testing set: \n{len(y_test)}')

Distribution in training set: 
247

Distribution in validation set: 
31

Distribution in testing set: 
31


In [58]:
X_train.to_pickle('./Clean_datasets/CK+/CK+_Emotion_X_train')
y_train.to_pickle('./Clean_datasets/CK+/CK+_Emotion_Y_train')
X_val.to_pickle('./Clean_datasets/CK+/CK+_Emotion_X_val')
y_val.to_pickle('./Clean_datasets/CK+/CK+_Emotion_Y_val')
X_test.to_pickle('./Clean_datasets/CK+/CK+_Emotion_X_test')
y_test.to_pickle('./Clean_datasets/CK+/CK+_Emotion_Y_test')

FACS

In [22]:
CK_clean = pd.read_pickle("./CK_Data_224")

In [23]:
CK_FACS_Last_Frame =pd.read_pickle("./CK_Data_224_FACS_LF")

In [24]:
CK_emotion_Last_Frame = pd.merge(CK_clean,CK_FACS_Last_Frame, how="inner", on=['Subject','Number','Code'])

In [25]:
CK_FACS_Last_Frame.columns

Index(['Subject', 'Number', 'Code', 'Image', 'AU', 'Intensity'], dtype='object')

In [26]:
CK_FACS_Last_Frame['Image'].isnull().values.any()

False

In [27]:
# Remove extra Action Units

In [29]:
from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer()

# fit the binarizer on the 'AU' column
mlb.fit(CK_FACS_Last_Frame['AU'])
# transform the 'AU' column into a one-hot encoded matrix
one_hot_encoded_au = mlb.transform(CK_FACS_Last_Frame['AU'])
# convert the matrix into a Pandas dataframe
one_hot_encoded_au_df = pd.DataFrame(one_hot_encoded_au, columns=mlb.classes_)
one_hot_encoded_au_df['Image'] = CK_FACS_Last_Frame['Image']

In [30]:
one_hot_encoded_au_df

Unnamed: 0,1,2,4,5,6,7,9,10,11,12,...,39,43,44,45,54,61,62,63,64,Image
0,0,0,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,"[[0.24705882, 0.24313726, 0.24705882, 0.247058..."
1,0,0,1,0,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,"[[1.0, 1.0, 1.0, 0.99607843, 1.0, 0.99607843, ..."
2,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,"[[0.12156863, 0.11764706, 0.11764706, 0.113725..."
3,0,0,0,0,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,"[[0.3764706, 0.38039216, 0.38039216, 0.3843137..."
4,1,1,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"[[0.07058824, 0.06666667, 0.06666667, 0.070588..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
584,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"[[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.99..."
585,0,0,0,0,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,"[[0.2, 0.2, 0.19607843, 0.19607843, 0.19215687..."
586,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,"[[0.2901961, 0.2901961, 0.28627452, 0.28627452..."
587,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"[[0.16078432, 0.14509805, 0.13333334, 0.109803..."


In [31]:
AU_columns = [1, 2, 4, 5, 6, 9, 10, 12, 15, 17, 18, 20, 24, 25, 26, 28, 'Image']

In [32]:
pandas_df = one_hot_encoded_au_df[one_hot_encoded_au_df.columns.intersection(AU_columns)]

In [35]:
CK_facs_X = pandas_df['Image']

In [36]:
Au_labels_df = pandas_df.drop(columns = ['Image'])

In [37]:
all_zeros = Au_labels_df.eq(0).all(axis=1)
print(all_zeros.loc[all_zeros==True])
# display rows with only zeros
empty_rows = all_zeros.loc[all_zeros==True].index
print(empty_rows)

14     True
79     True
153    True
215    True
293    True
349    True
415    True
484    True
dtype: bool
Int64Index([14, 79, 153, 215, 293, 349, 415, 484], dtype='int64')


In [38]:
CK_facs_X_clean = CK_facs_X.drop(empty_rows)
Au_labels_df_clean  = Au_labels_df.drop(empty_rows)

In [40]:
all_zeros = Au_labels_df_clean.eq(0).all(axis=1)
print(all_zeros.loc[all_zeros==True])

Series([], dtype: bool)


In [41]:
from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit
import numpy as np
def multilable_split(X, y, size=0.1, random_state=12):
   msss = MultilabelStratifiedShuffleSplit(n_splits=1, test_size=size, random_state=random_state)
   for train_index, test_index in msss.split(X, y):
      X_train, X_test = X.iloc[train_index], X.iloc[test_index]
      y_train, y_test = y.iloc[train_index], y.iloc[test_index]
      return X_train, X_test, y_train, y_test

In [42]:
def create_balanced_datasets_multilabel(X, y, test_size=0.10, val_size=0.10):
    """
    Creates three stratified datasets - train, validation, and test - from the input data X and target y.
    
    Parameters:
        X (array-like): The input data.
        y (array-like): The target variable.
        test_size (float): The proportion of the data to include in the test set.
        val_size (float): The proportion of the remaining data to include in the validation set.
        random_state (int): Seed for the random number generator.
    
    Returns:
        A tuple containing the train, validation, and test datasets, each as a tuple of input and target variables.
    """
    # split data into train and test sets
    X_train, X_test, y_train, y_test = multilable_split(X, y, size = test_size)
    
    # split remaining data into validation and train sets
    X_train, X_val, y_train, y_val = multilable_split(X_train, y_train, size=val_size/(1-test_size))
    
    return (X_train, y_train), (X_val, y_val), (X_test, y_test)

In [44]:
(X_train, y_train), (X_val, y_val), (X_test, y_test) = create_balanced_datasets_multilabel(CK_facs_X_clean,Au_labels_df_clean)

In [46]:
X_train.to_pickle('./Clean_datasets/CK+/CK+_FACS_X_train')
y_train.to_pickle('./Clean_datasets/CK+/CK+_FACS_Y_train')
X_val.to_pickle('./Clean_datasets/CK+/CK+_FACS_X_val')
y_val.to_pickle('./Clean_datasets/CK+/CK+_FACS_Y_val')
X_test.to_pickle('./Clean_datasets/CK+/CK+_FACS_X_test')
y_test.to_pickle('./Clean_datasets/CK+/CK+_FACS_Y_test')

In [27]:
X_train

0      [[0.24705882, 0.24313726, 0.24705882, 0.247058...
1      [[1.0, 1.0, 1.0, 0.99607843, 1.0, 0.99607843, ...
2      [[0.12156863, 0.11764706, 0.11764706, 0.113725...
3      [[0.3764706, 0.38039216, 0.38039216, 0.3843137...
4      [[0.07058824, 0.06666667, 0.06666667, 0.070588...
                             ...                        
584    [[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.99...
585    [[0.2, 0.2, 0.19607843, 0.19607843, 0.19215687...
586    [[0.2901961, 0.2901961, 0.28627452, 0.28627452...
587    [[0.16078432, 0.14509805, 0.13333334, 0.109803...
588    [[0.023529412, 0.007843138, 0.015686275, 0.019...
Name: Image, Length: 473, dtype: object

In [45]:
print(f'Distribution in training set ({len(y_train)}): \n{y_train.value_counts().sort_index() / len(y_train)}\n\n'+
      f'Distribution in validation set ({len(y_val)}): \n{y_val.value_counts().sort_index() / len(y_val)}\n\n'+
      f'Distribution in testing set ({len(y_test)}): \n{y_test.value_counts().sort_index() / len(y_test)}')

Distribution in training set (468): 
1  2  4  5  6  9  10  12  15  17  18  20  24  25  26  28
0  0  0  0  0  0  0   0   0   0   0   0   0   0   1   0     0.004274
                                              1   0   0     0.089744
                                                  1   0     0.042735
                                      1   0   1   0   0     0.014957
                              1   0   0   0   0   0   0     0.002137
                                                              ...   
1  1  1  1  0  0  0   0   0   0   0   0   0   1   0   0     0.004274
                                      1   0   1   0   0     0.002137
                              1   0   1   0   1   0   0     0.002137
                                                  1   0     0.002137
                      1   0   0   0   1   0   1   0   0     0.002137
Length: 122, dtype: float64

Distribution in validation set (54): 
1  2  4  5  6  9  10  12  15  17  18  20  24  25  26  28
0  0  0  0  0  0  0   0

In [6]:
y_train = pd.read_pickle('./Clean_datasets/CK+/CK+_FACS_y_train')

In [10]:
CK_au = list(y_train.columns)

In [8]:
MMI_y_train = pd.read_pickle('./Clean_datasets/MMI/MMI_FACS_y_train')

In [11]:
MMI_au = list(MMI_y_train.columns)

In [17]:
MMI_au

['1',
 '10',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '17',
 '18',
 '19',
 '2',
 '20',
 '21',
 '22',
 '23',
 '24',
 '25',
 '26',
 '27',
 '28',
 '28B',
 '28T',
 '29',
 '30',
 '30L',
 '30R',
 '31',
 '32',
 '32B',
 '32T',
 '33',
 '34',
 '35',
 '36',
 '36B',
 '36L',
 '36R',
 '36T',
 '37',
 '38',
 '39',
 '4',
 '41',
 '42',
 '43',
 '44',
 '45',
 '46',
 '46L',
 '46R',
 '5',
 '6',
 '61',
 '62',
 '7',
 '8',
 '9']

In [18]:
MMI_au_numbers = []
for value in MMI_au:
    if value.isdigit():
        MMI_au_numbers.append(int(value))

In [19]:
MMI_au_numbers

[1,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 2,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 4,
 41,
 42,
 43,
 44,
 45,
 46,
 5,
 6,
 61,
 62,
 7,
 8,
 9]

In [14]:
Emotionet_y_train = pd.read_pickle('./Clean_datasets/EmotioNet/EmotioNet_FACS_y_train')

In [15]:
Emotionet_au = list(Emotionet_y_train.columns)

In [16]:
Emotionet_au

[1,
 2,
 4,
 5,
 6,
 9,
 10,
 12,
 15,
 17,
 18,
 20,
 24,
 25,
 26,
 28,
 51,
 52,
 53,
 54,
 55,
 56]

In [20]:
common_aus = set(Emotionet_au) & set(MMI_au_numbers) & set(CK_au)

In [21]:
common_aus

{1, 2, 4, 5, 6, 9, 10, 12, 15, 17, 18, 20, 24, 25, 26, 28}