Notebook that
- extracts features from a training split for image transformation.
- can transform images from one emotion to the other by:
  - finding the features that are relevant to each emotion. To do so, we correlate the ground truth probabilities of each emotion with the feature values. The p-values of these correlations are then used to determine whether a feature is relevant.
  - then the mean features for each emotion are calculated and used as the target values for the transformation
  - each test image is transferred to each emotion that is not the most probable emotion (in total 7 emotions so each image is transferred to 6 target emotions)
  - the transformation itself is done by first picking the features that are relevant to target and original emotion (using p-values) then the image is altered such that the features that are relevant match the mean features of the target emotion.

After the transformation is done we classify both the input and the altered image with Anmol's classification model, which gives us a label for the most likely emotion.
Performance is evaluated by: 
  - precision based on the label
  - if the amount of images classified as the target emotion has increased
  - how many images were classified as the target emotion before vs after the transformation for each target emotion

We evaluate different training/test splits:
- use the images that were used for training of the model for feature extraction and the validation set for testing
- the same but using only images that have major prob < 0.5 for testing

In [None]:
# Colab only: mount Google Drive so that the project folder is reachable below /content/drive/.
# force_remount=True requests a fresh mount even if the drive is already mounted in this session.
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)

In [3]:
import numpy as np
import cv2
from os import listdir
from os.path import join as p_join
from os.path import abspath
from pathlib import Path 
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.backends.backend_pdf
from skimage.feature import greycomatrix, greycoprops
import glob


#%cd /content/drive/MyDrive/CP/Computational Photography
from preprocess_emotion6 import get_split_from_folder, dataframe_from_model, threshold_images
from feature_extraction import calculateGLCMFeatures, calculateHSVFeatures, calculateLaplacian, rgb_values, rms_contrast, calculateFeatures
from feature_transform import change_laplacian, change_bgr, change_hsv
# abspath('') resolves to the current working directory, so the notebook has to be
# started from the project root for the data/CSV paths built from PROJDIR to resolve.
PROJDIR = abspath('')
# last expression of the cell: shows the resolved project directory
PROJDIR

'C:\\Users\\Theresa\\Documents\\CS_Master\\EPFL\\CP\\Computational_Photography_Project'

In [4]:
# Put the path to the project folder here (only needed when it differs from the working directory).
# If this is a shared Drive folder: right click on the folder in the "Shared with me" directory,
# then click "Add shortcut to Drive". Afterwards the data can be accessed from your own drive.
#PROJDIR = '/content/drive/MyDrive/CP/Computational Photography'
print(PROJDIR)
# all data sets are expected below <PROJDIR>/data
DATA = p_join(PROJDIR, 'data') # here could be the data

C:\Users\Theresa\Documents\CS_Master\EPFL\CP\Computational_Photography_Project


In [6]:
# emotion6 dataset: the six emotion classes that have their own image folder
emotions_e6 = ['anger', 'disgust', "fear", "joy", "sadness", "surprise"]
# the six classes plus 'neutral'; same order as the probability columns of the ground truth frame
emotions=['anger','disgust','fear','joy','sadness','surprise','neutral']

# folder that contains one subfolder per emotion
EM = p_join(DATA, 'Emotion6')
EM_all = p_join(EM, 'images')


install the model

In [None]:
%ls
%cd /content/drive/MyDrive/CP/Computational Photography
%cd artemis
!pip install -e .

In [None]:
from google.colab.patches import cv2_imshow
from artemis.neural_models.resnet_encoder import ResnetEncoder
from artemis.neural_models.image_emotion_clf import ImageEmotionClassifier
from artemis.neural_models.mlp import MLP
from PIL import Image
import torch
import random
import warnings
import numpy as np
import pandas as pd
import os.path as osp
import multiprocessing as mp
import torchvision.transforms as transforms
from artemis.in_out.neural_net_oriented import torch_load_model, torch_save_model, save_state_dicts
import os.path as osp

# Relative path: resolved against the current working directory
# (the install cell changes into the artemis folder beforehand).
save_dir = './model_temp'  # for trained model
checkpoint_file = osp.join(save_dir, 'our_model_em6.pt')

# load the trained emotion classifier with the artemis helper imported above
model = torch_load_model(checkpoint_file)

read ground truth for images

1. dataframe that tells which images have been used for the training and validation of the classification model

In [7]:
# Split file of the classification model: one row per image with the split it belongs to.
emotion_distr_anmol = pd.read_csv(p_join(PROJDIR, "emotion6_filtered_split.csv"), sep=',')
# Overwrites the header with five names; assumes the file has exactly five columns,
# the first presumably being a saved index - TODO confirm against the csv.
emotion_distr_anmol.columns = ["Unnamed", "folder", "image", "emotion_distribution", "split"]

2. the ground truth data frame provided with the Emotion6 dataset

In [9]:
# Ground truth of the emotion6 data set (semicolon separated).
emotion_distr = pd.read_csv(p_join(PROJDIR, "em6_groundtruth.csv"), sep=';')
# Column order matters: later code reads valence/arousal/probabilities by position (2, 3, 4:).
emotion_distr.columns = ['folder', 'image', 'valence', 'arousal', 'anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'neutral']
# change image number to integer so that it can be compared with int(<file name>)
data_types_dict = {'image': np.int64}
# ground truth probability distributions + valence & arousal
emotion_distr = emotion_distr.astype(data_types_dict)
# NOTE(review): this lookup is not the last expression of the cell, so its result is discarded
emotion_distr[(emotion_distr.folder=="fear") & (emotion_distr.image==104)]
# display the ground truth frame
emotion_distr

Unnamed: 0,folder,image,valence,arousal,anger,disgust,fear,joy,sadness,surprise,neutral
0,disgust,1,2.5,3.8,0.13,0.700000,0.00,0.07,0.10,0.00,0.00
1,surprise,1,6.3,5.7,0.00,0.066667,0.17,0.23,0.13,0.23,0.17
2,fear,1,2.8,4.8,0.10,0.100000,0.53,0.00,0.13,0.00,0.13
3,joy,1,8.0,4.5,0.00,0.000000,0.00,0.67,0.00,0.27,0.07
4,sadness,1,2.7,5.2,0.00,0.000000,0.10,0.00,0.90,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...
1975,joy,329,7.7,5.2,0.00,0.000000,0.00,0.62,0.00,0.22,0.16
1976,sadness,329,4.2,4.6,0.08,0.072222,0.32,0.07,0.31,0.02,0.13
1977,disgust,330,4.1,5.0,0.02,0.355556,0.17,0.00,0.10,0.02,0.33
1978,joy,330,7.3,5.2,0.00,0.000000,0.00,0.72,0.00,0.12,0.16


function for extracting the features of images that are contained in the groundtruth dataframe and located in a given path

In [10]:
from gc import get_threshold
def extractFeaturesEmotion6(path, emotions: list, groundtruth: pd.DataFrame):
    """
    Extract the image features of every image below `path` that has a ground truth row.

    path: folder that contains one sub-folder per emotion class; loose files
        (e.g. a temporary image written next to the folders) are ignored
    emotions: emotion names in the order of the probability columns of `groundtruth`
    groundtruth: data frame with the columns folder, image (integer id), valence, arousal
        (positions 0-3) followed by one probability column per emotion (position 4 onwards)

    returns: data frame with one row per image containing folder, image (file name),
        Emotion (emotion with the highest ground truth probability), valence, arousal,
        one 'prob_<emotion>' column per emotion and everything calculateFeatures adds
    """
    records = []
    # Visit real sub-folders only instead of cutting off the last listdir entry:
    # listdir returns entries in arbitrary order, so "[:-1]" may drop an emotion folder.
    # Sorting makes the order of the result rows reproducible.
    folders = sorted(name for name in listdir(path) if Path(path, name).is_dir())
    for f in folders:
        print(f)
        folder_path = p_join(path, f)
        for file in sorted(listdir(folder_path)):
            # image id = file name in front of the extension ("104" for "104.jpg")
            image_id = file.split(".")[0]
            try:
                image_number = int(image_id)
            except ValueError:
                # not an emotion6 image (e.g. temp.jpg), cannot be looked up in the ground truth
                continue

            # retrieve ground truth distribution for current image
            gt = groundtruth[(groundtruth['folder']==f) & (groundtruth['image']==image_number)]
            if gt.empty:
                continue

            # read the image only once we know that it is needed
            image_file = p_join(folder_path, file)
            im = cv2.imread(image_file)
            if im is None:
                print(image_file, "could not be read as an image")
                continue

            probabilities = gt.values[0,4:] # ground truth probabilities
            # most likely emotion
            label = emotions[np.argmax(probabilities)]

            row = {}
            row['folder'] = f
            row['image'] = file #image name
            row['Emotion'] = label #most likely emotion
            row['valence'] = gt.values[0,2]
            row['arousal'] = gt.values[0,3]
            for emotion, prob in zip(emotions, probabilities):
                row['prob_'+emotion] = prob # probability for each emotion
            # calculate features (im in BGR colors)
            row = calculateFeatures(row, im)
            records.append(row)
    # build the frame once; DataFrame.append was removed in pandas 2.0 and is quadratic in a loop
    return pd.DataFrame(records)

example

In [None]:
# extract the ground truth rows of the images in the "train" split of Anmol's model
train_em6 = dataframe_from_model(emotion_distr, emotion_distr_anmol, split_class="train")
# extract the features of these images (slow: every image is read and analysed)
df_em6_model = extractFeaturesEmotion6(EM_all, emotions_e6+['neutral'], train_em6)
# cache the result so that the extraction does not have to be repeated
df_em6_model.to_csv(p_join(PROJDIR, "emotion6_features_trainingsplitanmolsmodel.csv"))

In [None]:
# use all images with a major probability larger than 0.5
major_prob_em6 = threshold_images(emotion_distr, threshold = 0.5, greater_than = True)
# extract their features (slow) and cache them as csv
df_em6 = extractFeaturesEmotion6(EM_all, emotions_e6+['neutral'], major_prob_em6)
df_em6.to_csv(p_join(PROJDIR, "emotion6_features_majoremotion.csv"))

read already calculated features to avoid having to repeat the previous steps every time.

In [None]:
# Reload the cached feature tables. The csv files were written with their index,
# so the frames read here carry an additional leading "Unnamed: 0" column.
df_major = pd.read_csv(p_join(PROJDIR, "emotion6_features_majoremotion.csv"), sep=',')
df_em6_model = pd.read_csv(p_join(PROJDIR, "emotion6_features_trainingsplitanmolsmodel.csv"), sep=",")

### step by step explanation how the feature transformation is done
1. extract only the features that can be used for transformations

In [12]:
from scipy.stats import kendalltau, pearsonr, spearmanr

def kendall_pval(x,y):
  """Return only the p-value of the Kendall tau test between x and y."""
  _, p_value = kendalltau(x, y)
  return p_value
  
def pearsonr_pval(x,y):
  """Return only the p-value of the Pearson correlation test between x and y."""
  _, p_value = pearsonr(x, y)
  return p_value

def spearmanr_pval(x,y):
  """Return only the p-value of the Spearman rank correlation test between x and y."""
  _, p_value = spearmanr(x, y)
  return p_value

Assumption: a correlation is considered significant if its p-value is < 0.05.

In [None]:
def get_p_val(feature_df, columns=["hue", "saturation", "brightness", "Laplacian", "blue", "green", "red"]):
    """
    Get the p-values of the Spearman correlation between emotion probabilities and features.

    feature_df: feature data frame with one 'prob_<emotion>' column per emotion and one
        column per feature; works for frames built in memory as well as frames read from csv
    columns: names of the feature columns to return

    returns: data frame with one row per 'prob_<emotion>' column (in the order of feature_df)
        and one column per entry of `columns`; each cell holds the p-value of that pair
    """
    features = list(columns)
    # Pick the probability columns by name. Positional slices of corr().columns only fit
    # frames that were read from csv (those carry an extra leading index column).
    prob_columns = [c for c in feature_df.columns if str(c).startswith("prob_")]
    # Restrict to the columns we need: corr() is pairwise, so the values are unchanged,
    # string columns can no longer break it and far fewer pairs are evaluated.
    p_val = feature_df[prob_columns + features].corr(method=spearmanr_pval)
    return p_val.loc[prob_columns, features]

# p-values of the training features (images with a major probability > 0.5)
p_val_short = get_p_val(df_major)
out_pdf = p_join(PROJDIR,'emotion6_pval.pdf')
pdf = matplotlib.backends.backend_pdf.PdfPages(out_pdf)
plt.figure(figsize=(9,7))
# vmax=0.05 saturates the colour scale at the significance threshold used in this notebook
heat_map = sns.heatmap( p_val_short, linewidth = 1 , annot = True, cmap="Blues", vmin=0.0, vmax=0.05)
plt.suptitle("p-values emotion6")
# savefig() without arguments stores the current figure in the pdf
pdf.savefig()
pdf.close()

2. calculate the means of each feature-emotion pair to have a reference to which values we need to change for each emotion

In [None]:
def get_means_em6(feature_df, emotions_e6, columns=["hue", "saturation", "brightness", "Laplacian", "blue", "green", "red"]):
    """
    Mean feature values per emotion, used as target values for the transformation.

    feature_df: feature data frame with an 'Emotion' column (most likely emotion per image)
    emotions_e6: emotion names without 'neutral'; 'neutral' is appended as the last row
    columns: features that can be used for the transformation (all except most GLCM features)

    returns: data frame indexed by emotions_e6 + ['neutral'] (in that order) with one column
        per entry of `columns`; an emotion without any image gets a row of NaN
    """
    order = list(emotions_e6) + ["neutral"]
    # Select the feature columns before averaging so that string columns never reach mean().
    means = feature_df.groupby("Emotion")[list(columns)].mean()
    # groupby sorts the emotions alphabetically; reindex by label instead of shuffling rows
    # by position, which mislabels rows as soon as one emotion is missing from the data.
    return means.reindex(order).rename_axis(None)

# mean feature values per emotion of the training images; displayed as the cell output
df_em6_means = get_means_em6(df_major, emotions_e6)
df_em6_means

3. create mapping dictionary that tells us which features we need to change for each pair of initial and target emotion.

function that creates a dictionary that tells us which values we need to change to what degree when we want to transform from one emotion to the other. if the value is 0 it means that we do $\textbf{not}$ change the feature:

In [None]:
def create_mapping_dictionary(p_val, means, emotions, use_pval=2):
    """
    returns dataframe that tells which features to change with which value for each pair of original and target emotion
    @param p_val p-values that were calculated using the training data of this data set
           (rows 'prob_<emotion>', one column per feature)
    @param means mean feature values by emotion (rows = emotions, columns in the same order as p_val).
           Used to determine the amount of change for a mapping of one emotion to the other
    @param emotions list of emotions; one row is created per ordered pair of different emotions
    @param use_pval in {0,1,2}
    0=use all features and ignore p-values
    1=use features if p-val of target emotion < 0.05
    2=use features only if p-val of initial and target emotion < 0.05
    @return data frame with the columns original, target and one column per feature;
            a feature that is not selected gets the value 0
    """
    features = list(p_val.columns)
    significant = p_val < 0.05
    rows = []
    for original in emotions:
        for target in emotions:
            if original == target:
                continue
            # mean values of all features for the target emotion
            values = means.loc[[target]].values[0]
            if use_pval != 0:
                # mask that tells whether each feature is relevant for the target emotion
                mask = significant.loc[['prob_'+target]].values[0]
                if use_pval != 1:
                    # ... and for the original emotion as well
                    mask = mask & significant.loc[['prob_'+original]].values[0]
                # features that are not relevant are set to 0
                values = values * mask
            row = {'original': original, 'target': target}
            row.update(zip(features, values))
            rows.append(row)
    # build the frame once; DataFrame.append was removed in pandas 2.0
    return pd.DataFrame(rows, columns=['original', 'target'] + features)
# mapping with use_pval=2: a feature is only changed if it is significant for original AND target emotion
mapping = create_mapping_dictionary(p_val_short, df_em6_means, emotions_e6+['neutral'], 2)
# example: what features do we need to change to which values to go from joy to sadness?
mapping[(mapping['original']=='joy') & (mapping['target']=='sadness')]

## Feature transformation

----
First, examples of the transformation functions for RGB, HSV and the Laplacian (sharpness).
The functions are found in feature_transform.py

1. RGB values

In [None]:
# example change bgr
image_path = p_join(p_join(EM_all, 'fear'), '108.jpg')

# read file and store in temporary file that can be manipulated
im = cv2.imread(image_path)
# Two levels above the image = the folder that holds the emotion sub-folders.
# Path copes with "/" and "\" separators, splitting on "/" only works on Linux/Colab.
folder = str(Path(image_path).parents[1])
temp_file = p_join(folder, 'temp.jpg')
cv2.imwrite(temp_file, im)

#see what it looks like originally (matplotlib expects RGB, OpenCV delivers BGR)
plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
plt.show()
# im is still in BGR order here, so channel 0 is blue and channel 2 is red
print(f"before: mean blue={im[:,:,0].mean():.2f}, mean green={im[:,:,1].mean():.2f}, mean red={im[:,:,2].mean():.2f}")

# change to the given new means and look at the result
target = [20,200,100]
print(f"goal: mean blue={target[0]}, mean green={target[1]}, mean red={target[2]}")
# NOTE(review): assumes change_bgr returns the changed image in BGR order - confirm in feature_transform.py
im = change_bgr(temp_file, target) # bgr
print(f"after: mean blue={im[:,:,0].mean():.2f}, mean green={im[:,:,1].mean():.2f}, mean red={im[:,:,2].mean():.2f}")
im = cv2.imread(temp_file, cv2.IMREAD_COLOR)
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
plt.imshow(im)

2. HSV values

In [None]:
# example hsv
image_path = p_join(p_join(EM_all, 'fear'), '108.jpg')

# read file and store in temporary file that can be manipulated
im = cv2.imread(image_path)
# Two levels above the image = the folder that holds the emotion sub-folders.
# Path copes with "/" and "\" separators, splitting on "/" only works on Linux/Colab.
folder = str(Path(image_path).parents[1])
temp_file = p_join(folder, 'temp.jpg')
cv2.imwrite(temp_file, im)

#see what it looks like originally 
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
plt.imshow(im)
plt.show()
im = cv2.cvtColor(im, cv2.COLOR_RGB2HSV)
print(f"before: mean hue={im[:,:,0].mean():.2f}, mean saturation={im[:,:,1].mean():.2f}, mean value={im[:,:,2].mean():.2f}", )

# change to the given new means and look at the result
target = [50,100,120]
# the result is written to temp_file and read back below
change_hsv(temp_file, target, change_hue=True)
print(f"goal: mean hue={target[0]}, mean saturation={target[1]}, mean brightness={target[2]}")
im = cv2.imread(temp_file)
im = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
print(f"after: mean hue={im[:,:,0].mean():.2f}, mean saturation={im[:,:,1].mean():.2f}, mean value={im[:,:,2].mean():.2f}")
im = cv2.cvtColor(im, cv2.COLOR_HSV2RGB)
plt.imshow(im)

3. Sharpness

In [None]:
# change laplacian
fear_dir = p_join(EM_all, 'fear')
# for the first 3 entries that listdir returns
for img in listdir(fear_dir)[:3]:
  image_path = p_join(fear_dir, img)
  im = cv2.imread(image_path)
  # Two levels above the image = the folder that holds the emotion sub-folders.
  # Path copes with "/" and "\" separators, splitting on "/" only works on Linux/Colab.
  folder = str(Path(image_path).parents[1])
  temp_file = p_join(folder, 'temp.jpg')
  cv2.imwrite(temp_file, im)

  # random (unseeded) target value, so the plots differ from run to run
  change_laplacian(temp_file, np.random.randint(0,1000), plot=True)

function to classify image with classification model

In [None]:
def classify(name, model, emotions):
    """
    Classify one image file with the emotion classification model.

    name: path of the image file
    model: classification model, called with a batch that contains the single image
    emotions: emotion names in the order of the model outputs

    returns: tuple (name of the emotion with the highest model output,
        1-d numpy array with the raw model output for this image)

    Requires image_transformation, which is defined in a later cell of this notebook.
    """
    img = Image.open(name)
    # compare by value: "is not" tests object identity, which is not reliable for strings
    if img.mode != 'RGB':
        img = img.convert('RGB')
    img_transform = image_transformation(256)['train']
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # add the batch dimension that the model expects
    img_inp = torch.unsqueeze(img_transform(img).to(device), 0)

    # inference only: do not record an autograd graph
    # NOTE(review): the model is used in whatever train/eval mode it was loaded with - confirm
    with torch.no_grad():
        out = model(img_inp)
    probabilities = out.detach().cpu().numpy()[0]
    return emotions[int(np.argmax(probabilities))], probabilities

In [None]:
# per-channel mean and standard deviation handed to transforms.Normalize in image_transformation
image_net_mean = [0.485, 0.456, 0.406]
image_net_std = [0.229, 0.224, 0.225]

def image_transformation(img_dim, lanczos=True):
    """
    Build the pre-processing pipeline for the classifier input.

    img_dim: images are resized to img_dim x img_dim
    lanczos: resample with LANCZOS if True, with BILINEAR otherwise

    returns: dict with the keys 'train', 'test', 'val' and 'rest' that all refer to the
        same pipeline (resize, conversion to tensor, normalization)
    """
    resample_method = Image.LANCZOS if lanczos else Image.BILINEAR
    pipeline = transforms.Compose([
        transforms.Resize((img_dim, img_dim), resample_method),
        transforms.ToTensor(),
        transforms.Normalize(mean=image_net_mean, std=image_net_std),
    ])
    # no data augmentation is applied in train, so every split shares the train pipeline
    return dict.fromkeys(('train', 'test', 'val', 'rest'), pipeline)

function that transforms features to target values

In [None]:
def transform_emotion(target_values, image_name, temp_img):
    """
    Transform an image towards the feature values of a target emotion.

    target_values: one-row data frame (row of the mapping for one pair of original and
        target emotion) with the columns blue, green, red, hue, saturation, brightness, Laplacian
    image_name: path of the image to transform; the file itself is never modified
    temp_img: path of the working copy; every manipulation step reads and overwrites this
        file (a file is used because some functions work with cv2 and others with PIL)

    returns: None, the transformed image is found in temp_img
    raises: FileNotFoundError if image_name is missing or cannot be decoded as an image
    """
    im = cv2.imread(image_name)
    if im is None:
        # cv2.imread reports failure by returning None instead of raising
        raise FileNotFoundError(f"{image_name} is missing or not a readable image")
    cv2.imwrite(temp_img, im)

    bgr_target = target_values[['blue', 'green', 'red']].values[0]
    hsv_target = target_values[['hue', 'saturation', 'brightness']].values[0]

    # change feature values to mean values of target emotion, result is written to temp_img
    change_bgr(temp_img, bgr_target)
    # the hue is only changed when no colour channel is changed (all BGR targets are 0)
    change_hsv(temp_img, hsv_target, change_hue=bool(np.all(bgr_target == 0)))
    # scalar target, like the other call of change_laplacian in this notebook
    change_laplacian(temp_img, target_values['Laplacian'].values[0])
    return

In [None]:
def emotion_transform_e6(path, groundtruth, emotions, model, mapping):
    """
    Transform every ground truth image to every other emotion and classify before/after.

    path = path to folder in which there is a folder for each emotion6 class except neutral;
        loose files in it are ignored and the working copy temp.jpg is written into it
    groundtruth = dataframe with the same structure as the emotion6 groundtruth dataframe, contains all images that should be used for the transformation
    emotions = list of emotions in the order of the ground truth probability columns / model outputs
    model = model for classification
    mapping = df with entry for each pair of gt-emotion and target emotion that contains which features should be transformed to which value

    returns: data frame with one row per image and target emotion with the columns folder,
        image, Emotion (most likely ground truth emotion), target, predicted_in
        (prediction for the original image) and predicted_out (prediction after the transformation)
    """
    records = []
    temp_img = p_join(path, "temp.jpg")
    # Visit real sub-folders only: temp.jpg lives in the same directory, and cutting off the
    # last listdir entry is not reliable because listdir returns entries in arbitrary order.
    folders = sorted(name for name in listdir(path) if Path(path, name).is_dir())
    # folders are named with emotions which are not necessarily the most probable emotion for the images in the folder
    for f in folders:
        emotion_folder_path = p_join(path, f)
        print(emotion_folder_path)
        for file in sorted(listdir(emotion_folder_path)):
            image_id = file.split(".")[0]
            try:
                image_number = int(image_id)
            except ValueError:
                # not an emotion6 image, cannot be looked up in the ground truth
                continue

            # retrieve ground truth distribution for current image
            gt = groundtruth[(groundtruth['folder']==f) & (groundtruth['image']==image_number)]
            if gt.empty:
                continue
            probabilities = gt.values[0,4:] # ground truth probabilities

            # compute most likely emotion
            label = emotions[np.argmax(probabilities)]
            # predict emotion before transformation with model
            image_name = p_join(emotion_folder_path, file)
            predicted_in, _ = classify(image_name, model, emotions)

            # all possible target emotions
            for target in emotions:
                if target==label:
                    continue
                target_values = mapping[(mapping['original']==label) & (mapping['target']==target)]
                transform_emotion(target_values, image_name, temp_img)

                # predict emotion after transformation
                predicted_out, _ = classify(temp_img, model, emotions)
                records.append({
                    'folder': f,
                    'image': file, #image name
                    'Emotion': label, #most likely emotion
                    'target': target, #target emotion
                    'predicted_in': predicted_in,
                    'predicted_out': predicted_out,
                })
    # build the frame once; DataFrame.append was removed in pandas 2.0
    return pd.DataFrame(records)

example for one image:
transformation + classification

In [None]:
# Demo: transform one image of the "anger" folder to every other emotion and
# print/plot the features and the predicted label before and after the transformation.
em = "anger"
dir = p_join(EM_all, em)
groundtruth = emotion_distr
emotions = emotions_e6+['neutral']
for target in emotions:
    count = 0
    # the counter stops the inner loop after one image, so only the 9th directory entry is used
    for img in listdir(dir)[8:]:
        if count == 1:
            break
        count += 1

        img_name = p_join(dir, img)
        image_id = img.split(".")[0]
        # retrieve ground truth distribution for current image
        gt = groundtruth[(groundtruth['folder']==em) & (groundtruth['image']==int(image_id))]
        print(image_id, dir)
        probabilities = gt.values[0,4:] # ground truth probabilities
        # compute most likely emotion
        label = emotions[np.argmax(probabilities)]
        # an image is never transformed to its own most likely emotion
        if target==label: continue

        # features and plot of the unchanged image
        im = cv2.imread(img_name)
        row = {}
        row = calculateFeatures(row, im)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        plt.imshow(im)
        plt.show()
        print(label, 'to', target)
        print(f"before transformation:\nhue={row['hue']:.2f}, brightness={row['brightness']:.2f}, saturation={row['saturation']:.2f}, \nred={row['red']:.2f}, blue={row['blue']:.2f}, green={row['green']:.2f},\nlaplacian={row['Laplacian']:.2f}")

        # target feature values for this pair of emotions (0 = feature is not selected)
        target_values = mapping[(mapping['original']==label) & (mapping['target']==target)]
        print(f"goal:\nhue={target_values['hue'].values[0]:.2f}, brightness={target_values['brightness'].values[0]:.2f}, saturation={target_values['saturation'].values[0]:.2f}, \nred={target_values['red'].values[0]:.2f}, blue={target_values['blue'].values[0]:.2f}, green={target_values['green'].values[0]:.2f},\nlaplacian={target_values['Laplacian'].values[0]:.2f}")

        # one output file per target emotion, written next to the emotion folders
        temp_img = p_join(EM_all, "test_"+target+".jpg")
        transform_emotion(target_values, img_name, temp_img)
        print("assigned=", classify(temp_img, model, emotions)[0])
        # features and plot of the transformed image
        row = {}
        im = cv2.imread(temp_img)
        row = calculateFeatures(row, im)
        print(f"after transformation:\nhue={row['hue']:.2f}, brightness={row['brightness']:.2f}, saturation={row['saturation']:.2f}, \nred={row['red']:.2f}, blue={row['blue']:.2f}, green={row['green']:.2f},\nlaplacian={row['Laplacian']:.2f}")
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        plt.imshow(im)
        plt.show()


### Evaluating the transformations

In [None]:
def compute_all(path_to_data, emotions, model, feature_df, test_df, use_pvals=2):
    """
    Transform the images listed in test_df to every other emotion and classify them
    before and after the transformation. The target values come from feature_df
    (p-values per feature/emotion and mean feature values per emotion).

    path_to_data = path to folder in which there is a folder for each emotion
    emotions = list of emotions in the dataset (including 'neutral')
    model = model for classification of emotion
    feature_df = data frame that contains the features extracted from the training split
    test_df = the lines in the ground truth data frame that contain the images we want to transform
    use_pvals = in {0,1,2}, forwarded to create_mapping_dictionary:
        0=use all features, 1=use features with p-value < 0.05 for the target emotion,
        2=use features with p-value < 0.05 for the original and the target emotion

    returns: the data frame produced by emotion_transform_e6
    """
    # get for each emotion the p-values of the features we want to transform
    p_val_short = get_p_val(feature_df)

    # get the mean feature values for each emotion; get_means_em6 appends 'neutral' itself,
    # so it is handed the emotions of this call without 'neutral' (no global list needed)
    base_emotions = [e for e in emotions if e != 'neutral']
    df_em6_means = get_means_em6(feature_df, base_emotions)

    # combine p-vals and means to get only the means of the relevant features
    mapping = create_mapping_dictionary(p_val_short, df_em6_means, emotions, use_pvals)

    print("transform images")
    # transform and classify test images
    return emotion_transform_e6(path_to_data, test_df, emotions, model, mapping)


## Evaluation
#### first dataset
images with highest prob > 0.5 for training (feature extraction), all others for testing (feature transformation)

1. only with p-val < 0.05

In [None]:
emotions = emotions_e6+['neutral']

# load already calculated features for training images
df_major = pd.read_csv(p_join(PROJDIR, "emotion6_features_majoremotion.csv"), sep=',')

# get subset of ground truth dataframe for all images with highest probability smaller than 0.5
minor_prob_em6 = threshold_images(emotion_distr, threshold = 0.5, greater_than = False)
print(len(df_major), "images for feature extraction (train)")
print(len(minor_prob_em6), "images for transformation (test)")

# use_pvals=2: only features that are significant for original and target emotion are changed
df_strongtrain_weaktest = compute_all(EM_all, emotions_e6+['neutral'], model, df_major, minor_prob_em6, 2)
# cache the (slow) result; the next cell evaluates the file
df_strongtrain_weaktest.to_csv(p_join(PROJDIR, 'df_strongtrain_weaktest_pvaluefilter.csv'))

In [None]:
# Evaluate the cached result; every row is one (image, target emotion) pair.
df_strongtrain_weaktest = pd.read_csv(p_join(PROJDIR, 'df_strongtrain_weaktest_pvaluefilter.csv'), sep=",")
print(len(df_major), "images for feature extraction (train)")
print(len(minor_prob_em6), "images for transformation (test)")
# all fractions below are relative to the number of rows, i.e. of (image, target) pairs
print("correctly classified before transformation \t", (df_strongtrain_weaktest['Emotion'].values == df_strongtrain_weaktest["predicted_in"].values).sum() / len(df_strongtrain_weaktest))
print("classified as target before transformation \t", (df_strongtrain_weaktest["target"].values == df_strongtrain_weaktest["predicted_in"].values).sum() / len(df_strongtrain_weaktest))
print("classified as target after transformation \t", (df_strongtrain_weaktest["target"].values == df_strongtrain_weaktest["predicted_out"].values).sum() / len(df_strongtrain_weaktest))
print("fraction were prediction did not change \t", (df_strongtrain_weaktest["predicted_in"].values == df_strongtrain_weaktest["predicted_out"].values).sum() / len(df_strongtrain_weaktest))

# per target emotion: hits before, hits after, and hits that only exist after the transformation
for e in emotions:
    print("classified as", e, "before transformation:\t", ((df_strongtrain_weaktest["target"] == e) & (df_strongtrain_weaktest["predicted_in"] == e)).sum(),
        "\tAfter transformation ", ((df_strongtrain_weaktest["target"] == e) & (df_strongtrain_weaktest["predicted_out"] == e)).sum(),
        "\tNewly after transformation ", ((df_strongtrain_weaktest["target"] == e) & (df_strongtrain_weaktest["predicted_out"] == e) & (df_strongtrain_weaktest["predicted_in"] != e)).sum())


2. all features independent of p-val

In [None]:
# use_pvals=0: all features are changed, the p-values are ignored
df_strongtrain_weaktest = compute_all(EM_all, emotions_e6+['neutral'], model, df_major, minor_prob_em6, 0)
df_strongtrain_weaktest.to_csv(p_join(PROJDIR, 'df_strongtrain_weaktest_nopvaluefilter.csv'))

# evaluate the cached result; every row is one (image, target emotion) pair
df_strongtrain_weaktest = pd.read_csv(p_join(PROJDIR, 'df_strongtrain_weaktest_nopvaluefilter.csv'))
print(len(df_major), "images for feature extraction (train)")
print(len(minor_prob_em6), "images for transformation (test)")
print("correctly classified before transformation \t", (df_strongtrain_weaktest['Emotion'].values == df_strongtrain_weaktest["predicted_in"].values).sum() / len(df_strongtrain_weaktest))
print("classified as target before transformation \t", (df_strongtrain_weaktest["target"].values == df_strongtrain_weaktest["predicted_in"].values).sum() / len(df_strongtrain_weaktest))
print("classified as target after transformation \t", (df_strongtrain_weaktest["target"].values == df_strongtrain_weaktest["predicted_out"].values).sum() / len(df_strongtrain_weaktest))
print("fraction were prediction did not change \t", (df_strongtrain_weaktest["predicted_in"].values == df_strongtrain_weaktest["predicted_out"].values).sum() / len(df_strongtrain_weaktest))

# per target emotion: hits before, hits after, and hits that only exist after the transformation
for e in emotions:
    print("classified as", e, "before transformation:\t", ((df_strongtrain_weaktest["target"] == e) & (df_strongtrain_weaktest["predicted_in"] == e)).sum(),
        "\tAfter transformation ", ((df_strongtrain_weaktest["target"] == e) & (df_strongtrain_weaktest["predicted_out"] == e)).sum(),
        "\tNewly after transformation ", ((df_strongtrain_weaktest["target"] == e) & (df_strongtrain_weaktest["predicted_out"] == e) & (df_strongtrain_weaktest["predicted_in"] != e)).sum())
from sklearn import metrics
# sorted() returns a new list; sorting through an alias would reorder the global `emotions`
# list in place, although its order has to match the order of the model outputs
emsorted = sorted(emotions)
# explicit labels keep rows/columns aligned with the tick labels even if an emotion never occurs
confusion_matrix = metrics.confusion_matrix(df_strongtrain_weaktest["target"], df_strongtrain_weaktest["predicted_out"], labels=emsorted)
plt.figure(figsize=(8,6))
out_pdf = p_join(PROJDIR,'confusionmatrix_em6.pdf')
pdf = matplotlib.backends.backend_pdf.PdfPages(out_pdf)
sns.heatmap(confusion_matrix, annot=True, xticklabels=emsorted, yticklabels=emsorted)
plt.ylabel("target")
plt.xlabel("predicted")
# savefig() without arguments stores the current figure in the pdf
pdf.savefig()
pdf.close()

3. only the p-values for the target emotion are relevant

In [None]:
# Run the transformation experiment with filter mode 1, store the result table
# as CSV and evaluate it from the file that was just written.
targetpvalue_csv = p_join(PROJDIR, 'df_strongtrain_weaktest_targetpvaluefilter.csv')
df_strongtrain_weaktest = compute_all(EM_all, emotions_e6+['neutral'], model, df_major, minor_prob_em6, 1)
df_strongtrain_weaktest.to_csv(targetpvalue_csv)
df_strongtrain_weaktest = pd.read_csv(targetpvalue_csv, sep=",")

print(len(df_major), "images for feature extraction (train)")
print(len(minor_prob_em6), "images for transformation (test)")

# Column views used by the summary fractions below.
labels_true = df_strongtrain_weaktest['Emotion'].values
labels_target = df_strongtrain_weaktest["target"].values
labels_in = df_strongtrain_weaktest["predicted_in"].values
labels_out = df_strongtrain_weaktest["predicted_out"].values
n_rows = len(df_strongtrain_weaktest)

print("correctly classified before transformation \t", (labels_true == labels_in).sum() / n_rows)
print("classified as target before transformation \t", (labels_target == labels_in).sum() / n_rows)
print("classified as target after transformation \t", (labels_target == labels_out).sum() / n_rows)
print("fraction were prediction did not change \t", (labels_in == labels_out).sum() / n_rows)

# Per target emotion: hits before, hits after, and hits that only exist after
# the transformation.
for e in emotions:
    is_target = df_strongtrain_weaktest["target"] == e
    hit_before = is_target & (df_strongtrain_weaktest["predicted_in"] == e)
    hit_after = is_target & (df_strongtrain_weaktest["predicted_out"] == e)
    hit_new = hit_after & (df_strongtrain_weaktest["predicted_in"] != e)
    print("classified as", e, "before transformation:\t", hit_before.sum(),
        "\tAfter transformation ", hit_after.sum(),
        "\tNewly after transformation ", hit_new.sum())



### using the train/val split from Anmol's classification model as train/test

1. using only features with p-value < 0.05

In [None]:
# Feature table for the training split and the "val" split used as test set.
trainsplit_df = pd.read_csv(p_join(PROJDIR, "emotion6_features_trainingsplitanmolsmodel.csv"))
test_em6 = dataframe_from_model(emotion_distr, emotion_distr_anmol, split_class="val")

# Run the transformation experiment with filter mode 2, write the result table
# to CSV and read it back for the evaluation.
pvaluefilter_csv = p_join(PROJDIR, 'df_traintrain_valtest_pvaluefilter.csv')
df_traintrain_valtest = compute_all(EM_all, emotions_e6+['neutral'], model, trainsplit_df, test_em6, 2)
df_traintrain_valtest.to_csv(pvaluefilter_csv)
df_traintrain_valtest = pd.read_csv(pvaluefilter_csv, sep=",")

print(len(trainsplit_df), "images for feature extraction (train)")
print(len(test_em6), "images for transformation (test)")

# (caption, column a, column b): fraction of rows where both columns agree.
agreement_checks = [
    ("correctly classified before transformation \t", 'Emotion', "predicted_in"),
    ("classified as target before transformation \t", "target", "predicted_in"),
    ("classified as target after transformation \t", "target", "predicted_out"),
    ("fraction were prediction did not change \t", "predicted_in", "predicted_out"),
]
for caption, col_a, col_b in agreement_checks:
    agree = df_traintrain_valtest[col_a].values == df_traintrain_valtest[col_b].values
    print(caption, agree.sum() / len(df_traintrain_valtest))

# Per target emotion: number of rows predicted as the target before and after.
for e in emotions:
    is_target = df_traintrain_valtest["target"] == e
    n_before = (is_target & (df_traintrain_valtest["predicted_in"] == e)).sum()
    n_after = (is_target & (df_traintrain_valtest["predicted_out"] == e)).sum()
    print("classified as target", e, "before transformation:\t", n_before,"\tAfter transformation ", n_after)

2. with all features (no p-value filter)

In [None]:
# Run the transformation experiment with filter mode 0 on the train/val split,
# write the result table to CSV and read it back for the evaluation.
nofilter_csv = p_join(PROJDIR, 'df_traintrain_valtest_nopvaluefilter.csv')
df_traintrain_valtest = compute_all(EM_all, emotions_e6+['neutral'], model, trainsplit_df, test_em6, 0)
df_traintrain_valtest.to_csv(nofilter_csv)
df_traintrain_valtest = pd.read_csv(nofilter_csv, sep=",")

print(len(trainsplit_df), "images for feature extraction (train)")
print(len(test_em6), "images for transformation (test)")

# Summary fractions over all rows of the result table.
n_rows = len(df_traintrain_valtest)
col_true = df_traintrain_valtest['Emotion'].values
col_target = df_traintrain_valtest["target"].values
col_in = df_traintrain_valtest["predicted_in"].values
col_out = df_traintrain_valtest["predicted_out"].values
print("correctly classified before transformation \t", (col_true == col_in).sum() / n_rows)
print("classified as target before transformation \t", (col_target == col_in).sum() / n_rows)
print("classified as target after transformation \t", (col_target == col_out).sum() / n_rows)
print("fraction were prediction did not change \t", (col_in == col_out).sum() / n_rows)

# Per target emotion: number of rows predicted as the target before and after.
for e in emotions:
    wanted = df_traintrain_valtest["target"] == e
    before_hits = wanted & (df_traintrain_valtest["predicted_in"] == e)
    after_hits = wanted & (df_traintrain_valtest["predicted_out"] == e)
    print("classified as target", e, "before transformation:\t", before_hits.sum(),"\tAfter transformation ", after_hits.sum())



3. only target emotion p-values are relevant

In [None]:
# Run the transformation experiment with filter mode 1 on the train/val split,
# write the result table to CSV and read it back for the evaluation.
targetfilter_csv = p_join(PROJDIR, 'df_traintrain_valtest_targetpvaluefilter.csv')
df_traintrain_valtest = compute_all(EM_all, emotions_e6+['neutral'], model, trainsplit_df, test_em6, 1)
df_traintrain_valtest.to_csv(targetfilter_csv)
df_traintrain_valtest = pd.read_csv(targetfilter_csv, sep=",")

# (caption, column a, column b): fraction of rows where both columns agree.
for caption, col_a, col_b in (
    ("correctly classified before transformation \t", 'Emotion', "predicted_in"),
    ("classified as target before transformation \t", "target", "predicted_in"),
    ("classified as target after transformation \t", "target", "predicted_out"),
    ("fraction were prediction did not change \t", "predicted_in", "predicted_out"),
):
    same = df_traintrain_valtest[col_a].values == df_traintrain_valtest[col_b].values
    print(caption, same.sum() / len(df_traintrain_valtest))

# Per target emotion: number of rows predicted as the target before and after.
for e in emotions:
    target_rows = df_traintrain_valtest["target"] == e
    count_in = (target_rows & (df_traintrain_valtest["predicted_in"] == e)).sum()
    count_out = (target_rows & (df_traintrain_valtest["predicted_out"] == e)).sum()
    print("classified as target", e, "before transformation:\t", count_in,"\tAfter transformation ", count_out)



---
## APPENDIX
testing the contrast transformation

In [None]:
from PIL import Image,ImageEnhance


def _contrast_measures(gray):
    """Return (GLCM contrast, RMS contrast) for a 2-D grayscale image array.

    The GLCM contrast uses a co-occurrence matrix with distance 1 and angle
    pi/2; the RMS contrast is the standard deviation of the pixel values.
    """
    glcm = greycomatrix(gray, distances=[1], angles=[np.pi/2])
    return greycoprops(glcm, "contrast")[0,0], gray.std()


# For every enhancement factor, compare both contrast measures before and
# after PIL's contrast enhancement on all images in the "joy" folder and plot
# the distribution of the after/before ratios.
joy_dir = p_join(EM_all, "joy")  # not named `dir`, which would shadow the builtin
factors = [0.25,0.5, 1.5,  2]
fig, ax = plt.subplots(1,len(factors), figsize=(14,5))

for i, factor in enumerate(factors):
  contrast_rms_before = []
  contrast_glcm_before = []
  contrast_rms_after = []
  contrast_glcm_after = []
  for img_name in listdir(joy_dir):

    img_path = p_join(joy_dir, img_name)
    img=cv2.imread(img_path)
    if img is None:
      # cv2.imread returns None for entries it cannot decode (e.g. hidden
      # files); skip them instead of failing in cvtColor.
      continue

    # contrast measures of the unmodified image
    glcm_before, rms_before = _contrast_measures(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))

    img = Image.open(img_path)
    # compare the mode by value; `is not` on a string literal tests identity
    if img.mode != 'RGB':
      img = img.convert('RGB')
    e_img = ImageEnhance.Contrast(img).enhance(factor)

    # Convert the enhanced image in memory instead of round-tripping through
    # a temp.png inside the dataset folder. The PIL image is RGB, hence
    # COLOR_RGB2GRAY rather than COLOR_BGR2GRAY.
    gray_after = cv2.cvtColor(np.array(e_img), cv2.COLOR_RGB2GRAY)
    glcm_after, rms_after = _contrast_measures(gray_after)

    contrast_glcm_before += [glcm_before]
    contrast_rms_before += [rms_before]
    contrast_glcm_after += [glcm_after]
    contrast_rms_after += [rms_after]

  # per-image ratio after/before for both measures
  factors_glcm = np.array(contrast_glcm_after) / np.array(contrast_glcm_before)
  factors_rms = np.array(contrast_rms_after) / np.array(contrast_rms_before)
  ax[i].boxplot([factors_glcm, factors_rms] , labels=["glcm", "rms"])
  ax[i].set_title(f"change in contrast with factor {factor}")
fig.tight_layout()


---
Transform the images for the survey, using the best model (features extracted from the training set of the classification model).
The survey images were chosen so that none of them was used for feature extraction.

In [None]:
# Build the original->target feature mapping and transform every survey image:
# images with a negative initial emotion are moved towards "joy", images
# labelled "joy" or "surprise" towards "sadness".
# NOTE(review): the notebook text for this section says the features come from
# the classification model's training split, but the "majoremotion" feature
# table is loaded here -- confirm which one is intended.
EM_survey = p_join(DATA, "surveyimages")
df_major = pd.read_csv(p_join(PROJDIR, "emotion6_features_majoremotion.csv"), sep=',')
p_val = get_p_val(df_major)
means = get_means_em6(df_major, emotions_e6)
mapping = create_mapping_dictionary(p_val, means, emotions, use_pval=True)

# Output folder lives inside the survey folder; make sure it exists before
# anything is written to it (presumably transform_emotion saves to target_img).
out_dir = p_join(EM_survey, "model_theresa")
Path(out_dir).mkdir(parents=True, exist_ok=True)

for file in listdir(EM_survey):
    img_path = p_join(EM_survey, file)
    if not Path(img_path).is_file():
        # listdir also yields sub-folders such as the output folder itself
        continue

    # The initial emotion is the second "_"-separated token of the part of
    # the file name before the first ".".
    file_name = file.split(".")[0]
    name_parts = file_name.split("_")
    print(img_path)
    if len(name_parts) < 2:
        # e.g. hidden files: no "_" in the name, so there is no emotion token
        print("not a valid image")
        continue
    initial_emotion = name_parts[1]

    if initial_emotion in ["anger", "fear", "sadness", "disgust"]:
        target = "joy"
    elif initial_emotion in ["joy", "surprise"]:
        target = "sadness"
    else:
        print("not a valid image")
        continue

    # Rows of the mapping for this (original, target) pair.
    target_values = mapping[(mapping['original']==initial_emotion) & (mapping['target']==target)]
    target_img = p_join(out_dir, file_name+"_"+target+".jpg")
    print(target_img, "\n")
    transform_emotion(target_values, img_path, target_img)