In [3]:
import pandas as pd
import numpy as np
import itertools

# Load your data
data = pd.read_excel("btp_list.xlsx")

# Define emotion labels and their encoding
emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
emotion_dict = {label: i + 1 for i, label in enumerate(emotion_labels)}

# Helper function to convert data
def process_data(item):
    if isinstance(item, str):
        return np.array([float(x) for x in item.split(",") if x.strip()])
    elif isinstance(item, (list, np.ndarray)):  # Handling list or ndarray directly
        return np.array(item)
    elif pd.isna(item):
        return np.array([])  # Handling NaNs
    else:
        raise ValueError("Unsupported data type")

# Function to convert list of emotion labels to integers
def encode_emotions(emotion_list):
    if isinstance(emotion_list, str):
        return [emotion_dict[emotion] for emotion in emotion_list.split(",") if emotion.strip()]

# Function to calculate correlation safely
def safe_correlation(x, y):
    if len(x) > 1 and len(y) > 1:
        return np.corrcoef(x, y)[0, 1]
    return np.nan  # Return NaN if not enough data

# Prepare to store results
results = []

# For each row in the data
for index, row in data.iterrows():
    try:
        # Convert each column to array of floats
        shoulder_midpoints = process_data(row['shoulder_midpoints'])
        head_turn_angles = process_data(row['head_turn_angles'])
        right_hand = process_data(row['right_hand'])
        left_hand = process_data(row['left_hand'])
        resulted_emotions = encode_emotions(row['resulted_emotions'])

        # Dictionary to store correlations and emotions
        correlations = {'youtube_video_code': row['youtube_video_code'], 'encoded_emotions': resulted_emotions}

        # List of tuples of arrays and their names
        arrays = [('resulted_emotions', resulted_emotions), ('shoulder_midpoints', shoulder_midpoints), ('head_turn_angles', head_turn_angles),
                  ('right_hand', right_hand), ('left_hand', left_hand)]

        # Calculate all pairwise correlations
        for (name1, arr1), (name2, arr2) in itertools.combinations(arrays, 2):
            key = f'{name1}-{name2}'
            # Ensure arrays are the same length
            min_length = min(len(arr1), len(arr2))
            arr1 = arr1[:min_length]
            arr2 = arr2[:min_length]
            correlations[key] = safe_correlation(arr1, arr2)

        # Append the result
        results.append(correlations)
    except Exception as e:
        print(f"Error processing data for video {row['youtube_video_code']}: {e}")

# Create DataFrame from results
results_df = pd.DataFrame(results)

# Save to Excel
results_df.to_excel("pairwise_correlations.xlsx", index=False)

print("Pairwise correlations have been saved to 'pairwise_correlations.xlsx'.")

Error processing data for video Seheo-meHEU: ' happy'
Error processing data for video nzyaFRitfrQ: ' happy'
Error processing data for video 6M1GbIjrD9s: ' fear'
Error processing data for video IrgOlrVT-mo: ' happy'
Error processing data for video e7a9a_X-ebw: ' angry'
Error processing data for video SN5fZn9wIxM: ' happy'
Error processing data for video PF-h2I-jpKg: ' neutral'
Error processing data for video 55HfCkXa7fk: ' neutral'
Error processing data for video FViktfj6kf0: ' happy'
Error processing data for video 7jse0Aizft0: ' neutral'
Error processing data for video erRGmOU0bu4: ' angry'
Error processing data for video T-nArlMdxNI: ' neutral'
Error processing data for video 1D1KHAZQ4f8: ' sad'
Error processing data for video cYWtnCg-t1k: ' sad'
Error processing data for video F7gl-F5POB4: ' fear'
Error processing data for video KJ1aJxmpQTQ: ' happy'
Error processing data for video _d_FZvAt3Bw: ' neutral'
Error processing data for video 8SKavhicqE8: ' fear'
Error processing data for

In [4]:
!pip install xlsxwriter


