### Average distance travelled by a child

Per-dataset average of the total distance travelled by each child, obtained by summing the row-to-row position differences.

In [6]:
import pandas as pd
import os
import warnings
import numpy as np

# Dataset names. Each name is used both as the 'Group' key in the scaling
# table and as part of the per-dataset input/output file names.
dataset_names = [
    "DYAD02F_", "DYAD06F_", "DYAD06NF_", "DYAD10F_", "DYAD10NF_", "DYAD11F_",
    "DYAD11NF_", "DYAD12F_", "DYAD12NF_", "DYAD14F_", "DYAD14NF_", "DYAD15F_",
    "DYAD15NF_", "DYAD16F_", "DYAD16NF_", "DYAD18F_", "DYAD18NF_", "DYAD21F_",
    "DYAD21NF_", "DYAD23F_", "DYAD23NF_", "DYAD24F_", "DYAD24NF_"
]

# Directories
# NOTE(review): absolute, user-specific paths; consider a configurable data directory.
base_dir = "/Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/"
scaling_file_path = "/Users/ruzenkakaldenbach/Desktop/Drive/DESK_Measurements_ALL_with_scaling.xlsx"
PixDistConvert = pd.read_excel(scaling_file_path)

# (x, y) column pairs, one pair per tracked child
coordinate_pairs = [("xb", "yb"), ("xr", "yr"), ("xy", "yy")]

# Maps dataset name -> mean (over the children) of the total distance travelled, in metres
average_distance_summary = {}

for dat_name in dataset_names:
    file_path = f"{base_dir}Loopy_{dat_name}_processed.csv"
    if not os.path.exists(file_path):
        warnings.warn(f"File not found: {file_path}")
        continue

    # Look up the px -> mm factor for this dataset. A dataset without a row in
    # the scaling table is skipped with a warning instead of failing with a
    # bare IndexError from `.values[0]`.
    scaling_rows = PixDistConvert.loc[PixDistConvert["Group"] == dat_name, "Conversion 1px to mm"]
    if scaling_rows.empty:
        warnings.warn(f"No scaling factor found for dataset: {dat_name}")
        continue
    scaling_factor = scaling_rows.iloc[0]

    df = pd.read_csv(file_path)

    # Total distance (m) per child within this dataset
    total_distances = []

    for x_coor, y_coor in coordinate_pairs:
        # Scaled coordinates (pixels * mm-per-pixel), stored in new columns
        df[f"{x_coor}_scaled"] = df[x_coor] * scaling_factor
        df[f"{y_coor}_scaled"] = df[y_coor] * scaling_factor

        # Change between two consecutive rows; the first row is NaN by construction
        df[f"{x_coor}_scaled_diff"] = df[f"{x_coor}_scaled"].diff()
        df[f"{y_coor}_scaled_diff"] = df[f"{y_coor}_scaled"].diff()

        # Euclidean step length between consecutive rows. The column name
        # carries the coordinate pair so that each child keeps its own column
        # in the saved file; a single shared "distance_mm" column would be
        # overwritten on every pass and only hold the last child's values.
        step_col = f"step_distance_mm_{x_coor}_{y_coor}"
        df[step_col] = np.sqrt(df[f"{x_coor}_scaled_diff"] ** 2 + df[f"{y_coor}_scaled_diff"] ** 2)

        # Sum of all steps (NaN steps are skipped by .sum()), converted mm -> m
        total_distances.append(df[step_col].sum() / 1000)

    # Mean of the per-child totals for this dataset
    average_distance_summary[dat_name] = np.mean(total_distances)

    # Save the dataset together with the newly added columns
    output_file_path = f"{base_dir}Loopy_{dat_name}_processed_scaled.xlsx"
    df.to_excel(output_file_path, index=False)
    print(f"Processed and saved: {output_file_path}")

# One row per dataset
average_distance_df = pd.DataFrame.from_dict(average_distance_summary, orient="index", columns=["Average Distance (m)"])
print(average_distance_df.to_string())

# Mean and standard deviation of the per-dataset averages across all datasets
overall_mean = average_distance_df["Average Distance (m)"].mean()
overall_sum = overall_mean  # legacy name: this value has always been the mean, not a sum
print(overall_mean)
overall_sd = average_distance_df["Average Distance (m)"].std()
print(overall_sd)

Processed and saved: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD02F__processed_scaled.xlsx
Processed and saved: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD06F__processed_scaled.xlsx
Processed and saved: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD06NF__processed_scaled.xlsx
Processed and saved: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD10F__processed_scaled.xlsx
Processed and saved: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD10NF__processed_scaled.xlsx
Processed and saved: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD11F__processed_scaled.xlsx
Processed and saved: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD11NF__processed_scaled.xlsx
Processed and saved: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD12F__processed_scaled.xl

Cumulative distance for each dataset and each child separately

In [5]:
import pandas as pd
import os
import warnings
import numpy as np

# Dataset names. Each name is used both as the 'Group' key in the scaling
# table and as part of the per-dataset input/output file names.
dataset_names = [
    "DYAD02F_", "DYAD06F_", "DYAD06NF_", "DYAD10F_", "DYAD10NF_", "DYAD11F_",
    "DYAD11NF_", "DYAD12F_", "DYAD12NF_", "DYAD14F_", "DYAD14NF_", "DYAD15F_",
    "DYAD15NF_", "DYAD16F_", "DYAD16NF_", "DYAD18F_", "DYAD18NF_", "DYAD21F_",
    "DYAD21NF_", "DYAD23F_", "DYAD23NF_", "DYAD24F_", "DYAD24NF_"
]

# Directories
# NOTE(review): absolute, user-specific paths; consider a configurable data directory.
base_dir = "/Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/"
scaling_file_path = "/Users/ruzenkakaldenbach/Desktop/Drive/DESK_Measurements_ALL_with_scaling.xlsx"
PixDistConvert = pd.read_excel(scaling_file_path)

# (x, y) column pairs, one pair per tracked child
coordinate_pairs = [("xb", "yb"), ("xr", "yr"), ("xy", "yy")]

# Maps dataset name -> {"<x>_<y>": total distance travelled in metres}
total_distance_summary = {}

for dat_name in dataset_names:
    file_path = f"{base_dir}Loopy_{dat_name}_processed.csv"
    if not os.path.exists(file_path):
        warnings.warn(f"File not found: {file_path}")
        continue

    # Look up the px -> mm factor for this dataset. A dataset without a row in
    # the scaling table is skipped with a warning instead of failing with a
    # bare IndexError from `.values[0]`.
    scaling_rows = PixDistConvert.loc[PixDistConvert["Group"] == dat_name, "Conversion 1px to mm"]
    if scaling_rows.empty:
        warnings.warn(f"No scaling factor found for dataset: {dat_name}")
        continue
    scaling_factor = scaling_rows.iloc[0]

    df = pd.read_csv(file_path)

    # Total distance (m) per child within this dataset, keyed by coordinate pair
    total_distance = {}

    for x_coor, y_coor in coordinate_pairs:
        # Scaled coordinates (pixels * mm-per-pixel), stored in new columns
        df[f"{x_coor}_scaled"] = df[x_coor] * scaling_factor
        df[f"{y_coor}_scaled"] = df[y_coor] * scaling_factor

        # Change between two consecutive rows; the first row is NaN by construction
        df[f"{x_coor}_scaled_diff"] = df[f"{x_coor}_scaled"].diff()
        df[f"{y_coor}_scaled_diff"] = df[f"{y_coor}_scaled"].diff()

        # Euclidean step length between consecutive rows, and its running total
        # stored in a column named after the coordinate pair
        step_mm = np.sqrt(df[f"{x_coor}_scaled_diff"] ** 2 + df[f"{y_coor}_scaled_diff"] ** 2)
        df[f"distance_mm_{x_coor}_{y_coor}"] = step_mm.cumsum()

        # Total distance in metres. Summing the steps (NaN steps are skipped)
        # rather than reading the last cumulative value matters: cumsum leaves
        # NaN wherever a step is NaN, so `.iloc[-1]` yields NaN whenever the
        # final row has a missing coordinate, and raises on an empty frame.
        total_distance[f"{x_coor}_{y_coor}"] = step_mm.sum() / 1000

    # Store the per-child totals under the dataset's name
    total_distance_summary[dat_name] = total_distance

    # Save the dataset together with the newly added columns
    output_file_path = f"{base_dir}Loopy_{dat_name}_processed_scaled.xlsx"
    df.to_excel(output_file_path, index=False)
    print(f"Processed and saved: {output_file_path}")

# One row per dataset, one column per coordinate pair
total_distance_df = pd.DataFrame.from_dict(total_distance_summary, orient="index")
print(total_distance_df.to_string())

# Average total distance across all datasets, separately for each coordinate pair
total_distance_across_datasets = total_distance_df.mean()
print(total_distance_across_datasets)

# Pool every dataset x coordinate-pair total into one flat series so that the
# overall mean and SD are taken over the individual totals. Taking .std() of
# the three per-pair means would only measure the spread between those means.
pooled_totals = pd.Series(total_distance_df.to_numpy(dtype=float).ravel())

# Overall average total distance across all datasets and all coordinate pairs
total_distance_across_datasets_across_pairs = pooled_totals.mean()
print(total_distance_across_datasets_across_pairs)

# Overall standard deviation across all datasets and all coordinate pairs
total_distance_sd_across_datasets_across_pairs = pooled_totals.std()
print(total_distance_sd_across_datasets_across_pairs)

Processed and saved: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD02F__processed_scaled.xlsx
Processed and saved: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD06F__processed_scaled.xlsx
Processed and saved: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD06NF__processed_scaled.xlsx
Processed and saved: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD10F__processed_scaled.xlsx
Processed and saved: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD10NF__processed_scaled.xlsx
Processed and saved: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD11F__processed_scaled.xlsx
Processed and saved: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD11NF__processed_scaled.xlsx
Processed and saved: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD12F__processed_scaled.xl

### Average distances between dyad members, computed from the previously saved scaled coordinates

In [9]:
# Maps dataset name -> {"dist_rb": ..., "dist_ry": ..., "dist_by": ...}, the
# mean pairwise distance over all rows of that dataset. Values are in the units
# of the `*_scaled` columns read from the saved file.
dyadic_distance_summary = {}

# Names of the pairwise-distance columns added to every dataset
dyad_columns = ["dist_rb", "dist_ry", "dist_by"]

# Iterate through each dataset again to compute dyadic distances
for dat_name in dataset_names:
    file_path = f"{base_dir}Loopy_{dat_name}_processed_scaled.xlsx"  # previously saved dataset

    # Skip datasets whose scaled file was never written (e.g. the raw CSV was
    # missing upstream) rather than aborting the whole loop on the first gap.
    if not os.path.exists(file_path):
        warnings.warn(f"File not found: {file_path}")
        continue
    df = pd.read_excel(file_path)

    # Row-wise Euclidean distance between each pair of tracked positions
    df["dist_rb"] = np.sqrt((df["xr_scaled"] - df["xb_scaled"])**2 + (df["yr_scaled"] - df["yb_scaled"])**2)
    df["dist_ry"] = np.sqrt((df["xr_scaled"] - df["xy_scaled"])**2 + (df["yr_scaled"] - df["yy_scaled"])**2)
    df["dist_by"] = np.sqrt((df["xb_scaled"] - df["xy_scaled"])**2 + (df["yb_scaled"] - df["yy_scaled"])**2)

    # Mean of each pairwise distance over all rows of this dataset, as a plain dict
    dyadic_distance_summary[dat_name] = df[dyad_columns].mean().to_dict()

    # Save the dataset with the new distance columns
    output_file = f"{base_dir}Loopy_{dat_name}_dyad_distances.xlsx"
    df.to_excel(output_file, index=False)
    print(f"Saved to: {output_file}")

# One row per dataset, one column per pair
dyadic_distance_df = pd.DataFrame.from_dict(dyadic_distance_summary, orient="index")
print(dyadic_distance_df.to_string())

# Per-dataset mean over the three pairwise distances (row-wise mean)
dyadic_distance_df["mean_all_dyads"] = dyadic_distance_df.mean(axis=1)
print(dyadic_distance_df["mean_all_dyads"])

# Mean and standard deviation of the per-dataset means across all datasets
overall_mean_distance = dyadic_distance_df["mean_all_dyads"].mean()
overall_sd_distance = dyadic_distance_df["mean_all_dyads"].std()
print(overall_mean_distance)
print(overall_sd_distance)



Saved to: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD02F__dyad_distances.xlsx
Saved to: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD06F__dyad_distances.xlsx
Saved to: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD06NF__dyad_distances.xlsx
Saved to: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD10F__dyad_distances.xlsx
Saved to: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD10NF__dyad_distances.xlsx
Saved to: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD11F__dyad_distances.xlsx
Saved to: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD11NF__dyad_distances.xlsx
Saved to: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD12F__dyad_distances.xlsx
Saved to: /Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/Loopy_DYAD12NF__dyad_dis

### Average angles between dyads