In [6]:
import pandas as pd
import numpy as np
from scipy.stats import bootstrap
import seaborn as sns
import matplotlib.pyplot as plt
import os
import csv
import math

In [13]:

# Define the input CSV file path
input_csv_path = "F:/OneDrive - The University of Manchester/SPIM Dros Jacob Tom/23_03/Embryo 6 (PVDRN)/23_03_23_E6_PVDRNz_Statistics/23_03_23_E6_PVDRNz_PosVel.csv" 
w = 25 #moving average window
c = 'tab:blue' #colour 


In [14]:

# Extract the base file name from the input CSV path (without extension)
base_file_name = os.path.splitext(os.path.basename(input_csv_path))[0]

# Find the last underscore and remove everything after it
last_underscore_index = base_file_name.rfind('_')
if last_underscore_index != -1:
    base_file_name = base_file_name[:last_underscore_index]


# Define the pearson CSV file path
pearson_csv = f'F:/pearson/{base_file_name}_pearson2.csv'  # Replace with the path to your input CSV file


# Extract the base file name from the input CSV path (without extension)
base_file_name = os.path.splitext(os.path.basename(pearson_csv))[0]

# Define user-defined titles
column_title = "Distance-based"
row_title = "Time-based"

# Define file paths for output CSV and plot files based on the base file name
column_output_csv = f'F:/pearson/Distance/output/{base_file_name}_distance.csv'
column_output_plot = f'F:/pearson/Distance/plot/{base_file_name}_distance_plot.png'
row_output_csv = f'F:/pearson/Time/output/{base_file_name}_time.csv'
row_output_plot = f'F:/pearson/Time/plot/{base_file_name}_time_plot.png'







# Calculate Pearson Velocity Coeffecient

In [15]:
# Read the CSV file
df = pd.read_csv(input_csv_path)
# Get the unique time points
time_points = df['Time'].unique()

# Create empty lists to store the arrays
x_vals = []
y_vals = []
z_vals = []
vx_vals = []
vy_vals = []
vz_vals = []

# Loop through each time point and extract the arrays
for t in time_points:
    # Get the rows for the current time point
    rows = df[df['Time'] == t]

    # Extract the arrays
    x = rows['Position X'].values
    y = rows['Position Y'].values
    z = rows['Position Z'].values
    vx = rows['Velocity X'].values
    vy = rows['Velocity Y'].values
    vz = rows['Velocity Z'].values

    # Append the arrays to the lists
    x_vals.append(x)
    y_vals.append(y)
    z_vals.append(z)
    vx_vals.append(vx)
    vy_vals.append(vy)
    vz_vals.append(vz)

# Define the width or size of each distance bin
dr = 2  # Modify this value according to your requirements

# Create an empty dictionary to store the correlation coefficient
C = {}

# Loop through each time point
for t in range(len(time_points)):
    # Get the arrays for the current time point
    current_x = x_vals[t]
    current_y = y_vals[t]
    current_z = z_vals[t]
    current_vx = vx_vals[t]
    current_vy = vy_vals[t]
    current_vz = vz_vals[t]

    # Calculate the number of particles
    num_particles = len(current_x)

    # Calculate the maximum distance between particles at the current time point
    max_distance = 0

    # Loop through each particle pair
    for i in range(num_particles - 1):
        for j in range(i + 1, num_particles):
            # Calculate the distance between the particles
            distance = np.sqrt((current_x[i] - current_x[j]) ** 2 +
                               (current_y[i] - current_y[j]) ** 2 +
                               (current_z[i] - current_z[j]) ** 2)

            max_distance = max(max_distance, distance)

    # Calculate the total number of distance bins
    num_bins = int(np.ceil(max_distance / dr))  # Calculate based on 0.1 micron interval

    # Create an empty array to store the correlation coefficient for each distance bin
    corr_coeff = np.zeros(num_bins)

    # Create an empty array to store the count of particle pairs in each distance bin
    pair_counts = np.zeros(num_bins)

    # Loop through each particle pair
    for i in range(num_particles - 1):
        for j in range(i + 1, num_particles):
            # Calculate the distance between the particles
            distance = np.sqrt((current_x[i] - current_x[j]) ** 2 +
                               (current_y[i] - current_y[j]) ** 2 +
                               (current_z[i] - current_z[j]) ** 2)

            # Find the bin index for the distance (use floor to round down)
            bin_index = int(distance / dr)  # Use 0.1 micron interval

            # Ensure that bin_index is within the range of pair_counts
            if bin_index < num_bins:
                # Increment the count of particle pairs in the corresponding bin
                pair_counts[bin_index] += 1


                # Calculate the correlation coefficient (Pearson)"""
                correlation = (np.corrcoef([current_vx[i], current_vy[i], current_vz[i]],[current_vx[j], current_vy[j], current_vz[j]])[0, 1])

                # Increment the corresponding bin with the correlation coefficient
                corr_coeff[bin_index] += correlation

    # Normalize the correlation coefficient by the number of particle pairs in each bin
    corr_coeff /= pair_counts  # Add 1 to avoid division by zero

    # Store the correlation coefficient array for the current time point
    C[t] = corr_coeff



  corr_coeff /= pair_counts  # Add 1 to avoid division by zero


In [16]:
# Find the length of the longest correlation array in C
max_corr_length = max(len(correlation) for correlation in C.values())

# Define the header
last_distance_bin_index = max_corr_length  # Index of the last "Distance Bin"
header = ['Time Point'] + [f'Distance Bin {i}' for i in range(last_distance_bin_index)]

# Export the velocity correlation data as CSV
with open(pearson_csv, 'w', newline='') as f:
    writer = csv.writer(f)
    
    writer.writerow(header)
    
    # Write the data rows
    for t, correlation in C.items():
        data_row = [t] + list(correlation[:last_distance_bin_index])
        writer.writerow(data_row)

print(f"Velocity correlation data has been exported to {pearson_csv}.")


Velocity correlation data has been exported to F:/pearson/23_03_23_E6_PVDRNz_pearson2.csv.
