In [None]:
from ChoicePlotlyNew import *

In [None]:
# Root folder of the recording session; every immediate sub-folder is scanned for CSV files
# by the loading cell below.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
directory = (
    '/Users/apaula/ownCloud/MatrexVR1/VR4(DrosophilaArray)/20241126_floor_Data/RunData'
)

# Passed as the second argument to process_dataframe(); presumably the number of seconds
# trimmed from each recording — TODO confirm against ChoicePlotlyNew.
trim_seconds = 1

In [None]:
# Load every CSV found in the immediate sub-folders of `directory` and stack the processed
# frames into a single DataFrame `df`.
#
# Reads:   directory, trim_seconds (config cell); load_csv / process_dataframe (star import).
# Writes:  subdirectories, combined_dfs, df.
# Raises:  ValueError when not a single non-empty frame could be loaded.

# sorted() makes the load order reproducible; os.listdir() returns entries in arbitrary order.
subdirectories = sorted(
    os.path.join(directory, d)
    for d in os.listdir(directory)
    if os.path.isdir(os.path.join(directory, d))
)

if not subdirectories:
    print(f"No subdirectories found in directory: {directory}")

combined_dfs = []
for subdir in subdirectories:
    subfolder_name = os.path.basename(subdir)
    print(f"Processing subfolder: {subfolder_name}")
    file_paths = sorted(
        os.path.join(subdir, f) for f in os.listdir(subdir) if f.endswith('.csv')
    )

    if not file_paths:
        print(f"No CSV files found in subfolder: {subdir}")
        continue

    dfs = []
    for f in file_paths:
        # A dedicated name keeps the per-file frame from shadowing the final `df`.
        file_df = process_dataframe(load_csv(f), trim_seconds)
        if not file_df.empty:
            dfs.append(file_df)
        else:
            print(f"No data loaded from {f}")

    if not dfs:
        print(f"No data frames were loaded for subfolder: {subfolder_name}")
        continue

    combined_df = pd.concat(dfs, ignore_index=True)
    combined_dfs.append(combined_df)

# pd.concat([]) fails with an opaque "No objects to concatenate"; fail with a message that
# names the directory instead (same exception type as before).
if not combined_dfs:
    raise ValueError(f"No CSV data could be loaded from any subfolder of: {directory}")

# ignore_index=True gives the combined frame a unique 0..n-1 index; without it every
# subfolder contributes its own 0..k labels and the index contains duplicates.
df = pd.concat(combined_dfs, ignore_index=True)

In [None]:
# Print a concise summary of the combined frame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Summary statistics of the 'elapsed_time' column.
df['elapsed_time'].describe()

In [None]:
# Display the combined frame as the cell output (pandas abbreviates long frames
# according to its display options).
df

In [None]:
# Label every row with an integer that identifies its trial: one label per distinct
# (SourceFile, ConfigFile, CurrentTrial) combination.
df['UniqueTrialID'] = (
    df
    .groupby(by=['SourceFile', 'ConfigFile', 'CurrentTrial'])
    .ngroup()
)


In [None]:
# Calculate the number of unique trial IDs
# (Series.nunique() does not count missing values by default)
num_unique_trials = df['UniqueTrialID'].nunique()

# Display the result
print(f'There are {num_unique_trials} unique trial IDs.')

In [None]:
import numpy as np
import pandas as pd

# Order rows by trial and then by timestamp so that neighbouring rows of a trial are
# consecutive samples.
df = df.sort_values(by=['UniqueTrialID', 'Current Time'])

# Per-trial first differences of the three position coordinates, computed in one grouped pass.
# The first row of every trial has no predecessor and therefore receives NaN.
position_deltas = df.groupby('UniqueTrialID')[
    ['GameObjectPosX', 'GameObjectPosY', 'GameObjectPosZ']
].diff()
df['delta_x'] = position_deltas['GameObjectPosX']
df['delta_y'] = position_deltas['GameObjectPosY']
df['delta_z'] = position_deltas['GameObjectPosZ']

# Euclidean length of each step; NaN (first sample of a trial) counts as zero movement.
squared_step = df['delta_x']**2 + df['delta_y']**2 + df['delta_z']**2
df['step_distance'] = np.sqrt(squared_step).fillna(0)


In [None]:
# Total path length per trial: the sum of that trial's per-sample step distances.
# Result: one row per UniqueTrialID with columns ['UniqueTrialID', 'TotalDisplacement'].
total_displacement = (
    df.groupby('UniqueTrialID')['step_distance']
    .sum()
    .reset_index()
    .rename(columns={'step_distance': 'TotalDisplacement'})
)

# Attach the per-trial total to every row of the main frame.
# A 'TotalDisplacement' column left over from an earlier run of this cell is dropped first;
# otherwise merge() would produce 'TotalDisplacement_x' / 'TotalDisplacement_y' and every
# later cell that reads 'TotalDisplacement' would fail.
df = (
    df.drop(columns='TotalDisplacement', errors='ignore')
    .merge(total_displacement, on='UniqueTrialID', how='left')
)

In [None]:
# Displacement thresholds, expressed in the same units as the position data
# (centimeters according to the original note — TODO confirm).
# NOTE(review): the next cell reassigns both names (0 and 50) before they are first used.
min_displacement, max_displacement = 5, 100

In [None]:
# Displacement thresholds used for the classification below (same units as the data).
# NOTE(review): step_distance is a square root with NaN replaced by 0, so a trial total is
# never negative; with min_displacement = 0 the "stationary" group is therefore always empty.
min_displacement = 0    # in centimeters
max_displacement = 50   # in centimeters

# Split the trial IDs into three groups by total displacement:
#   stationary: total <  min_displacement
#   normal:     min_displacement <= total <= max_displacement
#   excessive:  total >  max_displacement
stationary_trial_ids = total_displacement.loc[
    total_displacement['TotalDisplacement'] < min_displacement, 'UniqueTrialID'
].unique()
normal_trial_ids = total_displacement.loc[
    total_displacement['TotalDisplacement'].between(min_displacement, max_displacement),
    'UniqueTrialID',
].unique()
excessive_trial_ids = total_displacement.loc[
    total_displacement['TotalDisplacement'] > max_displacement, 'UniqueTrialID'
].unique()

# Row-level frames for each group, re-indexed from zero.
df_stationary = df.loc[df['UniqueTrialID'].isin(stationary_trial_ids)].reset_index(drop=True)
df_normal = df.loc[df['UniqueTrialID'].isin(normal_trial_ids)].reset_index(drop=True)
df_excessive = df.loc[df['UniqueTrialID'].isin(excessive_trial_ids)].reset_index(drop=True)

In [None]:
import matplotlib.pyplot as plt

def plot_trajectories(df_group, group_name, sample_size=None):
    """
    Plot the X-Z trajectory of every trial in `df_group` on one new figure.

    Args:
        df_group (DataFrame): Rows with at least the columns 'UniqueTrialID',
            'GameObjectPosX' and 'GameObjectPosZ'. Each trial is drawn in row order.
        group_name (str): Label inserted into the figure title.
        sample_size (int, optional): When given and smaller than the number of trials,
            only this many trials (drawn without replacement) are plotted.

    Returns:
        None. The figure is shown with plt.show().
    """
    plt.figure(figsize=(10, 8))
    unique_trials = df_group['UniqueTrialID'].unique()

    # Optionally sample trials if there are too many
    if sample_size and len(unique_trials) > sample_size:
        # A private, fixed-seed generator yields the same subset as the former
        # np.random.seed(42) + np.random.choice(...) pair, but no longer resets the
        # global NumPy random state for the rest of the notebook.
        sampler = np.random.RandomState(42)
        unique_trials = sampler.choice(unique_trials, size=sample_size, replace=False)

    for trial_id in unique_trials:
        trial_data = df_group[df_group['UniqueTrialID'] == trial_id]
        plt.plot(trial_data['GameObjectPosX'], trial_data['GameObjectPosZ'], alpha=0.5)

    # Equal axis scaling so that distances look the same along X and Z.
    plt.axis('equal')
    plt.xlabel('GameObjectPosX')
    plt.ylabel('GameObjectPosZ')
    plt.title(f'Trajectories of {group_name} Trials')
    plt.show()


In [None]:
# Report how many trials have a total displacement below min_displacement,
# then draw their X-Z trajectories.
print(f"Number of stationary trials: {len(stationary_trial_ids)}")
plot_trajectories(df_stationary, 'Stationary')


In [None]:
# Report how many trials lie between min_displacement and max_displacement (inclusive),
# then draw their X-Z trajectories.
print(f"Number of normal moving trials: {len(normal_trial_ids)}")
plot_trajectories(df_normal, 'Normal Moving')  # all trials are drawn; pass sample_size=N to plot a random subset

In [None]:
# Report how many trials have a total displacement above max_displacement,
# then draw their X-Z trajectories.
print(f"Number of excessive moving trials: {len(excessive_trial_ids)}")
plot_trajectories(df_excessive, 'Excessive Moving')


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Works on df_normal (the trials classified as "normal" above).

# One row per (SourceFile, UniqueTrialID) carrying that trial's TotalDisplacement.
# TotalDisplacement was merged in per trial, so max() presumably just picks the single
# per-trial value.
max_displacement_per_trial = (
    df_normal
    .groupby(['SourceFile', 'UniqueTrialID'])['TotalDisplacement']
    .max()
    .reset_index()
)

# Per-file list of trial displacements (not used by the plot below).
plot_data = (
    max_displacement_per_trial
    .groupby('SourceFile')['TotalDisplacement']
    .apply(list)
    .reset_index()
)

# One box per source file.
plt.figure(figsize=(20, 10))
sns.boxplot(x='SourceFile', y='TotalDisplacement', data=max_displacement_per_trial)
plt.xticks(rotation=90)  # vertical tick labels
plt.gca().set(
    xlabel='Source File',
    ylabel='Max Total Displacement per Trial (cm)',
    title='Distribution of Max Total Displacement per Trial for Each Source File',
)
plt.grid(True)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Works on df_normal (the trials classified as "normal" above).

# One row per (SourceFile, UniqueTrialID) carrying that trial's TotalDisplacement.
max_displacement_per_trial = (
    df_normal
    .groupby(['SourceFile', 'UniqueTrialID'])['TotalDisplacement']
    .max()
    .reset_index()
)

# Mean displacement per source file in ascending order; this fixes the left-to-right
# order of the boxes.
mean_displacements = (
    max_displacement_per_trial
    .groupby('SourceFile')['TotalDisplacement']
    .mean()
    .reset_index()
    .sort_values('TotalDisplacement', ascending=True)
)
sorted_source_files = mean_displacements['SourceFile'].tolist()

# One box per source file, ordered by mean displacement.
plt.figure(figsize=(20, 10))
sorted_box_plot = sns.boxplot(
    x='SourceFile',
    y='TotalDisplacement',
    data=max_displacement_per_trial,
    order=sorted_source_files,
)
plt.xticks(rotation=90)  # vertical tick labels
sorted_box_plot.set(
    xlabel='Source File',
    ylabel='Max Total Displacement per Trial (cm)',
    title='Distribution of Max Total Displacement per Trial for Each Source File Sorted by Mean Displacement',
)
plt.grid(True)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Works on df_normal (the trials classified as "normal" above).

# Mean TotalDisplacement for each CurrentStep value.
# NOTE(review): the mean is taken over rows of df_normal, so trials with more samples
# weigh more — confirm this is intended.
displacement_per_step = (
    df_normal
    .groupby('CurrentStep')['TotalDisplacement']
    .mean()
    .reset_index()
)

# One bar per CurrentStep.
plt.figure(figsize=(12, 6))
sns.barplot(x='CurrentStep', y='TotalDisplacement', data=displacement_per_step)
plt.gca().set(
    xlabel='Current Step',
    ylabel='Average Total Displacement (cm)',
    title='Average Total Displacement by Current Step',
)
plt.grid(True)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Works on df_normal (the trials classified as "normal" above).

# Violin plot of TotalDisplacement for each CurrentStep, quartiles marked inside.
# NOTE(review): every row of df_normal contributes, so each trial's value is repeated
# once per sample — confirm this is intended.
plt.figure(figsize=(12, 6))
sns.violinplot(
    x='CurrentStep',
    y='TotalDisplacement',
    data=df_normal,
    scale='width',
    inner='quartile',
)
plt.gca().set(
    xlabel='Current Step',
    ylabel='Total Displacement (cm)',
    title='Distribution of Total Displacement by Current Step',
)
plt.grid(True)
plt.show()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Works on df (all trials). Relies on the rows of each trial being in time order, as
# established by the earlier sort on ['UniqueTrialID', 'Current Time'].

# Step 1: Extract the final recorded row of each trial.
# tail(1) keeps the actual last row. GroupBy.last() returns the last *non-null* value of
# each column separately, which can combine values from different samples when a column
# has gaps.
last_positions = df.groupby('UniqueTrialID').tail(1).reset_index(drop=True)

# Step 2: Direction of the end point as seen from the origin in the X-Z plane, in degrees.
# arctan2 yields (-180, 180]; the modulo maps negative angles onto the 0-360 scale.
last_positions['Angle'] = np.degrees(
    np.arctan2(last_positions['GameObjectPosZ'], last_positions['GameObjectPosX'])
) % 360

# Step 3: One histogram of end-point directions per 'CurrentStep', laid out on a 2-column grid.
unique_steps = last_positions['CurrentStep'].unique()
n_cols = 2
# Ceiling division: exactly as many rows as needed (the former `n // 2 + 1` left an empty
# row whenever the number of steps was even).
n_rows = max(1, (len(unique_steps) + n_cols - 1) // n_cols)

plt.figure(figsize=(15, 10))
for i, step in enumerate(sorted(unique_steps)):
    plt.subplot(n_rows, n_cols, i + 1)
    step_angles = last_positions.loc[last_positions['CurrentStep'] == step, 'Angle']
    # 36 bins over 0-360 degrees -> 10-degree bins
    plt.hist(step_angles, bins=36, range=[0, 360], color='skyblue', edgecolor='black')
    plt.title(f'Current Step {step}')
    plt.xlabel('Directional Angle (degrees)')
    plt.ylabel('Frequency')
    plt.xlim([0, 360])
    plt.xticks(np.arange(0, 361, 45))  # Setting ticks every 45 degrees

plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Works on df_normal (the trials classified as "normal" above). Relies on the rows of each
# trial being in time order.

# Step 1: Extract the final recorded row of each trial.
# tail(1) keeps the actual last row. GroupBy.last() returns the last *non-null* value of
# each column separately, which can combine values from different samples when a column
# has gaps.
last_positions = df_normal.groupby('UniqueTrialID').tail(1).reset_index(drop=True)

# Step 2: Euclidean distance of the end point from the origin in the X-Z plane.
last_positions['DistanceFromOrigin'] = np.sqrt(
    last_positions['GameObjectPosX']**2 + last_positions['GameObjectPosZ']**2
)

# Step 3: Histogram (with KDE overlay) of the final distances.
plt.figure(figsize=(12, 6))
sns.histplot(last_positions['DistanceFromOrigin'], bins=30, kde=True, color='blue')
plt.xlabel('Distance from Origin (units)')
plt.ylabel('Frequency')
plt.title('Distribution of Final Distances from Origin')
plt.grid(True)
plt.show()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import mannwhitneyu

# Compare the final distance from the origin between CurrentStep == 0 and CurrentStep == 1.
# Works on df_normal and relies on the rows of each trial being in time order.

# Step 1: Distance from the origin in the X-Z plane for every sample (the column is kept
# on df_normal).
df_normal['DistanceFromOrigin'] = np.sqrt(df_normal['GameObjectPosX']**2 + df_normal['GameObjectPosZ']**2)

# Keep only the last sample of each trial. The plots and the test below are about *final*
# distances; feeding in every sample of every trial would compare whole trajectories and
# treat the strongly correlated samples of one trial as independent observations.
final_positions = df_normal.groupby('UniqueTrialID').tail(1)

# Separate the final positions by step
df_trial1 = final_positions[final_positions['CurrentStep'] == 0]
df_trial2 = final_positions[final_positions['CurrentStep'] == 1]

# Step 2: Plot violin plots to compare distributions
plt.figure(figsize=(10, 6))
sns.violinplot(x='CurrentStep', y='DistanceFromOrigin', data=final_positions)
plt.title('Comparison of Final Distances from Origin by step')
plt.grid(True)
plt.show()

# Step 3: Density Plot
plt.figure(figsize=(10, 6))
sns.kdeplot(df_trial1['DistanceFromOrigin'], label='Trial1', fill=True)
sns.kdeplot(df_trial2['DistanceFromOrigin'], label='Trial2', fill=True)
plt.title('Density Plot of Final Distances from Origin')
plt.xlabel('Distance from Origin (units)')
plt.ylabel('Density')
plt.legend()
plt.grid(True)
plt.show()

# Step 4: Statistical Test (one observation per trial)
if df_trial1.empty or df_trial2.empty:
    # mannwhitneyu cannot be computed without observations in both groups.
    print('Mann-Whitney U test skipped: at least one group has no trials.')
else:
    stat, p = mannwhitneyu(df_trial1['DistanceFromOrigin'], df_trial2['DistanceFromOrigin'])
    print(f'Mann-Whitney U test results: U = {stat}, p-value = {p}')


In [None]:
import pandas as pd

# Works on df_normal (the trials classified as "normal" above).

# Per-trial clock: 'elapsed_time' minus the smallest 'elapsed_time' of the same trial,
# so 'trial_time' starts at zero within every trial.
df_normal['trial_time'] = (
    df_normal['elapsed_time']
    - df_normal.groupby('UniqueTrialID')['elapsed_time'].transform('min')
)


In [None]:
import matplotlib.pyplot as plt

def plot_single_frame(df_group, trial_time_point, ax):
    """
    Draw each trial's X-Z path, truncated at a given trial time, onto an existing Axes.

    The view is fixed to [-25, 25] on both axes with equal aspect, so frames drawn for
    different time points share the same viewport.

    Args:
    df_group (DataFrame): Trajectory rows with the columns 'UniqueTrialID', 'trial_time',
        'GameObjectPosX' and 'GameObjectPosZ'.
    trial_time_point (float): Only samples with trial_time <= this value are drawn.
    ax (matplotlib.axes.Axes): The Axes object that receives the lines and labels.
    """
    # Viewport and labelling do not depend on the data, so set them up front.
    ax.set_xlim([-25, 25])
    ax.set_ylim([-25, 25])
    ax.set_aspect('equal')
    ax.set_xlabel('GameObjectPosX')
    ax.set_ylabel('GameObjectPosZ')
    ax.set_title(f'Trajectories up to {trial_time_point} seconds')

    # Apply the time cut once, then pick out each trial from the remainder.
    elapsed_rows = df_group[df_group['trial_time'] <= trial_time_point]
    for trial_id in df_group['UniqueTrialID'].unique():
        segment = elapsed_rows[elapsed_rows['UniqueTrialID'] == trial_id]
        # A trial without samples yet still gets an (empty) line.
        ax.plot(segment['GameObjectPosX'], segment['GameObjectPosZ'], alpha=0.2)

# Downsample: keep only the rows whose index label is a multiple of 6 (every 6th record).
# The helper column 'index_mod' stays on df_normal.
df_normal['index_mod'] = df_normal.index % 6
df_reduced = df_normal.loc[df_normal['index_mod'].eq(0)]

# Example usage: a single frame showing all trajectories up to 15 seconds of trial time
fig, ax = plt.subplots(figsize=(10, 8))
plot_single_frame(df_reduced, 15, ax)
plt.show()


In [None]:
import matplotlib.pyplot as plt

def plot_single_frame(df_group, trial_time_point, ax):
    """
    Draw each trial's X-Z path, truncated at `trial_time_point`, on the given axes.

    Args:
        df_group (DataFrame): Trajectory rows with the columns 'UniqueTrialID',
            'trial_time', 'GameObjectPosX' and 'GameObjectPosZ'.
        trial_time_point (float): Only samples with trial_time <= this value are drawn.
        ax (matplotlib.axes.Axes): Axes that receives the lines and labels.
    """
    # No plt.close() here: closing pyplot's current figure would detach the very figure
    # that `ax` belongs to, and a later plt.savefig() would write a new, empty figure.
    ax.set_xlim([-25, 25])
    ax.set_ylim([-25, 25])
    ax.set_aspect('equal')
    ax.set_xlabel('GameObjectPosX')
    ax.set_ylabel('GameObjectPosZ')
    ax.set_title(f'Trajectories up to {trial_time_point} seconds')

    unique_trials = df_group['UniqueTrialID'].unique()
    for trial_id in unique_trials:
        trial_data = df_group[(df_group['UniqueTrialID'] == trial_id) & (df_group['trial_time'] <= trial_time_point)]
        ax.plot(trial_data['GameObjectPosX'], trial_data['GameObjectPosZ'], alpha=0.2)

def save_frame(df_group, trial_time_point, ax, frame_number, output_dir):
    """
    Redraw `ax` for one time point and save its figure as a numbered PNG.

    Args:
        df_group (DataFrame): Trajectory rows, forwarded to plot_single_frame().
        trial_time_point (float): Time cut, forwarded to plot_single_frame().
        ax (matplotlib.axes.Axes): Axes that is cleared and reused for every frame.
        frame_number (int): Zero-padded to four digits in the file name.
        output_dir (str): Existing directory that receives 'frame_XXXX.png'.
    """
    ax.clear()  # Clear previous frame content
    plot_single_frame(df_group, trial_time_point, ax)  # Redraw the content for the new frame
    filename = f"{output_dir}/frame_{frame_number:04d}.png"
    # Save through the Axes' own figure instead of pyplot's "current figure", and leave the
    # figure open: it is reused for the next frame and closed once by the caller.
    ax.figure.savefig(filename, dpi=100)


# Create a directory for the frames
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
output_dir = '/Users/apaula/src/VRDataAnalysis/Vtk/FlyAnimation'
os.makedirs(output_dir, exist_ok=True)

# Prepare the figure and axes (one figure shared by all frames)
fig, ax = plt.subplots(figsize=(10, 8))

# Generate frames: total_seconds * fps + 1 frames covering 0 .. total_seconds inclusive
total_seconds = 20
fps = 10
try:
    for i in range(total_seconds * fps + 1):
        trial_time_point = i / fps
        save_frame(df_reduced, trial_time_point, ax, i, output_dir)
        print(f"Saved frame {i} for time {trial_time_point:.2f}s")
finally:
    # Release the shared figure even if a frame fails to render or save.
    plt.close(fig)


In [None]:
import matplotlib.pyplot as plt
import os

def plot_single_frame(df_group, trial_time_point, frame_number, output_dir):
    """
    Render one animation frame on a fresh figure and write it to disk as a PNG.

    NOTE(review): this redefines plot_single_frame with a different signature than the
    earlier cells; calls written for the (df_group, trial_time_point, ax) form no longer
    work once this cell has run.

    Args:
        df_group (DataFrame): Trajectory rows with the columns 'UniqueTrialID',
            'trial_time', 'GameObjectPosX' and 'GameObjectPosZ'.
        trial_time_point (float): Only samples with trial_time <= this value are drawn.
        frame_number (int): Zero-padded to four digits in the file name.
        output_dir (str): Directory that receives 'frame_XXXX.png'.
    """
    # A new figure per frame; it is closed again at the end of the call.
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.set_aspect('equal')
    ax.set_xlim([-25, 25])
    ax.set_ylim([-25, 25])
    ax.set_title(f'Trajectories up to {trial_time_point} seconds')
    ax.set_xlabel('GameObjectPosX')
    ax.set_ylabel('GameObjectPosZ')

    # Apply the time cut once, then draw each trial that already has samples.
    elapsed_rows = df_group[df_group['trial_time'] <= trial_time_point]
    for trial_id in df_group['UniqueTrialID'].unique():
        segment = elapsed_rows[elapsed_rows['UniqueTrialID'] == trial_id]
        if segment.empty:
            continue
        ax.plot(segment['GameObjectPosX'], segment['GameObjectPosZ'], alpha=0.2)

    # Write the frame and release the figure.
    filename = f"{output_dir}/frame_{frame_number:04d}.png"
    fig.savefig(filename, dpi=100)
    plt.close(fig)

# Output folder for the PNG frames (created if missing).
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
output_dir = '/Users/apaula/src/VRDataAnalysis/Vtk/FlyAnimation'
os.makedirs(output_dir, exist_ok=True)

# Render total_seconds * fps + 1 frames, i.e. time points 0 .. total_seconds inclusive.
total_seconds = 20
fps = 10
for i in range(total_seconds * fps + 1):
    trial_time_point = i / fps
    plot_single_frame(df_reduced, trial_time_point, i, output_dir)
    if i >= 5:
        continue
    # Only the first few frames report progress.
    print(f"Frame {i} saved for time {trial_time_point:.2f}s")
