In [2]:
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import acf

# Read the uploaded files
cluster_data_path = '/Users/yangfanzhou/Desktop/1.8/ClusterResult/cluster_result_dbscan.csv'  
original_data_path = '/Users/yangfanzhou/Desktop/1.8/Cluster_Data_1.86_withposition(cleaned).xlsx'  

# Load the data
clustered_data = pd.read_csv(cluster_data_path)
original_data = pd.read_excel(original_data_path)

# 1. Calculate the mean of absolute SpeedChange
clustered_data['AbsSpeedChange'] = clustered_data['SpeedChange'].abs()
mean_abs_speed_change = clustered_data.groupby('Cluster')['AbsSpeedChange'].mean().reset_index()

# 2. Calculate trajectory length (total path length)
def calculate_trajectory_length(df):
    df = df.sort_values(by=['ID', 'Label'])
    distances = np.sqrt(np.diff(df['Positionx'])**2 + np.diff(df['Positiony'])**2)
    return distances.sum()

trajectory_lengths = original_data.groupby(['ID', 'Label']).apply(calculate_trajectory_length).reset_index()
trajectory_lengths.columns = ['ID', 'Label', 'Total_travel_path_length']

merged_data_with_length = pd.merge(clustered_data, trajectory_lengths, on=['ID', 'Label'])
mean_travel_path_length = merged_data_with_length.groupby('Cluster')['Total_travel_path_length'].mean().reset_index()

# 3. Calculate the standard deviation and frequency of angle changes
angle_change_std = clustered_data.groupby('Cluster')['AngleChange'].std().reset_index(name='AngleChangeStd')
angle_change_var = clustered_data.groupby('Cluster')['AngleChange'].var().reset_index(name='AngleChangeVar')
clustered_data['AngleChangeFrequency'] = clustered_data['AngleChange'].diff().abs() > 10  # Threshold of 10 can be adjusted as needed
angle_change_frequency = clustered_data.groupby('Cluster')['AngleChangeFrequency'].mean().reset_index()

# 4. Calculate the correlation between SpeedChange and AngleChange
correlation_speed_angle = clustered_data.groupby('Cluster').apply(lambda x: x['SpeedChange'].corr(x['AngleChange'])).reset_index()
correlation_speed_angle.columns = ['Cluster', 'Speed_Angle_Correlation']

# 5. Calculate the average duration of trajectories (average number of rows, i.e., unit time)
average_travel_time_length = clustered_data.groupby(['Cluster', 'Label']).size().groupby('Cluster').mean().reset_index()
average_travel_time_length.columns = ['Cluster', 'Average_travel_time_length']

# 6. Calculate trajectory curvature (path curvature)
def calculate_curvature(df):
    if len(df) < 3:
        return 0
    df = df.sort_values(by=['ID', 'Label'])
    x1, y1 = df['Positionx'].shift(1), df['Positiony'].shift(1)
    x2, y2 = df['Positionx'], df['Positiony']
    x3, y3 = df['Positionx'].shift(-1), df['Positiony'].shift(-1)
    
    curvature = abs((x3 - x2) * (y2 - y1) - (y3 - y2) * (x2 - x1)) / \
                np.sqrt(((x3 - x2)**2 + (y3 - y2)**2) * ((x2 - x1)**2 + (y2 - y1)**2))
    return curvature.mean()

curvature_data = original_data.groupby(['ID', 'Label']).apply(calculate_curvature).reset_index()
curvature_data.columns = ['ID', 'Label', 'Curvature']

merged_data_with_curvature = pd.merge(merged_data_with_length, curvature_data, on=['ID', 'Label'])
mean_curvature = merged_data_with_curvature.groupby('Cluster')['Curvature'].mean().reset_index()

# 7. Calculate acceleration frequency by evaluating pedestrian behavior through acceleration (speed change rate). 
# Pedestrians with more urgent behavior may show higher acceleration variation frequencies.
clustered_data['Acceleration'] = clustered_data['SpeedChange'].diff()
acceleration_frequency = clustered_data.groupby('Cluster')['Acceleration'].apply(lambda x: (x.abs() > 0.1).mean()).reset_index()
acceleration_frequency.columns = ['Cluster', 'AccelerationFrequency']

# 8. Analyze movement periodicity (autocorrelation analysis): Check pedestrian behavior periodicity using autocorrelation analysis. 
# If pedestrian behavior is repetitive (e.g., small step movements in a queue), it will show stronger periodicity.
def calculate_autocorrelation(series, lags=10):
    return acf(series, nlags=lags, fft=False)[1:].mean()  # Calculate the mean autocorrelation between lag 1 and lags

autocorrelation_analysis = clustered_data.groupby('Cluster')['SpeedChange'].apply(calculate_autocorrelation).reset_index()
autocorrelation_analysis.columns = ['Cluster', 'AverageAutocorrelation']

# 9. Calculate the continuity of angle changes (smoothness)
# Angle smoothness standard deviation (AngleSmoothnessStd) measures the smoothness of angle changes by calculating the difference in angle changes.
# Specifically, it calculates the standard deviation of the absolute difference between angle changes at adjacent time points. 
# This metric evaluates the continuity or smoothness of directional changes during pedestrian movement.
clustered_data['AngleSmoothness'] = clustered_data['AngleChange'].diff().abs()
angle_smoothness = clustered_data.groupby('Cluster')['AngleSmoothness'].std().reset_index()
angle_smoothness.columns = ['Cluster', 'AngleSmoothnessStd']

# Merge all analysis results
analysis_result = mean_abs_speed_change.merge(mean_travel_path_length, on='Cluster') \
                                       .merge(angle_change_std, on='Cluster') \
                                       .merge(angle_change_var, on='Cluster') \
                                       .merge(angle_change_frequency, on='Cluster') \
                                       .merge(correlation_speed_angle, on='Cluster') \
                                       .merge(average_travel_time_length, on='Cluster') \
                                       .merge(mean_curvature, on='Cluster') \
                                       .merge(acceleration_frequency, on='Cluster') \
                                       .merge(autocorrelation_analysis, on='Cluster') \
                                       .merge(angle_smoothness, on='Cluster')

# Display the result
print(analysis_result)


  trajectory_lengths = original_data.groupby(['ID', 'Label']).apply(calculate_trajectory_length).reset_index()
  correlation_speed_angle = clustered_data.groupby('Cluster').apply(lambda x: x['SpeedChange'].corr(x['AngleChange'])).reset_index()


   Cluster  AbsSpeedChange  Total_travel_path_length  AngleChangeStd  \
0       -1        0.178838                 10.525398       69.250946   
1        0        0.198584                  2.182734       60.757883   

   AngleChangeVar  AngleChangeFrequency  Speed_Angle_Correlation  \
0     4795.693536              0.717949                 0.173291   
1     3691.520368              0.558391                 0.093964   

   Average_travel_time_length  Curvature  AccelerationFrequency  \
0                      85.800   0.449675               0.675991   
1                     328.175   0.506246               0.688581   

   AverageAutocorrelation  AngleSmoothnessStd  
0               -0.047939           59.474182  
1               -0.044210           53.667862  


  curvature_data = original_data.groupby(['ID', 'Label']).apply(calculate_curvature).reset_index()
