# Functions for Calculating Features

Example Data

In [1]:
# Imports
import numpy as np
import pandas as pd

In [2]:
# Sample Data for a Track
sample_data = pd.read_csv('data_for_sample.csv')
data = pd.DataFrame(sample_data)
print(data)

      track_id          xc          yc     u_dot      v_dot        s_dot  \
0         1330  117.405639  104.372970  6.973857  22.796302  4094.324244   
1         1330  123.302195  123.913192  6.585702  21.623125  4154.148577   
2         1330  111.683189  400.633206 -0.382109  28.731078  1993.790100   
3         1330  821.635502  527.801237  5.468046  -3.778739    82.335724   
4         1330  883.833486  309.388663  2.265553   0.529649    -4.429363   
...        ...         ...         ...       ...        ...          ...   
3779      1330  864.092844  310.008467  1.328953   1.012996    28.693940   
3780      1330  866.778571  306.436494  1.451355   0.599358    28.197714   
3781      1330  872.650345  311.583098  1.850148   1.009592    13.827257   
3782      1330  879.723669  314.490788  2.321362   1.180831     5.170951   
3783      1330  882.995121  312.947496  2.407075   0.935072    -2.986053   

       frame  
0     382058  
1     382059  
2     382068  
3     382158  
4     383058

Create Simple Features

In [3]:
def calculate_speed(df):
    df['speed'] = np.sqrt(df['u_dot']**2 + df['v_dot']**2)
    return df

def calculate_outreach_ratio(df):
    start_x, start_y = df.iloc[0][['xc', 'yc']]
    end_x, end_y = df.iloc[-1][['xc', 'yc']]
    displacement = np.sqrt((end_x - start_x)**2 + (end_y - start_y)**2)
    path_length = df['speed'].sum()
    outreach_ratio = displacement / path_length if path_length != 0 else 0
    return outreach_ratio

def calculate_acceleration(u_dot, v_dot, time_intervals):
    u_dot = np.array(u_dot)
    v_dot = np.array(v_dot)
    time_intervals = np.array(time_intervals)
    ax = np.diff(u_dot) / time_intervals[:-1]
    ay = np.diff(v_dot) / time_intervals[:-1]
    acceleration = np.sqrt(ax**2 + ay**2)
    return ax, ay, acceleration

def calculate_distance_traveled(xc, yc):
    xc = np.array(xc)
    yc = np.array(yc)
    dx = np.diff(xc)
    dy = np.diff(yc)
    distances = np.sqrt(dx**2 + dy**2)
    distance_traveled = np.sum(distances)
    return distance_traveled

In [4]:
features = []
grouped = data.groupby('track_id')

for track_id, group in grouped:
    group = group.sort_values(by='frame')
    speed = calculate_speed(group)['speed'].mean()
    outreach_ratio = calculate_outreach_ratio(group)
    time_intervals = group['frame'].diff().fillna(1).values  # Assume frame difference as time interval
    ax, ay, acceleration = calculate_acceleration(group['u_dot'], group['v_dot'], time_intervals)
    mean_acceleration = np.mean(acceleration)
    distance_traveled = calculate_distance_traveled(group['xc'], group['yc'])
    features.append({
        'track_id': track_id,
        'speed': speed,
        'outreach_ratio': outreach_ratio,
        'mean_acceleration': mean_acceleration,
        'distance_traveled': distance_traveled
    })

features_df = pd.DataFrame(features)
print(features_df.head())

   track_id     speed  outreach_ratio  mean_acceleration  distance_traveled
0      1330  2.669734        0.086335           0.281946        15439.79521


Create Clustering Feature

In [5]:
# Imports
import joblib

# Load Object
gm = joblib.load('gaussian_mixture_model.pkl')
with open('labels.pkl', 'rb') as f:
    labels = joblib.load(f)

In [6]:
# Remove Track ID and Cluster
features_df_no_track_id = features_df.drop('track_id', axis=1)
predicted_clusters = gm.predict(features_df_no_track_id)
print(predicted_clusters)

[1]




In [7]:
# Add to DF
features_df = features_df.assign(predicted_clusters=predicted_clusters)
print(features_df)

   track_id     speed  outreach_ratio  mean_acceleration  distance_traveled  \
0      1330  2.669734        0.086335           0.281946        15439.79521   

   predicted_clusters  
0                   1  


Now use cutoffs to classify. In this example, there are no cutoffs that help.