In [2]:
import pandas as pd

# Load the data
data = pd.read_csv('combined_data.csv')

# Display the first few rows of the data
data.head()


Unnamed: 0,ID,Timestamp,X,Y,Button,Duration
0,002-tf2-315,1678903000.0,558,301,-1,-1.0
1,002-tf2-315,1678903000.0,550,290,-1,-1.0
2,002-tf2-315,1678903000.0,537,283,-1,-1.0
3,002-tf2-315,1678903000.0,526,280,-1,-1.0
4,002-tf2-315,1678903000.0,510,276,-1,-1.0


In [3]:
# Filter out data related to the game "Poly Bridge"
tf_data = data[data['ID'].str.contains("tf2")]

# Display the first few rows of the filtered data
tf_data.head()


Unnamed: 0,ID,Timestamp,X,Y,Button,Duration
0,002-tf2-315,1678903000.0,558,301,-1,-1.0
1,002-tf2-315,1678903000.0,550,290,-1,-1.0
2,002-tf2-315,1678903000.0,537,283,-1,-1.0
3,002-tf2-315,1678903000.0,526,280,-1,-1.0
4,002-tf2-315,1678903000.0,510,276,-1,-1.0


In [4]:
import numpy as np

def compute_additional_features(data):
    # Calculate differences for X and Y to compute velocity, acceleration, and angle
    data['Delta_X'] = data['X'].diff().fillna(0)
    data['Delta_Y'] = data['Y'].diff().fillna(0)
    
    # Compute Movement Distance (Euclidean distance)
    data['Movement_Distance'] = np.sqrt(data['Delta_X']**2 + data['Delta_Y']**2)
    
    # Compute Velocity (Distance / Time) - Assuming constant time intervals, the velocity is equivalent to movement distance
    data['Velocity'] = data['Movement_Distance']
    
    # Compute Acceleration (Change in Velocity / Time) - Using the same assumption as above
    data['Acceleration'] = data['Velocity'].diff().fillna(0)
    
    # Compute Angle of movement(angle of movement between consecutive data points using trigonometry.)
    data['Angle'] = np.arctan2(data['Delta_Y'], data['Delta_X']).fillna(0)
    
    # Calculate Jerk (rate of change of Acceleration)
    # The rate of change of acceleration. It can capture sudden movements.
    data['Jerk'] = data['Acceleration'].diff().fillna(0)
    
    # Calculate Curvature (change in angle over change in distance)
    # The degree to which a curve deviates from being flat or straight.
    data['Curvature'] = data['Angle'].diff().fillna(0) / (data['Movement_Distance'] + 1e-10)  # added a small value to prevent division by zero
    
    # Calculate Straightness (Euclidean distance between start and end over trajectory length)
    # Since we're working with individual points, we'll calculate this when creating sequences
    
    # Calculate Directional Changes (difference in angles between consecutive points)
    # Counting the number of times the movement changes direction can be indicative of certain behavioral patterns.
    data['Direction_Change'] = (data['Angle'].diff().fillna(0) != 0).astype(int)

    return data

# Apply the function to compute the additional features
tf_data = compute_additional_features(tf_data)

# Display the first few rows with the new features
tf_data.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Delta_X'] = data['X'].diff().fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Delta_Y'] = data['Y'].diff().fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Movement_Distance'] = np.sqrt(data['Delta_X']**2 + data['Delta_Y']**2)
A value is trying to be set on a copy 

Unnamed: 0,ID,Timestamp,X,Y,Button,Duration,Delta_X,Delta_Y,Movement_Distance,Velocity,Acceleration,Angle,Jerk,Curvature,Direction_Change
0,002-tf2-315,1678903000.0,558,301,-1,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,002-tf2-315,1678903000.0,550,290,-1,-1.0,-8.0,-11.0,13.601471,13.601471,13.601471,-2.199593,13.601471,-0.161717,1
2,002-tf2-315,1678903000.0,537,283,-1,-1.0,-13.0,-7.0,14.764823,14.764823,1.163353,-2.647651,-12.438118,-0.030346,1
3,002-tf2-315,1678903000.0,526,280,-1,-1.0,-11.0,-3.0,11.401754,11.401754,-3.363069,-2.875341,-4.526421,-0.01997,1
4,002-tf2-315,1678903000.0,510,276,-1,-1.0,-16.0,-4.0,16.492423,16.492423,5.090668,-2.896614,8.453737,-0.00129,1


In [6]:

def compute_stop_duration(data, threshold=0.001):
    """
    Compute the stop duration based on movement distance.
    """
    # Identify stops (when movement distance is below threshold)
    data['Is_Stop'] = (data['Movement_Distance'] < threshold).astype(int)
    
    # Calculate stop duration
    data['Stop_Duration'] = data['Is_Stop'] * data['Duration']

    return data

# Apply the function to compute the stop duration
tf_data = compute_stop_duration(tf_data)

# Display the first few rows with the new feature
tf_data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Is_Stop'] = (data['Movement_Distance'] < threshold).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Stop_Duration'] = data['Is_Stop'] * data['Duration']


Unnamed: 0,ID,Timestamp,X,Y,Button,Duration,Delta_X,Delta_Y,Movement_Distance,Velocity,Acceleration,Angle,Jerk,Curvature,Direction_Change,Is_Stop,Stop_Duration
0,002-tf2-315,1678903000.0,558,301,-1,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,-1.0
1,002-tf2-315,1678903000.0,550,290,-1,-1.0,-8.0,-11.0,13.601471,13.601471,13.601471,-2.199593,13.601471,-0.161717,1,0,-0.0
2,002-tf2-315,1678903000.0,537,283,-1,-1.0,-13.0,-7.0,14.764823,14.764823,1.163353,-2.647651,-12.438118,-0.030346,1,0,-0.0
3,002-tf2-315,1678903000.0,526,280,-1,-1.0,-11.0,-3.0,11.401754,11.401754,-3.363069,-2.875341,-4.526421,-0.01997,1,0,-0.0
4,002-tf2-315,1678903000.0,510,276,-1,-1.0,-16.0,-4.0,16.492423,16.492423,5.090668,-2.896614,8.453737,-0.00129,1,0,-0.0


In [7]:
selected_columns = ['Y', 'Stop_Duration', 'X', 'Jerk', 'Direction_Change', 'Movement_Distance', 'Acceleration', 'Button', 'Angle', 'ID']
selected_tf2 = tf_data[selected_columns]

In [8]:
selected_tf2

Unnamed: 0,Y,Stop_Duration,X,Jerk,Direction_Change,Movement_Distance,Acceleration,Button,Angle,ID
0,301,-1.0,558,0.000000,0,0.000000,0.000000,-1,0.000000,002-tf2-315
1,290,-0.0,550,13.601471,1,13.601471,13.601471,-1,-2.199593,002-tf2-315
2,283,-0.0,537,-12.438118,1,14.764823,1.163353,-1,-2.647651,002-tf2-315
3,280,-0.0,526,-4.526421,1,11.401754,-3.363069,-1,-2.875341,002-tf2-315
4,276,-0.0,510,8.453737,1,16.492423,5.090668,-1,-2.896614,002-tf2-315
...,...,...,...,...,...,...,...,...,...,...
1202694,539,-0.0,961,0.000000,1,1.414214,0.000000,-1,-0.785398,021-tf2-403
1202695,539,-0.0,960,-0.414214,1,1.000000,-0.414214,-1,3.141593,021-tf2-403
1202696,540,-0.0,961,0.828427,1,1.414214,0.414214,-1,0.785398,021-tf2-403
1202697,540,-1.0,961,-1.828427,1,0.000000,-1.414214,-1,0.000000,021-tf2-403


In [10]:
# Drop first row each dataset
def drop_first_row(group):
    return group.iloc[1:]

# Apply the function to each group defined by 'User_ID' and concatenate the results
selected_tf2 = selected_tf2.groupby('ID').apply(drop_first_row).reset_index(drop=True)

In [11]:
selected_tf2

Unnamed: 0,Y,Stop_Duration,X,Jerk,Direction_Change,Movement_Distance,Acceleration,Button,Angle,ID
0,500,-0.0,579,-319.936258,1,2.236068,-164.440265,-1,-0.463648,001-tf2-315
1,499,-0.0,580,163.618411,1,1.414214,-0.821854,-1,-0.785398,001-tf2-315
2,499,-0.0,581,0.407641,1,1.000000,-0.414214,-1,0.000000,001-tf2-315
3,499,-0.0,583,1.414214,0,2.000000,1.000000,-1,0.000000,001-tf2-315
4,499,-0.0,584,-2.000000,0,1.000000,-1.000000,-1,0.000000,001-tf2-315
...,...,...,...,...,...,...,...,...,...,...
653552,539,-0.0,961,0.000000,1,1.414214,0.000000,-1,-0.785398,021-tf2-403
653553,539,-0.0,960,-0.414214,1,1.000000,-0.414214,-1,3.141593,021-tf2-403
653554,540,-0.0,961,0.828427,1,1.414214,0.414214,-1,0.785398,021-tf2-403
653555,540,-1.0,961,-1.828427,1,0.000000,-1.414214,-1,0.000000,021-tf2-403


In [12]:
# CHeck missing value

selected_tf2.isnull().sum()

Y                    0
Stop_Duration        0
X                    0
Jerk                 0
Direction_Change     0
Movement_Distance    0
Acceleration         0
Button               0
Angle                0
ID                   0
dtype: int64

In [13]:
selected_tf2.to_csv('TF2_selected.csv')