In [60]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
import pickle

In [61]:
def calculate_angle(p1, p2, p3):
    """Calculate angle between three points."""
    v1 = np.array(p1) - np.array(p2)
    v2 = np.array(p3) - np.array(p2)
    dot_product = np.dot(v1, v2)
    magnitude_v1 = np.linalg.norm(v1)
    magnitude_v2 = np.linalg.norm(v2)
    if magnitude_v1 == 0 or magnitude_v2 == 0:
        return None
    angle_radians = np.arccos(dot_product / (magnitude_v1 * magnitude_v2))
    return np.degrees(angle_radians)

In [62]:
def calculate_center_point(points):
    """Calculate centroid of multiple points."""
    return np.mean(points, axis=0)

def calculate_velocity(positions, frame_time):
    """Calculate velocity from position differences."""
    return np.gradient(positions, frame_time, axis=0)

def calculate_acceleration(velocities, frame_time):
    """Calculate acceleration from velocities."""
    return np.gradient(velocities, frame_time, axis=0)

In [63]:
# Read input data
input_file = "merged_output.csv"
output_file = "enhanced_motion_features.csv"
frame_time = 0.033  # 30 FPS

df = pd.read_csv(input_file)

In [64]:
from scipy.signal import butter, filtfilt, savgol_filter

# Crear los coeficientes del filtro Butterworth
b, a = butter(N=3, Wn=0.933333, btype='low')

filtered_dataset = df.copy()
smoothed_dataset = df.copy()

video_ids = df["videoid"].unique()

for video_id in video_ids:
    # Filtrar los datos por el ID del video actual
    video_data = df[df["videoid"] == video_id]
    # Identificar columnas numéricas (exceptuando 'frame')
    numeric_columns = video_data.select_dtypes(include=['number']).columns
    columns_to_filter = [col for col in numeric_columns if col != 'frame']
    # Crear copia de los datos originales para aplicar los filtros
    filtered_data = video_data.copy()
    # Aplicar el filtro Butterworth a las columnas seleccionadas
    for column in columns_to_filter:
        filtered_data[column] = filtfilt(b, a, video_data[column])
    # Crear copia para aplicar el filtro Savitzky-Golay
    smoothed_data = filtered_data.copy()
    for column in columns_to_filter:
        smoothed_data[column] = savgol_filter(filtered_data[column], window_length=11, polyorder=3)
    # Reemplazar los datos filtrados y suavizados en los DataFrames finales
    filtered_dataset.loc[video_data.index, columns_to_filter] = filtered_data[columns_to_filter]
    smoothed_dataset.loc[video_data.index, columns_to_filter] = smoothed_data[columns_to_filter]

In [65]:
current_dataset = filtered_dataset

keypoints = {
    'LEFT_SHOULDER': ['LEFT_SHOULDER_x', 'LEFT_SHOULDER_y'],
    'RIGHT_SHOULDER': ['RIGHT_SHOULDER_x', 'RIGHT_SHOULDER_y'],
    'LEFT_HIP': ['LEFT_HIP_x', 'LEFT_HIP_y'],
    'RIGHT_HIP': ['RIGHT_HIP_x', 'RIGHT_HIP_y'],
    'LEFT_KNEE': ['LEFT_KNEE_x', 'LEFT_KNEE_y'],
    'RIGHT_KNEE': ['RIGHT_KNEE_x', 'RIGHT_KNEE_y'],
    'LEFT_ANKLE': ['LEFT_ANKLE_x', 'LEFT_ANKLE_y'],
    'RIGHT_ANKLE': ['RIGHT_ANKLE_x', 'RIGHT_ANKLE_y'],
    'LEFT_ELBOW': ['LEFT_ELBOW_x', 'LEFT_ELBOW_y'],
    'RIGHT_ELBOW': ['RIGHT_ELBOW_x', 'RIGHT_ELBOW_y'],
    'LEFT_WRIST': ['LEFT_WRIST_x', 'LEFT_WRIST_y'],
    'RIGHT_WRIST': ['RIGHT_WRIST_x', 'RIGHT_WRIST_y']
}

In [66]:
for row_idx, row in current_dataset.iterrows():
    # Knee angles
    current_dataset.loc[row_idx, 'LEFT_KNEE_ANGLE'] = calculate_angle(
        [row[keypoints['LEFT_HIP'][0]], row[keypoints['LEFT_HIP'][1]]],
        [row[keypoints['LEFT_KNEE'][0]], row[keypoints['LEFT_KNEE'][1]]],
        [row[keypoints['LEFT_ANKLE'][0]], row[keypoints['LEFT_ANKLE'][1]]]
    )
    current_dataset.loc[row_idx, 'RIGHT_KNEE_ANGLE'] = calculate_angle(
        [row[keypoints['RIGHT_HIP'][0]], row[keypoints['RIGHT_HIP'][1]]],
        [row[keypoints['RIGHT_KNEE'][0]], row[keypoints['RIGHT_KNEE'][1]]],
        [row[keypoints['RIGHT_ANKLE'][0]], row[keypoints['RIGHT_ANKLE'][1]]]
    )
    
    # Hip angles
    current_dataset.loc[row_idx, 'LEFT_HIP_ANGLE'] = calculate_angle(
        [row[keypoints['LEFT_SHOULDER'][0]], row[keypoints['LEFT_SHOULDER'][1]]],
        [row[keypoints['LEFT_HIP'][0]], row[keypoints['LEFT_HIP'][1]]],
        [row[keypoints['LEFT_KNEE'][0]], row[keypoints['LEFT_KNEE'][1]]]
    )
    current_dataset.loc[row_idx, 'RIGHT_HIP_ANGLE'] = calculate_angle(
        [row[keypoints['RIGHT_SHOULDER'][0]], row[keypoints['RIGHT_SHOULDER'][1]]],
        [row[keypoints['RIGHT_HIP'][0]], row[keypoints['RIGHT_HIP'][1]]],
        [row[keypoints['RIGHT_KNEE'][0]], row[keypoints['RIGHT_KNEE'][1]]]
    )

In [67]:
current_dataset['center_x'] = (current_dataset['LEFT_HIP_x'] + current_dataset['RIGHT_HIP_x']) / 2
current_dataset['center_y'] = (current_dataset['LEFT_HIP_y'] + current_dataset['RIGHT_HIP_y']) / 2

for joint_pair in [
    ('shoulder', ['LEFT_SHOULDER', 'RIGHT_SHOULDER']),
    ('hip', ['LEFT_HIP', 'RIGHT_HIP']),
    ('knee', ['LEFT_KNEE', 'RIGHT_KNEE']),
    ('ankle', ['LEFT_ANKLE', 'RIGHT_ANKLE'])
]:
    name, (left, right) = joint_pair
    current_dataset[f'{name}_distance'] = np.sqrt(
        (current_dataset[keypoints[left][0]] - current_dataset[keypoints[right][0]])**2 +
        (current_dataset[keypoints[left][1]] - current_dataset[keypoints[right][1]])**2
    )

# Calculate velocities and accelerations
print("Calculating velocities and accelerations...")
# Center of mass velocity and acceleration
center_positions = current_dataset[['center_x', 'center_y']].values
center_velocities = calculate_velocity(center_positions, frame_time)
center_accelerations = calculate_acceleration(center_velocities, frame_time)

current_dataset['center_velocity_x'] = center_velocities[:, 0]
current_dataset['center_velocity_y'] = center_velocities[:, 1]
current_dataset['center_acceleration_x'] = center_accelerations[:, 0]
current_dataset['center_acceleration_y'] = center_accelerations[:, 1]

print("Calculating angular velocities...")
for angle in ['LEFT_KNEE_ANGLE', 'RIGHT_KNEE_ANGLE', 'LEFT_HIP_ANGLE', 'RIGHT_HIP_ANGLE']:
    current_dataset[f'{angle}_velocity'] = np.gradient(current_dataset[angle], frame_time)

# Calculate step length (distance between ankles)
current_dataset['step_length'] = np.sqrt(
    (current_dataset['LEFT_ANKLE_x'] - current_dataset['RIGHT_ANKLE_x'])**2 +
    (current_dataset['LEFT_ANKLE_y'] - current_dataset['RIGHT_ANKLE_y'])**2
)

# Calculate body height (shoulder to ankle distance)
current_dataset['body_height_left'] = np.sqrt(
    (current_dataset['LEFT_SHOULDER_x'] - current_dataset['LEFT_ANKLE_x'])**2 +
    (current_dataset['LEFT_SHOULDER_y'] - current_dataset['LEFT_ANKLE_y'])**2
)
current_dataset['body_height_right'] = np.sqrt(
    (current_dataset['RIGHT_SHOULDER_x'] - current_dataset['RIGHT_ANKLE_x'])**2 +
    (current_dataset['RIGHT_SHOULDER_y'] - current_dataset['RIGHT_ANKLE_y'])**2
)

# Calculate movement features
print("Calculating movement features...")
# Vertical displacement (useful for jump detection)
current_dataset['vertical_displacement'] = np.gradient(current_dataset['center_y'], frame_time)
current_dataset['vertical_velocity'] = np.gradient(current_dataset['vertical_displacement'], frame_time)

# Body rotation (angle between shoulders and hips)
current_dataset['body_rotation'] = np.arctan2(
    current_dataset['RIGHT_SHOULDER_y'] - current_dataset['LEFT_SHOULDER_y'],
    current_dataset['RIGHT_SHOULDER_x'] - current_dataset['LEFT_SHOULDER_x']
) - np.arctan2(
    current_dataset['RIGHT_HIP_y'] - current_dataset['LEFT_HIP_y'],
    current_dataset['RIGHT_HIP_x'] - current_dataset['LEFT_HIP_x']
)
current_dataset['body_rotation'] = np.degrees(current_dataset['body_rotation'])

current_dataset['hand_distance_x'] = current_dataset['LEFT_WRIST_x'] - current_dataset['RIGHT_WRIST_x']

# Calculate knee to hip distance
current_dataset['knee_hip_distance_left'] = current_dataset['LEFT_KNEE_y'] - current_dataset['LEFT_HIP_y']

current_dataset['knee_hip_distance_right'] = current_dataset['RIGHT_KNEE_y'] - current_dataset['RIGHT_HIP_y']

current_dataset['hip_ankle_distance_left'] =current_dataset['LEFT_HIP_y'] - current_dataset['LEFT_ANKLE_y']
current_dataset['hip_ankle_distance_right'] = current_dataset['RIGHT_HIP_y'] - current_dataset['RIGHT_ANKLE_y']


# Calculate rolling statistics for temporal context
window_size = 5  # Adjust based on your needs
for col in ['vertical_displacement', 'body_rotation', 'step_length']:
    current_dataset[f'{col}_rolling_mean'] = current_dataset[col].rolling(window=window_size, center=True).mean()
    current_dataset[f'{col}_rolling_std'] = current_dataset[col].rolling(window=window_size, center=True).std()

# Fill NaN values
current_dataset.fillna(0, inplace=True)

# Save enhanced features
print("Saving enhanced features...")
current_dataset.to_csv(output_file, index=False)

Calculating velocities and accelerations...
Calculating angular velocities...
Calculating movement features...
Saving enhanced features...


In [68]:
feature_columns = [
    'LEFT_KNEE_ANGLE', 'RIGHT_KNEE_ANGLE', 'LEFT_HIP_ANGLE', 'RIGHT_HIP_ANGLE',
    'shoulder_distance', 'hip_distance', 'knee_distance', 'ankle_distance',
    'center_velocity_x', 'center_velocity_y', 'center_acceleration_x', 'center_acceleration_y',
    'LEFT_KNEE_ANGLE_velocity', 'RIGHT_KNEE_ANGLE_velocity',
    'LEFT_HIP_ANGLE_velocity', 'RIGHT_HIP_ANGLE_velocity',
    'step_length', 'body_height_left', 'body_height_right',
    'vertical_displacement', 'vertical_velocity', 'body_rotation',
    'vertical_displacement_rolling_mean', 'vertical_displacement_rolling_std',
    'body_rotation_rolling_mean', 'body_rotation_rolling_std',
    'step_length_rolling_mean', 'step_length_rolling_std', 'hand_distance_x',
    'knee_hip_distance_left', 'knee_hip_distance_right',
    'hip_ankle_distance_left', 'hip_ankle_distance_right'

]

X = current_dataset[feature_columns]
y = current_dataset['annotation']

# Map labels to integers
label_map = {
    'Still': 0, 'approach': 1, 'back': 2, 'jump': 3, 'turn_left': 4,
    'walk_left': 5, 'turn_right': 6, 'walk_right': 7, 'sit': 8, 'stand': 9
}
y = y.map(label_map)

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train model with optimized parameters
print("Training model...")
model = xgb.XGBClassifier(
    n_estimators=200,
    max_depth=6,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    use_label_encoder=False,
    eval_metric='mlogloss'
)

model.fit(X_train, y_train)

# Evaluate model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=list(label_map.keys())))

with open('xgboost_model.pkl', 'wb') as f:
    pickle.dump(model, f)
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

Training model...


Parameters: { "use_label_encoder" } are not used.



Model Accuracy: 91.11%

Classification Report:
              precision    recall  f1-score   support

       Still       0.86      0.92      0.89       558
    approach       0.97      0.94      0.95       213
        back       0.99      0.97      0.98       272
        jump       0.92      0.84      0.88        57
   turn_left       0.91      1.00      0.96        43
   walk_left       1.00      0.92      0.96        65
  turn_right       0.96      0.93      0.95        56
  walk_right       0.96      0.98      0.97        93
         sit       0.91      0.90      0.91       112
       stand       0.47      0.29      0.35        49

    accuracy                           0.91      1518
   macro avg       0.89      0.87      0.88      1518
weighted avg       0.91      0.91      0.91      1518

