In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
def fix_labels(label):
    """Normalise an action label to its canonical spelling.

    The short form 'Throwing' is mapped to 'Throwing things'; every other
    value is returned unchanged.
    """
    return 'Throwing things' if label == 'Throwing' else label

# Label Encoder

In [None]:
# import label encoder
from sklearn.preprocessing import LabelEncoder

# The encoder is fitted on the labelled rows of the first recording.
# NOTE(review): any label that only appears in another recording will make
# `label_encoder.transform` raise later on - confirm file 1 covers all classes.
data = pd.read_csv('./real_data/keypoints_with_labels_1.csv')
data = data.dropna(subset=['Action Label'])

# Normalise label spelling BEFORE fitting: the loading cell applies
# `fix_labels` before calling `transform`, so the encoder must learn the
# normalised names or the normalised value would be an unseen label.
y = data['Action Label'].apply(fix_labels).to_numpy()

label_encoder = LabelEncoder()
data['Action Label'] = label_encoder.fit_transform(y)
# data['Action Label'].value_counts().plot(kind='bar')



# Load dataset

In [None]:
import os

# Read every CSV recording in ./real_data/, keep only labelled rows, encode the
# labels with the already-fitted `label_encoder`, and tag each row with the
# recording ID taken from the file name suffix (e.g. "..._1.csv" -> '1').
dfs = []
# os.listdir returns entries in arbitrary order; sorting makes the row order of
# the concatenated frame (and therefore the sliding windows built from it)
# reproducible across machines and runs.
for file in sorted(os.listdir('./real_data/')):
    print('file ', file)
    if not file.endswith('.csv'):
        continue
    df = pd.read_csv(os.path.join('./real_data/', file)).dropna(subset=['Action Label'])
    print(df['Action Label'].unique())
    # Normalise label spelling first, then map names to integer codes.
    df['Action Label'] = label_encoder.transform(df['Action Label'].apply(fix_labels))
    df['Action Label'] = df['Action Label'].astype('category')
    df['ID'] = file.split('.')[0].split('_')[-1]
    df['ID'] = df['ID'].astype('category')
    dfs.append(df)

In [None]:
# Stack all per-recording frames into one frame with a fresh 0..n-1 index,
# then show how many rows each encoded action label has.
final_df = pd.concat(dfs, ignore_index=True)
label_counts = final_df['Action Label'].value_counts()
label_counts.plot(kind='bar')

# Data Visualization

In [None]:
# One box plot per keypoint column, grouped by encoded action label.
# Column names come from `data` (the first recording); values come from `final_df`.
skipped_columns = ('Action Label', 'frame_id')
feature_columns = [name for name in data.columns if name not in skipped_columns]
for feature in feature_columns:
    sns.boxplot(x='Action Label', y=feature, data=final_df)
    plt.xticks(rotation=90)

    plt.show()

# Split train-test

In [None]:
# Hold out recording '1' as the test set; every other recording is training data.
# Explicit copies are taken because later cells overwrite columns of these
# frames in place; writing into a boolean-mask slice of `final_df` triggers
# SettingWithCopyWarning and is not guaranteed to behave consistently.
is_test_recording = final_df['ID'] == '1'
final_df_train = final_df[~is_test_recording].copy()
final_df_test = final_df[is_test_recording].copy()

In [None]:
# Display the held-out test frame (rows whose ID is '1') for a visual sanity check.
final_df_test

# Extract time series domain features
## In this tutorial, TSFEL is used for the demonstration, but you can use any other library or even write your own feature-extraction code :)

In [None]:
# JSON feature-configuration file: passed to add_feature_json and used as
# `json_path` when building the tsfel feature configuration.
personal_dir='./tsfel_feat.json'
# Python file holding the custom feature implementations: passed as
# `features_path` to the tsfel extractor.
feature_path='./custom_features.py'

In [None]:
from tsfel.utils.add_personal_features import add_feature_json

# Presumably registers the custom features from `feature_path` in the JSON
# configuration at `personal_dir` - confirm against the tsfel documentation.
# Use the shared `feature_path` constant instead of repeating the literal so
# this call and the extractor's `features_path` can never point at different files.
add_feature_json(feature_path, personal_dir)

In [None]:
import tsfel

# Build the feature configuration from the selected domains.
# You can choose from ['statistical', 'temporal', 'spectral', 'fractal', 'Custom'] domains
selected_domains = ['statistical', 'Custom', 'temporal']
cfg = tsfel.get_features_by_domain(selected_domains, json_path=personal_dir)

# Extract time series domain features using TSFEL.
# Train and test are processed with identical settings: window_size=30,
# overlap=0.5, fs=30, custom feature code loaded from `feature_path`.
extractor_options = dict(window_size=30, overlap=0.5, fs=30, features_path=feature_path)
final_df_train_tsfel = tsfel.time_series_features_extractor(cfg, final_df_train, **extractor_options)
final_df_test_tsfel = tsfel.time_series_features_extractor(cfg, final_df_test, **extractor_options)



In [None]:
# save Action Label as feature
label_train = final_df_train_tsfel['Action Label_Mode']
label_test = final_df_test_tsfel['Action Label_Mode']

# remove Action Label and id columns from train set
final_df_train_tsfel = final_df_train_tsfel.loc[:, ~final_df_train_tsfel.columns.str.contains('Action Label|id|ID')]
final_df_train_tsfel['Action Label'] = label_train  #add Action Label column back

# remove Action Label and id columns from test set
final_df_test_tsfel = final_df_test_tsfel.loc[:, ~final_df_test_tsfel.columns.str.contains('Action Label|id|ID')]
final_df_test_tsfel['Action Label'] = label_test #add Action Label column back

In [None]:
# Separate the feature matrix (all columns but the label, as floats)
# from the integer-encoded target for both splits.
X_train = final_df_train_tsfel.drop('Action Label', axis=1).astype(float)
y_train = final_df_train_tsfel.loc[:, 'Action Label'].astype(int)

X_test = final_df_test_tsfel.drop('Action Label', axis=1).astype(float)
y_test = final_df_test_tsfel.loc[:, 'Action Label'].astype(int)

In [None]:
# Sanity check: show the shapes of the feature matrices and target vectors.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

# Machine learning models

In [None]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Alternative model kept for reference:
# model = XGBClassifier(eval_metric='mlogloss',base_score=0.5, use_label_encoder=True)
# Fixed seed so any randomised step of the estimator is reproducible on re-run.
model = HistGradientBoostingClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out recording.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# plt.subplots returns (figure, axes) in that order; the names were swapped before.
fig, ax = plt.subplots(figsize=(10, 7))
# Rows of the confusion matrix are true labels, columns are predicted labels.
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set(xlabel='Predicted label', ylabel='True label',
       title='Confusion matrix (baseline features)')

In [None]:
# Class names learned by the encoder; position i is the name for integer code i.
labels= label_encoder.classes_
labels

## Why is the result so poor?

In [None]:
# Same per-feature box plots as before, but split by recording ID (hue) so
# differences between recordings become visible.
skipped_columns = ('Action Label', 'frame_id')
feature_columns = [name for name in data.columns if name not in skipped_columns]
for feature in feature_columns:
    sns.boxplot(x='Action Label', y=feature, data=final_df, hue='ID')
    plt.xticks(rotation=90)

    plt.show()

# Relative features extraction

## This tutorial uses the relative-position feature: the position of one body part relative to another. The left ear is used as the base point.

### $$ relative\_position = position_A - position_B $$

### where $A$ and $B$ are body parts
#### <center>____________________!!!You should note that x, y axis matters!!!____________________</center>

In [None]:

def to_relative_positions(frame, base_part='left_ear',
                          skip=('Action Label', 'frame_id', 'ID')):
    """Return a copy of `frame` with keypoint coordinates made relative to a base part.

    Every column ending in '_x' becomes ``<base_part>_x - column`` and every
    column ending in '_y' becomes ``<base_part>_y - column``. Columns listed in
    `skip` and columns with any other suffix are left untouched. The base
    part's own coordinates therefore become 0.

    All differences are computed from the ORIGINAL `frame`. The previous
    in-place loop overwrote 'left_ear_x' / 'left_ear_y' with zeros as soon as it
    reached them, so every column processed afterwards became ``0 - column``
    instead of a position relative to the ear.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain '<base_part>_x' and '<base_part>_y'.
    base_part : str
        Name prefix of the reference keypoint.
    skip : tuple of str
        Column names that are never transformed.

    Returns
    -------
    pandas.DataFrame
        New frame; `frame` itself is not modified.

    Raises
    ------
    KeyError
        If the base part's x or y column is missing.
    """
    base_x = frame[base_part + '_x']
    base_y = frame[base_part + '_y']
    relative = frame.copy()
    for col in frame.columns:
        if col in skip:
            continue
        if col.endswith('_x'):
            relative[col] = base_x - frame[col]
        elif col.endswith('_y'):
            relative[col] = base_y - frame[col]
    return relative


# NOTE: this cell rebinds the split frames, so running it twice applies the
# transform twice; re-run the train/test split cell first if you need to repeat it.
final_df_train = to_relative_positions(final_df_train)
final_df_test = to_relative_positions(final_df_test)

# You may define a function to extract more features, such as angle, velocity, or acceleration:
### $$\theta=\arccos\left(\frac{\overrightarrow{x}\cdot\overrightarrow{y}}{\|\overrightarrow{x}\|\,\|\overrightarrow{y}\|}\right) $$
### $$ Vel=\sqrt{\Delta x^2+\Delta y^2} $$
### $$ Acc=\Delta Vel $$

#### this part is for you :))))))))

In [9]:
import tsfel

# Build the feature configuration from the selected domains.
# Requires `personal_dir` and `feature_path` from the configuration cell above.
# You can choose from ['statistical', 'temporal', 'spectral', 'fractal', 'Custom'] domains
selected_domains = ['statistical', 'Custom', 'temporal']
cfg = tsfel.get_features_by_domain(selected_domains, json_path=personal_dir)

# Extract time series domain features using TSFEL, this time from the
# current (relative-position) train/test frames, with identical settings.
extractor_options = dict(window_size=30, overlap=0.5, fs=30, features_path=feature_path)
final_df_train_tsfel = tsfel.time_series_features_extractor(cfg, final_df_train, **extractor_options)
final_df_test_tsfel = tsfel.time_series_features_extractor(cfg, final_df_test, **extractor_options)



NameError: name 'personal_dir' is not defined

In [None]:
# save Action Label as feature
label_train = final_df_train_tsfel['Action Label_Mode']
label_test = final_df_test_tsfel['Action Label_Mode']

# remove Action Label and id columns from train set
final_df_train_tsfel = final_df_train_tsfel.loc[:, ~final_df_train_tsfel.columns.str.contains('Action Label|id|ID')]
final_df_train_tsfel['Action Label'] = label_train  #add Action Label column back

# remove Action Label and id columns from test set
final_df_test_tsfel = final_df_test_tsfel.loc[:, ~final_df_test_tsfel.columns.str.contains('Action Label|id|ID')]
final_df_test_tsfel['Action Label'] = label_test #add Action Label column back

In [None]:
# Separate the feature matrix (all columns but the label, as floats)
# from the integer-encoded target for both splits.
X_train = final_df_train_tsfel.drop('Action Label', axis=1).astype(float)
y_train = final_df_train_tsfel.loc[:, 'Action Label'].astype(int)

X_test = final_df_test_tsfel.drop('Action Label', axis=1).astype(float)
y_test = final_df_test_tsfel.loc[:, 'Action Label'].astype(int)

In [None]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Alternative model kept for reference:
# model = XGBClassifier(eval_metric='mlogloss',base_score=0.5, use_label_encoder=True)
# Fixed seed so any randomised step of the estimator is reproducible on re-run.
model = HistGradientBoostingClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out recording.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# plt.subplots returns (figure, axes) in that order; the names were swapped before.
fig, ax = plt.subplots(figsize=(10, 7))
# Rows of the confusion matrix are true labels, columns are predicted labels.
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set(xlabel='Predicted label', ylabel='True label',
       title='Confusion matrix (relative-position features)')

In [None]:
# Class names learned by the encoder; position i is the name for integer code i.
labels= label_encoder.classes_
labels