In [78]:
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d
import os


In [79]:


# List all CSV files in the current directory
csv_files = [os.path.join('./GestureDatasetBigger', f) for f in os.listdir('./GestureDatasetBigger') if f.endswith('_gestures.csv')]


# Load and combine
all_data = pd.concat(
    [pd.read_csv(file) for file in csv_files],
    ignore_index=True
)

# Check the structure
print(all_data.head())

                              gestureId username   shape  pointIndex   x  \
0  1c32e835-15ca-4fd2-ada0-654541470796    James  square           0  66   
1  1c32e835-15ca-4fd2-ada0-654541470796    James  square           1  67   
2  1c32e835-15ca-4fd2-ada0-654541470796    James  square           2  68   
3  1c32e835-15ca-4fd2-ada0-654541470796    James  square           3  71   
4  1c32e835-15ca-4fd2-ada0-654541470796    James  square           4  75   

           y          time  velocityX  velocityY    speed  gestureStartTime  \
0  76.671875  1.748716e+09      0.000      0.000    0.000      1.748716e+09   
1  77.671875  1.748716e+09     55.556     55.556   78.568      1.748716e+09   
2  77.671875  1.748716e+09    124.998      0.000  124.998      1.748716e+09   
3  77.671875  1.748716e+09    333.340      0.000  333.340      1.748716e+09   
4  78.671875  1.748716e+09    799.982    199.995  824.602      1.748716e+09   

   gestureEndTime  gestureDuration  
0    1.748716e+09            1.

In [80]:
def preprocess_gestures(df, target_num_points=16, target_width=50, target_height=40):
    processed_gestures = []
    gesture_labels = []
    gesture_users = []

    for gesture_id, group in df.groupby('gestureId'):
        x = group['x'].values
        y = group['y'].values
            # Skip if too few points

        # Center at geometric center
        center_x = np.mean(x)
        center_y = np.mean(y)
        x_centered = x - center_x
        y_centered = y - center_y

        # Scale proportionally
        max_x = np.max(np.abs(x_centered))
        max_y = np.max(np.abs(y_centered))
        scale_x = target_width / (2 * max_x) if max_x > 0 else 1.0
        scale_y = target_height / (2 * max_y) if max_y > 0 else 1.0
        scale_factor = min(scale_x, scale_y)
        x_scaled = x_centered * scale_factor
        y_scaled = y_centered * scale_factor

        # Resample to target_num_points
        distances = np.sqrt(np.diff(x_scaled)**2 + np.diff(y_scaled)**2)
        cumulative_distance = np.insert(np.cumsum(distances), 0, 0)
        total_distance = cumulative_distance[-1]
        if total_distance == 0:
            continue

        target_distances = np.linspace(0, total_distance, target_num_points)
        interp_func_x = interp1d(cumulative_distance, x_scaled, kind='linear')
        interp_func_y = interp1d(cumulative_distance, y_scaled, kind='linear')
        x_resampled = interp_func_x(target_distances)
        y_resampled = interp_func_y(target_distances)

        gesture_points = np.column_stack((x_resampled, y_resampled))
        processed_gestures.append(gesture_points)
        gesture_labels.append(group['shape'].iloc[0])
        gesture_users.append(group['username'].iloc[0])

    return np.array(processed_gestures), gesture_labels, gesture_users


In [81]:
processed_gestures, gesture_labels, gesture_users = preprocess_gestures(all_data)

print(f"Processed Gestures Shape: {processed_gestures.shape}")
print(f"First 5 Labels: {gesture_labels[:5]}")
print(f"First 5 Users: {gesture_users[:5]}")


Processed Gestures Shape: (750, 16, 2)
First 5 Labels: ['triangle', 'circle', 'square', 'square', 'square']
First 5 Users: ['Ian', 'Gary', 'Ian', 'Hanlin', 'Rohan']


In [82]:
X = processed_gestures.reshape((processed_gestures.shape[0], -1))
print(X.shape)



(750, 32)


In [83]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(gesture_labels)
print(y[:5])


[2 0 1 1 1]


In [84]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [85]:
from sklearn.neighbors import KNeighborsClassifier

k = 3  # or tune as needed
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)


In [86]:
accuracy = knn.score(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")


Test Accuracy: 0.98


In [87]:
from sklearn.metrics import confusion_matrix, classification_report

y_pred = knn.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


[[50  0  0]
 [ 0 50  0]
 [ 3  0 47]]
              precision    recall  f1-score   support

      circle       0.94      1.00      0.97        50
      square       1.00      1.00      1.00        50
    triangle       1.00      0.94      0.97        50

    accuracy                           0.98       150
   macro avg       0.98      0.98      0.98       150
weighted avg       0.98      0.98      0.98       150



In [88]:
    # Example: classify the first test sample
predicted_label = knn.predict([X_test[0]])
label_name = label_encoder.inverse_transform(predicted_label)
print(f"Predicted Label: {label_name[0]}")


Predicted Label: triangle


In [89]:
import joblib

# Save the kNN model
joblib.dump(knn, 'knn_model.pkl')

# Save the label encoder
joblib.dump(label_encoder, 'label_encoder.pkl')


['label_encoder.pkl']

In [90]:
import pandas as pd

test_square = pd.read_csv('./TestGestures/testsquare1_gestures.csv')
test_circle = pd.read_csv('./TestGestures/testcircle_gestures.csv')
test_triangle = pd.read_csv('./TestGestures/againtriangle_gestures.csv')

test_data = pd.concat([test_square, test_circle, test_triangle], ignore_index=True)


In [91]:
import numpy as np
from scipy.interpolate import interp1d

def preprocess_gestures(df, target_num_points=16, target_width=50, target_height=40):
    processed_gestures = []
    gesture_labels = []
    gesture_users = []

    for gesture_id, group in df.groupby('gestureId'):
        x = group['x'].values
        y = group['y'].values

        # Center at geometric center
        center_x = np.mean(x)
        center_y = np.mean(y)
        x_centered = x - center_x
        y_centered = y - center_y

        # Scale proportionally
        max_x = np.max(np.abs(x_centered))
        max_y = np.max(np.abs(y_centered))
        scale_x = target_width / (2 * max_x) if max_x > 0 else 1.0
        scale_y = target_height / (2 * max_y) if max_y > 0 else 1.0
        scale_factor = min(scale_x, scale_y)
        x_scaled = x_centered * scale_factor
        y_scaled = y_centered * scale_factor

        # Resample to target_num_points
        distances = np.sqrt(np.diff(x_scaled)**2 + np.diff(y_scaled)**2)
        cumulative_distance = np.insert(np.cumsum(distances), 0, 0)
        total_distance = cumulative_distance[-1]
        if total_distance == 0:
            continue

        target_distances = np.linspace(0, total_distance, target_num_points)
        interp_func_x = interp1d(cumulative_distance, x_scaled, kind='linear')
        interp_func_y = interp1d(cumulative_distance, y_scaled, kind='linear')
        x_resampled = interp_func_x(target_distances)
        y_resampled = interp_func_y(target_distances)

        gesture_points = np.column_stack((x_resampled, y_resampled))
        processed_gestures.append(gesture_points)
        gesture_labels.append(group['shape'].iloc[0])
        gesture_users.append(group['username'].iloc[0])

    return np.array(processed_gestures), gesture_labels, gesture_users


In [92]:
processed_test_gestures, test_labels, test_users = preprocess_gestures(test_data)
X_test = processed_test_gestures.reshape((processed_test_gestures.shape[0], -1))


In [93]:
from sklearn.metrics import classification_report
y_test_encoded = label_encoder.transform(test_labels)
accuracy = knn.score(X_test, y_test_encoded)
print(f"Test Accuracy: {accuracy:.2f}")

y_test_pred = knn.predict(X_test)
print(classification_report(y_test_encoded, y_test_pred, target_names=label_encoder.classes_))


Test Accuracy: 1.00
              precision    recall  f1-score   support

      circle       1.00      1.00      1.00         1
      square       1.00      1.00      1.00         1
    triangle       1.00      1.00      1.00         1

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3



In [94]:
# See predictions for each test dataset separately
def show_predictions_for_dataset(df_name, df):
    # Process the data
    processed_gestures, true_labels, _ = preprocess_gestures(df)
    X = processed_gestures.reshape((processed_gestures.shape[0], -1))
    
    # Get predictions
    y_pred = knn.predict(X)
    predicted_labels = label_encoder.inverse_transform(y_pred)
    
    # Display results
    print(f"\n--- Predictions for {df_name} ---")
    print(f"Total samples: {len(true_labels)}")
    
    # Create comparison table
    results = pd.DataFrame({
        'True Label': true_labels,
        'Predicted Label': predicted_labels,
        'Correct': [t == p for t, p in zip(true_labels, predicted_labels)]
    })
    
    print(f"Accuracy: {results['Correct'].mean():.2f}")
    print("\nSample predictions:")
    display(results.head(10))
    
    # Confusion matrix for this dataset
    from sklearn.metrics import confusion_matrix
    y_true_encoded = label_encoder.transform(true_labels)
    cm = confusion_matrix(y_true_encoded, y_pred)
    print("\nConfusion Matrix:")
    print(cm)

# Test each dataset separately
show_predictions_for_dataset("test_square", test_square)
show_predictions_for_dataset("test_circle", test_circle)
show_predictions_for_dataset("test_triangle", test_triangle)


--- Predictions for test_square ---
Total samples: 1
Accuracy: 1.00

Sample predictions:


Unnamed: 0,True Label,Predicted Label,Correct
0,square,square,True



Confusion Matrix:
[[1]]

--- Predictions for test_circle ---
Total samples: 1
Accuracy: 1.00

Sample predictions:




Unnamed: 0,True Label,Predicted Label,Correct
0,circle,circle,True



Confusion Matrix:
[[1]]

--- Predictions for test_triangle ---
Total samples: 1
Accuracy: 1.00

Sample predictions:




Unnamed: 0,True Label,Predicted Label,Correct
0,triangle,triangle,True



Confusion Matrix:
[[1]]


