In [None]:
```json
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report\n",
    "import itertools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Simulate Hand Landmark Data Generation\n",
    "# Configuration for the synthetic dataset built in this cell.\n",
    "NUM_LANDMARKS = 21  # landmarks per hand; index 0 is the wrist, 1-20 are the finger landmarks\n",
    "NUM_SAMPLES_PER_GESTURE = 200  # noisy samples drawn for every entry in GESTURES\n",
    "GESTURES = ['Open_Hand', 'Closed_Fist', 'Pinch']  # class labels; each one has a canonical pose\n",
    "NOISE_LEVEL = 0.03  # standard deviation of the Gaussian jitter added to every coordinate\n",
    "\n",
    "def generate_base_pose(gesture):\n",
    "    \"\"\"Return the canonical, noise-free pose for ``gesture``.\n",
    "\n",
    "    The pose is a ``(NUM_LANDMARKS, 3)`` float array of x/y/z coordinates\n",
    "    relative to the wrist, which sits at the origin (row 0). Rows 1-4 are\n",
    "    the thumb, 5-8 the index, 9-12 the middle, 13-16 the ring and 17-20 the\n",
    "    pinky finger, each ordered from the base of the finger to its tip.\n",
    "\n",
    "    Args:\n",
    "        gesture: one of 'Open_Hand', 'Closed_Fist' or 'Pinch'.\n",
    "\n",
    "    Returns:\n",
    "        A new array on every call, so callers may modify it freely.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if ``gesture`` has no canonical pose. An unknown name must\n",
    "            not silently yield an all-zero hand, because that would enter the\n",
    "            dataset as a valid-looking but meaningless class.\n",
    "    \"\"\"\n",
    "    # Simplified canonical poses. One row per finger (thumb, index, middle,\n",
    "    # ring, pinky); each row lists that finger's four landmarks.\n",
    "    finger_chains = {\n",
    "        'Open_Hand': [\n",
    "            [[0.05, -0.02, 0.01], [0.1, -0.04, 0.02], [0.15, -0.06, 0.03], [0.2, -0.08, 0.04]],\n",
    "            [[0.01, 0.05, 0], [0.02, 0.1, 0], [0.03, 0.15, 0], [0.04, 0.2, 0]],\n",
    "            [[0, 0.06, 0], [0, 0.12, 0], [0, 0.18, 0], [0, 0.24, 0]],\n",
    "            [[-0.01, 0.05, 0], [-0.02, 0.1, 0], [-0.03, 0.15, 0], [-0.04, 0.2, 0]],\n",
    "            [[-0.02, 0.04, 0], [-0.04, 0.08, 0], [-0.06, 0.12, 0], [-0.08, 0.16, 0]],\n",
    "        ],\n",
    "        # Thumb folded, fingers curled (z grows towards the tips)\n",
    "        'Closed_Fist': [\n",
    "            [[0.03, 0.01, 0.02], [0.05, 0.03, 0.04], [0.06, 0.05, 0.06], [0.07, 0.07, 0.08]],\n",
    "            [[0.01, 0.05, 0.01], [0.02, 0.08, 0.03], [0.03, 0.08, 0.06], [0.04, 0.08, 0.09]],\n",
    "            [[0, 0.06, 0.01], [0, 0.09, 0.03], [0, 0.09, 0.06], [0, 0.09, 0.09]],\n",
    "            [[-0.01, 0.05, 0.01], [-0.02, 0.08, 0.03], [-0.03, 0.08, 0.06], [-0.04, 0.08, 0.09]],\n",
    "            [[-0.02, 0.04, 0.01], [-0.04, 0.07, 0.03], [-0.05, 0.07, 0.06], [-0.06, 0.07, 0.09]],\n",
    "        ],\n",
    "        # Thumb and index tips close together, other fingers slightly open/relaxed\n",
    "        'Pinch': [\n",
    "            [[0.04, -0.01, 0.01], [0.08, -0.02, 0.02], [0.12, -0.03, 0.03], [0.15, 0.02, 0.04]],\n",
    "            [[0.01, 0.05, 0], [0.02, 0.1, 0], [0.03, 0.13, 0.01], [0.13, 0.03, 0.03]],\n",
    "            [[0, 0.06, 0], [0, 0.12, 0], [0, 0.18, 0], [0, 0.22, 0]],\n",
    "            [[-0.01, 0.05, 0], [-0.02, 0.1, 0], [-0.03, 0.15, 0], [-0.04, 0.19, 0]],\n",
    "            [[-0.02, 0.04, 0], [-0.04, 0.08, 0], [-0.06, 0.12, 0], [-0.08, 0.15, 0]],\n",
    "        ],\n",
    "    }\n",
    "    if gesture not in finger_chains:\n",
    "        raise ValueError(f'Unknown gesture {gesture!r}; expected one of {sorted(finger_chains)}')\n",
    "\n",
    "    # Flatten the 5 x 4 x 3 table into 20 consecutive landmark rows\n",
    "    joints = np.asarray(finger_chains[gesture], dtype=float).reshape(-1, 3)\n",
    "    pose = np.zeros((NUM_LANDMARKS, 3))  # row 0 (wrist) stays at the origin\n",
    "    pose[1:1 + len(joints)] = joints\n",
    "    return pose\n",
    "\n",
    "# Seed a dedicated generator so the synthetic dataset (and every plot and\n",
    "# metric derived from it) is identical after Restart & Run All.\n",
    "RANDOM_SEED = 42\n",
    "rng = np.random.default_rng(RANDOM_SEED)\n",
    "\n",
    "data = []\n",
    "labels = []\n",
    "\n",
    "for gesture in GESTURES:\n",
    "    base_pose = generate_base_pose(gesture)\n",
    "    for _ in range(NUM_SAMPLES_PER_GESTURE):\n",
    "        # Independent Gaussian jitter on every coordinate of every landmark\n",
    "        jitter = rng.normal(0, NOISE_LEVEL, base_pose.shape)\n",
    "        # One translation in [0, 0.1) per axis, shared by all landmarks, simulates the\n",
    "        # hand sitting at a different position. Orientation is not varied.\n",
    "        offset = rng.random(3) * 0.1\n",
    "        noisy_pose = base_pose + jitter + offset\n",
    "        data.append(noisy_pose.flatten())  # Flatten landmarks into a single feature vector\n",
    "        labels.append(gesture)\n",
    "\n",
    "# Create DataFrame\n",
    "feature_names = [f'landmark_{i}_{coord}' for i in range(NUM_LANDMARKS) for coord in ['x', 'y', 'z']]\n",
    "df = pd.DataFrame(data, columns=feature_names)\n",
    "df['gesture'] = labels\n",
    "\n",
    "# Shuffle DataFrame (seeded, so the row order is reproducible as well)\n",
    "df = df.sample(frac=1, random_state=RANDOM_SEED).reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data Inspection\n",
    "# Every report is printed as a heading line followed by the value itself.\n",
    "print('First 5 rows:', df.head(), sep='\\n')\n",
    "\n",
    "print('\\nDataFrame Info:')\n",
    "df.info()  # prints its own report, so it is not wrapped in print()\n",
    "\n",
    "for heading, report in [\n",
    "    ('\\nDescriptive Statistics:', df.describe()),\n",
    "    ('\\nGesture Distribution:', df['gesture'].value_counts()),\n",
    "    ('\\nCheck for Missing Values:', df.isnull().sum().sum()),  # total count of null cells\n",
    "]:\n",
    "    print(heading)\n",
    "    print(report)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# EDA - Distributions\n",
    "# Per-gesture histograms (with KDE overlay) of two fingertip coordinates:\n",
    "# landmark 8 is the index finger tip, landmark 4 the thumb tip.\n",
    "for column, title in [\n",
    "    ('landmark_8_y', 'Distribution of Index Finger Tip Y-coordinate by Gesture'),\n",
    "    ('landmark_4_x', 'Distribution of Thumb Tip X-coordinate by Gesture'),\n",
    "]:\n",
    "    plt.figure(figsize=(12, 6))\n",
    "    sns.histplot(data=df, x=column, hue='gesture', kde=True)\n",
    "    plt.title(title)\n",
    "    plt.show()\n",
    "\n",
    "# Pairwise grid for a handful of landmark coordinates, coloured by gesture\n",
    "sample_features = ['landmark_4_x', 'landmark_4_y', 'landmark_8_x', 'landmark_8_y', 'landmark_12_y', 'gesture']\n",
    "sns.pairplot(df[sample_features], hue='gesture', diag_kind='kde')\n",
    "plt.suptitle('Pairplot of Selected Landmark Coordinates', y=1.02)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# EDA - Visualization of Hand Skeletons (2D Projection)\n",
    "\n",
    "# Landmark index pairs that are joined by a line when the hand is drawn\n",
    "HAND_CONNECTIONS = [\n",
    "    edge\n",
    "    # First landmark of each finger: thumb, index, middle, ring, pinky\n",
    "    for base in (1, 5, 9, 13, 17)\n",
    "    # Wrist to finger base, then the three segments up to the fingertip\n",
    "    for edge in ((0, base), (base, base + 1), (base + 1, base + 2), (base + 2, base + 3))\n",
    "] + [(5, 9), (9, 13), (13, 17)]  # Palm\n",
    "\n",
    "def plot_hand_skeleton(landmarks_row, ax, title):\n",
    "    \"\"\"Draw the x/y projection of one hand sample on ``ax``.\n",
    "\n",
    "    Args:\n",
    "        landmarks_row: a row of ``df`` (pandas Series) holding one value per\n",
    "            entry of ``feature_names``; extra entries such as the label are ignored.\n",
    "        ax: matplotlib Axes to draw on.\n",
    "        title: title shown above the axes.\n",
    "    \"\"\"\n",
    "    # A row taken from ``df`` also carries the string label, so its values come\n",
    "    # back with object dtype; cast to float before plotting.\n",
    "    landmarks = np.asarray(landmarks_row[feature_names].values, dtype=float).reshape(NUM_LANDMARKS, 3)\n",
    "    x = landmarks[:, 0]\n",
    "    # Plot y unmodified. Negating y and also calling ax.invert_yaxis() cancel out\n",
    "    # on screen but leave the Y tick labels with the wrong sign.\n",
    "    y = landmarks[:, 1]\n",
    "\n",
    "    ax.scatter(x, y, s=10, c='red')\n",
    "    for start_idx, end_idx in HAND_CONNECTIONS:\n",
    "        # Skip connections that refer to landmarks this sample does not have\n",
    "        if start_idx < NUM_LANDMARKS and end_idx < NUM_LANDMARKS:\n",
    "            ax.plot([x[start_idx], x[end_idx]], [y[start_idx], y[end_idx]], 'blue')\n",
    "\n",
    "    ax.set_title(title)\n",
    "    ax.set_xlabel('X')\n",
    "    ax.set_ylabel('Y')\n",
    "    ax.set_aspect('equal', adjustable='box')\n",
    "\n",
    "# One panel per gesture, each showing the first sample of that class\n",
    "fig, axes = plt.subplots(1, len(GESTURES), figsize=(15, 5))\n",
    "for panel, gesture in zip(axes, GESTURES):\n",
    "    sample = df.loc[df['gesture'] == gesture].iloc[0]\n",
    "    plot_hand_skeleton(sample, panel, f'Example: {gesture}')\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Statistical Analysis\n",
    "# Per-gesture mean of every landmark coordinate\n",
    "mean_landmarks = df.groupby('gesture').mean()\n",
    "print('Mean Landmark Coordinates per Gesture:', mean_landmarks, sep='\\n')\n",
    "\n",
    "# Per-gesture spread of a single coordinate (index fingertip, y)\n",
    "std_dev_index_y = df.groupby('gesture')['landmark_8_y'].std()\n",
    "print('\\nStandard Deviation of Index Tip Y-coordinate per Gesture:', std_dev_index_y, sep='\\n')\n",
    "\n",
    "# Correlations are limited to the x/y coordinates of landmarks 4, 8 and 12\n",
    "# (thumb, index and middle fingertips) because the full matrix is too large to read.\n",
    "subset_features = [f'landmark_{tip}_{axis}' for tip in (4, 8, 12) for axis in ('x', 'y')]\n",
    "correlation_matrix = df[subset_features].corr()\n",
    "plt.figure(figsize=(10, 8))\n",
    "sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')\n",
    "plt.title('Correlation Matrix of Selected Landmark Coordinates')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Feature Engineering Experiments\n",
    "\n",
    "def calculate_distances(landmarks):\n",
    "    \"\"\"Compute three Euclidean distance features for one hand.\n",
    "\n",
    "    Args:\n",
    "        landmarks: array indexed by landmark number, one coordinate vector per row.\n",
    "\n",
    "    Returns:\n",
    "        pd.Series with, in order: thumb tip (4) to index tip (8) distance,\n",
    "        index tip (8) to middle tip (12) distance, and the mean distance of\n",
    "        the five fingertips (4, 8, 12, 16, 20) to the wrist (0).\n",
    "    \"\"\"\n",
    "    wrist = landmarks[0]\n",
    "    thumb_tip, index_tip, middle_tip = landmarks[4], landmarks[8], landmarks[12]\n",
    "    tip_to_wrist = [np.linalg.norm(landmarks[tip] - wrist) for tip in (4, 8, 12, 16, 20)]\n",
    "    return pd.Series([\n",
    "        np.linalg.norm(thumb_tip - index_tip),\n",
    "        np.linalg.norm(index_tip - middle_tip),\n",
    "        np.mean(tip_to_wrist),\n",
    "    ])\n",
    "\n",
    "def normalize_landmarks(landmarks):\n",
    "    \"\"\"Return wrist-relative, size-normalized landmarks as a flat vector.\n",
    "\n",
    "    The landmarks are translated so the wrist (landmark 0) is the origin and\n",
    "    then divided by the wrist to middle-finger-base (landmark 9) distance.\n",
    "    When that distance is not greater than 1e-6 the division is skipped and\n",
    "    the translated landmarks are returned unscaled.\n",
    "    \"\"\"\n",
    "    # Translate so the wrist becomes the origin\n",
    "    centered = landmarks - landmarks[0]\n",
    "\n",
    "    # Reference length used as the scale of the hand\n",
    "    scale = np.linalg.norm(centered[9] - centered[0])\n",
    "    if not (scale > 1e-6):\n",
    "        # Degenerate reference length: avoid dividing by (almost) zero\n",
    "        return centered.flatten()\n",
    "    return (centered / scale).flatten()\n",
    "\n",
    "# Distance features: three distances per sample\n",
    "landmark_data = df[feature_names].values.reshape(-1, NUM_LANDMARKS, 3)\n",
    "distances_df = pd.DataFrame(list(map(calculate_distances, landmark_data)))\n",
    "distances_df.columns = ['dist_thumb_index', 'dist_index_middle', 'avg_dist_fingertips_wrist']\n",
    "\n",
    "# Wrist-relative, scale-normalized coordinates: one flattened row per sample\n",
    "normalized_data = np.array(list(map(normalize_landmarks, landmark_data)))\n",
    "normalized_feature_names = [f'norm_lm_{i}_{coord}' for i in range(NUM_LANDMARKS) for coord in 'xyz']\n",
    "normalized_df = pd.DataFrame(normalized_data, columns=normalized_feature_names)\n",
    "\n",
    "# New feature frame: label column first, then the distances, then the normalized coordinates\n",
    "df_features = pd.concat([df['gesture'], distances_df, normalized_df], axis='columns')\n",
    "\n",
    "print('DataFrame with Engineered Features (Head):', df_features.head(), sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Re-inspect Data with Engineered Features\n",
    "print('\\nEngineered Features Info:')\n",
    "df_features.info()\n",
    "\n",
    "# (column, plot title) for each engineered distance feature, in display order\n",
    "distance_panels = [\n",
    "    ('dist_thumb_index', 'Thumb-Index Tip Distance'),\n",
    "    ('dist_index_middle', 'Index-Middle Tip Distance'),\n",
    "    ('avg_dist_fingertips_wrist', 'Avg Fingertip-Wrist Distance'),\n",
    "]\n",
    "\n",
    "print('\\nEngineered Features Description:')\n",
    "print(df_features[[column for column, _ in distance_panels]].describe())\n",
    "\n",
    "# One boxplot per distance feature, side by side, split by gesture\n",
    "plt.figure(figsize=(15, 5))\n",
    "for position, (column, panel_title) in enumerate(distance_panels, start=1):\n",
    "    plt.subplot(1, 3, position)\n",
    "    sns.boxplot(data=df_features, x='gesture', y=column)\n",
    "    plt.title(panel_title)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initial Model Testing - Preparation\n",
    "\n",
    "# Feature matrix: every engineered column (distances + normalized coordinates).\n",
    "# Alternatives to experiment with:\n",
    "#   X = df_features[['dist_thumb_index', 'dist_index_middle', 'avg_dist_fingertips_wrist']]  # distances only\n",
    "#   X = normalized_df  # normalized coordinates only\n",
    "X = df_features.drop(columns='gesture')\n",
    "y = df_features['gesture']\n",
    "\n",
    "# Encode the string labels as integers\n",
    "le = LabelEncoder()\n",
    "y_encoded = le.fit_transform(y)\n",
    "print(f'Label Encoding: {list(le.classes_)} -> {list(range(len(le.classes_)))}')\n",
    "\n",
    "# Hold out 30% of the samples for testing, keeping the class proportions (stratified)\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X,\n",
    "    y_encoded,\n",
    "    test_size=0.3,\n",
    "    random_state=42,\n",
    "    stratify=y_encoded,\n",
    ")\n",
    "\n",
    "# Standardize features; the scaler is fitted on the training split only\n",
    "scaler = StandardScaler()\n",
    "X_train_scaled = scaler.fit_transform(X_train)\n",
    "X_test_scaled = scaler.transform(X_test)\n",
    "\n",
    "print(f'\\nTraining data shape: {X_train_scaled.shape}')\n",
    "print(f'Testing data shape: {X_test_scaled.shape}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initial Model Testing - Training\n",
    "# Logistic Regression (fit() returns the fitted estimator, so it can be chained)\n",
    "log_reg = LogisticRegression(random_state=42, max_iter=1000).fit(X_train_scaled, y_train)\n",
    "print('Logistic Regression Model Trained.')\n",
    "\n",
    "# Support Vector Machine; probability=True is kept in case class probabilities are needed later\n",
    "svm_clf = SVC(random_state=42, probability=True).fit(X_train_scaled, y_train)\n",
    "print('SVM Classifier Model Trained.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initial Model Testing - Evaluation\n",
    "\n",
    "def evaluate_model(model, X_test_scaled, y_test, model_name, class_names=None):\n",
    "    \"\"\"Print accuracy and a classification report, then plot the confusion matrix.\n",
    "\n",
    "    Args:\n",
    "        model: fitted classifier exposing ``predict``.\n",
    "        X_test_scaled: feature matrix passed to ``model.predict``.\n",
    "        y_test: integer-encoded true labels.\n",
    "        model_name: text used in the printed header and the plot title.\n",
    "        class_names: display names ordered by encoded label (0, 1, ...).\n",
    "            Defaults to ``le.classes_`` from the label encoder fitted earlier.\n",
    "    \"\"\"\n",
    "    if class_names is None:\n",
    "        class_names = le.classes_\n",
    "    class_names = list(class_names)\n",
    "    # Pin the label set explicitly. Otherwise both metrics infer the labels from the\n",
    "    # values present in y_test / y_pred, so a class missing from this split would\n",
    "    # shrink the matrix and leave the class names out of step with the reported rows.\n",
    "    label_ids = list(range(len(class_names)))\n",
    "\n",
    "    y_pred = model.predict(X_test_scaled)\n",
    "    accuracy = accuracy_score(y_test, y_pred)\n",
    "    cm = confusion_matrix(y_test, y_pred, labels=label_ids)\n",
    "    cr = classification_report(y_test, y_pred, labels=label_ids, target_names=class_names)\n",
    "\n",
    "    print(f\"--- Evaluation Results for {model_name} ---\")\n",
    "    print(f\"Accuracy: {accuracy:.4f}\")\n",
    "    print(\"\\nClassification Report:\")\n",
    "    print(cr)\n",
    "    print(\"\\nConfusion Matrix:\")\n",
    "\n",
    "    # Rows are true labels, columns are predicted labels\n",
    "    plt.figure(figsize=(6, 5))\n",
    "    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)\n",
    "    plt.xlabel('Predicted Label')\n",
    "    plt.ylabel('True Label')\n",
    "    plt.title(f'{model_name} - Confusion Matrix')\n",
    "    plt.show()\n",
    "\n",
    "# Run the same evaluation for both fitted classifiers\n",
    "for fitted_model, display_name in [(log_reg, 'Logistic Regression'), (svm_clf, 'SVM Classifier')]:\n",
    "    evaluate_model(fitted_model, X_test_scaled, y_test, display_name)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "