In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Gaussian Naive Bayes (GaussianNB) - PyTorch Implementation\n",
    "## Brain Wave Classification for Drone Commands\n",
    "\n",
    "This notebook trains a Gaussian Naive Bayes classifier using PyTorch for EEG brain wave classification."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Import Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pathlib\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n",
    "import torch\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "# Import our custom GaussianNB model\n",
    "from gaussiannb_model import GaussianNB"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Data Loading\n",
    "\n",
    "Recursively load brainwave recordings from `.txt` files under `directory_path`, skipping any directory listed in `skip_dirs`.\n",
    "\n",
    "**Note:** Update `directory_path` to point to your brain wave data location."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# UPDATE THIS PATH to your brain wave data location\n",
    "directory_path = \"/path/to/your/brainwave_readings/\"\n",
    "core_dir = pathlib.Path(directory_path)\n",
    "skip_dirs = [\"Group1-8channels\"]  # Directories to skip\n",
    "\n",
    "# Fail early with an actionable message rather than an opaque pd.concat error below.\n",
    "if not core_dir.is_dir():\n",
    "    raise FileNotFoundError(\n",
    "        f\"Data directory not found: {core_dir}. Update `directory_path` above.\"\n",
    "    )\n",
    "\n",
    "dfs = []\n",
    "skipped_files = []  # (path, error) pairs for files that could not be read\n",
    "\n",
    "print(\"Loading brain wave data...\")\n",
    "# Sort the paths so the row order (and therefore the seeded train/test split)\n",
    "# does not depend on filesystem iteration order.\n",
    "for item in sorted(core_dir.rglob('*.txt')):\n",
    "    # Ignore any file that has a skipped directory among its path components.\n",
    "    if not set(item.parts).isdisjoint(skip_dirs):\n",
    "        continue\n",
    "    try:\n",
    "        # header=4: row 4 (0-based) of each file is used as the column header.\n",
    "        df = pd.read_csv(item, sep=',', header=4, on_bad_lines='skip')\n",
    "    except (ValueError, OSError) as exc:\n",
    "        # pandas ParserError/EmptyDataError and UnicodeDecodeError are ValueError\n",
    "        # subclasses; unreadable files raise OSError. Record the file and keep going\n",
    "        # instead of hiding every possible error behind a bare `except`.\n",
    "        skipped_files.append((item, exc))\n",
    "        continue\n",
    "    df[\"src_filename\"] = str(item)\n",
    "    dfs.append(df)\n",
    "\n",
    "if skipped_files:\n",
    "    print(f\"Skipped {len(skipped_files)} unreadable file(s):\")\n",
    "    for skipped_path, err in skipped_files:\n",
    "        print(f\"  {skipped_path}: {type(err).__name__}: {err}\")\n",
    "\n",
    "# Filter out empty DataFrames\n",
    "dfs_nonempty = [df for df in dfs if not df.empty]\n",
    "if not dfs_nonempty:\n",
    "    raise ValueError(f\"No readable, non-empty .txt recordings found under {core_dir}\")\n",
    "\n",
    "# Concatenate all data\n",
    "eeg_data = pd.concat(dfs_nonempty, ignore_index=True)\n",
    "\n",
    "print(f\"Loaded {len(eeg_data)} samples from {len(dfs_nonempty)} files\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Label Assignment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "eeg_data_fnl = eeg_data.copy()\n",
    "\n",
    "# Path substring -> canonical label, checked in order (first match wins).\n",
    "# \"fowward\" is presumably a misspelling of \"forward\" in some source paths;\n",
    "# it is mapped straight to the corrected label.\n",
    "label_patterns = {\n",
    "    \"backward\": \"backward\",\n",
    "    \"forward\": \"forward\",\n",
    "    \"landing\": \"landing\",\n",
    "    \"left\": \"left\",\n",
    "    \"right\": \"right\",\n",
    "    \"takeoff\": \"takeoff\",\n",
    "    \"fowward\": \"forward\",\n",
    "}\n",
    "label_names = list(label_patterns)  # kept for any later cell that reads it\n",
    "\n",
    "# NOTE(review): matching runs against the full path stored in src_filename, so a\n",
    "# parent directory containing one of these substrings would also match.\n",
    "src_lower = eeg_data_fnl[\"src_filename\"].str.lower()\n",
    "label_txt = pd.Series(\"\", index=eeg_data_fnl.index, dtype=object)\n",
    "for pattern, canonical in label_patterns.items():\n",
    "    # regex=False: the patterns are plain substrings, not regular expressions.\n",
    "    is_match = (label_txt == \"\") & src_lower.str.contains(pattern, regex=False)\n",
    "    label_txt = label_txt.mask(is_match, canonical)\n",
    "eeg_data_fnl[\"label_txt\"] = label_txt\n",
    "\n",
    "# Rows that matched no pattern would otherwise be encoded as a bogus \"\" class.\n",
    "unlabeled = eeg_data_fnl[\"label_txt\"] == \"\"\n",
    "if unlabeled.any():\n",
    "    print(f\"Dropping {int(unlabeled.sum())} samples with no recognizable label in their path\")\n",
    "    eeg_data_fnl = eeg_data_fnl.loc[~unlabeled].reset_index(drop=True)\n",
    "\n",
    "# Encode labels\n",
    "le = LabelEncoder()\n",
    "eeg_data_fnl[\"label\"] = le.fit_transform(eeg_data_fnl[\"label_txt\"])\n",
    "\n",
    "print(\"\\nClass distribution:\")\n",
    "print(eeg_data_fnl[\"label_txt\"].value_counts())\n",
    "print(f\"\\nLabel mapping: {dict(enumerate(le.classes_))}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Data Preparation for PyTorch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select feature columns: every numeric column that is not bookkeeping/label data.\n",
    "# NOTE(review): an earlier comment assumed 32 EEG channels - confirm the count printed\n",
    "# below and exclude any non-signal columns (e.g. sample index, timestamps) if present.\n",
    "non_feature_cols = ['src_filename', 'label_txt', 'label']\n",
    "candidate_cols = [col for col in eeg_data_fnl.columns if col not in non_feature_cols]\n",
    "# Non-numeric columns cannot be converted to a float tensor, so keep numeric ones only.\n",
    "feature_cols = eeg_data_fnl[candidate_cols].select_dtypes(include=[np.number]).columns.tolist()\n",
    "if not feature_cols:\n",
    "    raise ValueError(\"No numeric feature columns found in the loaded data\")\n",
    "print(f\"Number of features: {len(feature_cols)}\")\n",
    "\n",
    "X = eeg_data_fnl[feature_cols].to_numpy()\n",
    "y = eeg_data_fnl['label'].to_numpy()\n",
    "\n",
    "# NaNs in the features are likely to end up in the fitted statistics - surface them\n",
    "# here (verify how GaussianNB handles missing values before trusting the results).\n",
    "n_missing = int(np.isnan(X).sum())\n",
    "if n_missing:\n",
    "    print(f\"WARNING: feature matrix contains {n_missing} NaN value(s)\")\n",
    "\n",
    "# Train/test split (stratified so both splits keep the class proportions)\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n",
    "\n",
    "print(f\"\\nTraining samples: {len(X_train)}\")\n",
    "print(f\"Testing samples: {len(X_test)}\")\n",
    "\n",
    "# Convert to PyTorch tensors with explicit dtypes (float32 features, int64 labels)\n",
    "X_train_tensor = torch.tensor(X_train, dtype=torch.float32)\n",
    "y_train_tensor = torch.tensor(y_train, dtype=torch.long)\n",
    "X_test_tensor = torch.tensor(X_test, dtype=torch.float32)\n",
    "y_test_tensor = torch.tensor(y_test, dtype=torch.long)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Train GaussianNB Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize model\n",
    "# Model dimensions: feature count from the training matrix, class count from all labels\n",
    "num_features = X_train.shape[1]\n",
    "num_classes = len(np.unique(y))\n",
    "print(f\"Creating GaussianNB model with {num_features} features and {num_classes} classes\")\n",
    "\n",
    "model = GaussianNB(num_features=num_features, num_classes=num_classes)\n",
    "\n",
    "# Fit on the training tensors\n",
    "print(\"\\nFitting model...\")\n",
    "model.fit(X_train_tensor, y_train_tensor)\n",
    "print(\"Model fitted successfully!\")\n",
    "\n",
    "# Show what the fit stored on the model: priors, then the shapes of means and variances\n",
    "print(f\"\\nClass priors: {model.class_priors}\")\n",
    "print(f\"Means shape: {model.means.shape}\")\n",
    "print(f\"Variances shape: {model.variances.shape}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Evaluate Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training accuracy\n",
    "# Accuracy on the training split; only the forward pass needs no_grad\n",
    "with torch.no_grad():\n",
    "    y_train_pred = model(X_train_tensor).numpy()\n",
    "train_accuracy = accuracy_score(y_train, y_train_pred)\n",
    "print(f\"Training Accuracy: {train_accuracy:.4f} ({train_accuracy*100:.2f}%)\")\n",
    "\n",
    "# Accuracy on the held-out test split\n",
    "with torch.no_grad():\n",
    "    y_test_pred = model(X_test_tensor).numpy()\n",
    "test_accuracy = accuracy_score(y_test, y_test_pred)\n",
    "print(f\"Testing Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Classification Report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pin `labels` to the encoder's full index range so every report row lines up with\n",
    "# le.classes_, even if a class is absent from both y_test and the predictions\n",
    "# (without it, a missing class makes target_names mismatch the inferred labels).\n",
    "report_labels = np.arange(len(le.classes_))\n",
    "\n",
    "print(\"\\nClassification Report:\")\n",
    "print(classification_report(y_test, y_test_pred, labels=report_labels, target_names=le.classes_))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Confusion Matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pin `labels` so the matrix always has one row/column per encoder class, in the\n",
    "# same order as the le.classes_ tick labels used below.\n",
    "cm_labels = np.arange(len(le.classes_))\n",
    "cm = confusion_matrix(y_test, y_test_pred, labels=cm_labels)\n",
    "\n",
    "# Explicit figure/axes so the heatmap and its labels target the same axes.\n",
    "fig, ax = plt.subplots(figsize=(10, 8))\n",
    "sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',\n",
    "            xticklabels=le.classes_, yticklabels=le.classes_, ax=ax)\n",
    "ax.set_title('Confusion Matrix - GaussianNB')\n",
    "ax.set_ylabel('True Label')\n",
    "ax.set_xlabel('Predicted Label')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Save Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the trained model\n",
    "# Save the trained model together with the metadata needed to rebuild and use it.\n",
    "# Metadata is stored as plain Python types (list/int/float) rather than NumPy\n",
    "# objects so the file can be read back with torch.load(..., weights_only=True).\n",
    "model_path = 'gaussiannb_trained.pth'\n",
    "checkpoint = {\n",
    "    'model_state_dict': model.state_dict(),\n",
    "    'num_features': int(num_features),\n",
    "    'num_classes': int(num_classes),\n",
    "    'label_encoder_classes': le.classes_.tolist(),  # index -> label name\n",
    "    'train_accuracy': float(train_accuracy),\n",
    "    'test_accuracy': float(test_accuracy),\n",
    "}\n",
    "torch.save(checkpoint, model_path)\n",
    "\n",
    "print(f\"\\nModel saved to {model_path}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}