In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sign Language Digits Recognition Analysis\n",
    "\n",
    "This notebook provides an interactive analysis of the sign language digits recognition project."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Environment setup: extend sys.path so the project modules resolve, then import\n",
    "# the third-party stack and the project's pipeline classes.\n",
    "import sys\n",
    "import os\n",
    "sys.path.append('../src')\n",
    "sys.path.append('..')\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# NOTE(review): the wildcard import hides which config names this notebook relies on;\n",
    "# kept unchanged because config.py is not visible from here.\n",
    "from config import *\n",
    "from data_preprocessing import DataPreprocessor\n",
    "from model_training import ModelTrainer\n",
    "from evaluation import ModelEvaluator\n",
    "from hyperparameter_tuning import HyperparameterTuner\n",
    "from visualization import Visualizer\n",
    "\n",
    "# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*'; fall back to\n",
    "# the legacy 'seaborn' name so this cell also runs on older installations.\n",
    "plot_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'\n",
    "plt.style.use(plot_style)\n",
    "plt.rcParams['figure.figsize'] = (12, 8)\n",
    "\n",
    "print(\"All imports successful!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Data Exploration and Preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the preprocessing and plotting helpers used in this section\n",
    "preprocessor = DataPreprocessor()\n",
    "visualizer = Visualizer()\n",
    "\n",
    "# load_kaggle_dataset() reports success through its truthiness: summarize what was\n",
    "# loaded, or point the reader at the data directory when nothing was.\n",
    "print(\"Loading dataset...\")\n",
    "if preprocessor.load_kaggle_dataset():\n",
    "    print(\"Dataset loaded successfully!\")\n",
    "    print(f\"Total samples: {len(preprocessor.image_data)}\")\n",
    "    print(f\"Image shape: {preprocessor.image_data.shape}\")\n",
    "    print(f\"Labels shape: {preprocessor.labels.shape}\")\n",
    "else:\n",
    "    print(\"Failed to load dataset. Please check the data directory.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show example images together with their labels\n",
    "visualizer.plot_sample_images(preprocessor.image_data, preprocessor.labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the label distribution, then list the per-digit sample counts as text\n",
    "distribution = visualizer.plot_data_distribution(preprocessor.labels)\n",
    "\n",
    "print(\"\\nClass distribution:\")\n",
    "for digit_label, sample_count in distribution.items():\n",
    "    print(f\"  Digit {digit_label}: {sample_count} samples\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Normalize (in 'standard' mode), split, and persist the processed data\n",
    "preprocessor.normalize_data('standard')\n",
    "preprocessor.split_data()\n",
    "preprocessor.save_processed_data()\n",
    "\n",
    "print(\"Data preprocessing completed!\")\n",
    "\n",
    "# Report the size of each split, read from the first axis of the X arrays\n",
    "for split_label, attr_name in (\n",
    "    (\"Training\", \"X_train\"),\n",
    "    (\"Validation\", \"X_val\"),\n",
    "    (\"Test\", \"X_test\"),\n",
    "):\n",
    "    split_size = getattr(preprocessor, attr_name).shape[0]\n",
    "    print(f\"{split_label} samples: {split_size}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Model Training and Evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the trainer/evaluator pair reused by the following cells\n",
    "trainer = ModelTrainer()\n",
    "evaluator = ModelEvaluator()\n",
    "\n",
    "# Let the trainer load the preprocessed data\n",
    "trainer.load_data()\n",
    "\n",
    "# Settings for the baseline run: 'basic' architecture, 20 epochs, augmentation on\n",
    "basic_run_options = dict(model_type='basic', epochs=20, use_augmentation=True)\n",
    "\n",
    "print(\"Training basic model...\")\n",
    "history = trainer.train_model(**basic_run_options)\n",
    "print(\"Training completed!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Draw the training history recorded by the trainer, then save it\n",
    "trainer.plot_training_history()\n",
    "trainer.save_training_history()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate the trained model and derive summary metrics from its predictions\n",
    "eval_results = trainer.evaluate_model()\n",
    "metrics = evaluator.calculate_metrics(\n",
    "    eval_results['y_true_classes'],\n",
    "    eval_results['y_pred_classes'],\n",
    "    eval_results['y_pred'],\n",
    ")\n",
    "\n",
    "# Print the headline figures to four decimal places\n",
    "for metric_label, metric_key in (\n",
    "    (\"Accuracy\", \"accuracy\"),\n",
    "    (\"Precision\", \"precision_macro\"),\n",
    "    (\"Recall\", \"recall_macro\"),\n",
    "    (\"F1-Score\", \"f1_macro\"),\n",
    "):\n",
    "    print(f\"Test {metric_label}: {metrics[metric_key]:.4f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Confusion matrix: true classes against predicted classes\n",
    "evaluator.plot_confusion_matrix(eval_results['y_true_classes'], eval_results['y_pred_classes'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ROC curves from the true classes and the raw prediction output\n",
    "evaluator.plot_roc_curves(eval_results['y_true_classes'], eval_results['y_pred'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Precision-recall curves from the true classes and the raw prediction output\n",
    "evaluator.plot_precision_recall_curves(eval_results['y_true_classes'], eval_results['y_pred'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Hyperparameter Tuning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the tuner shared by the hyperparameter and architecture sections\n",
    "tuner = HyperparameterTuner()\n",
    "\n",
    "# Run the batch-size study\n",
    "print(\"Analyzing batch size effect...\")\n",
    "batch_results = tuner.analyze_batch_size_effect()\n",
    "\n",
    "# Tabulate the results. Ending the cell on the frame itself lets Jupyter render a\n",
    "# rich table instead of the plain-text dump that print() produces.\n",
    "batch_df = pd.DataFrame(batch_results)\n",
    "batch_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the batch-size study results\n",
    "visualizer.plot_hyperparameter_analysis(batch_results, 'batch_size')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the learning-rate study and plot its results\n",
    "print(\"Analyzing learning rate effect...\")\n",
    "lr_results = tuner.analyze_learning_rate_effect()\n",
    "\n",
    "visualizer.plot_hyperparameter_analysis(lr_results, 'learning_rate')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the regularization study and plot the comparison\n",
    "print(\"Analyzing regularization effect...\")\n",
    "reg_results = tuner.analyze_regularization_effect()\n",
    "\n",
    "visualizer.plot_regularization_comparison(reg_results)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Model Architecture Comparison"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compare the 'basic' and 'advanced' architectures through the tuner\n",
    "print(\"Comparing model architectures...\")\n",
    "model_types = ['basic', 'advanced']\n",
    "arch_results = tuner.compare_models(model_types, epochs=10)  # fewer epochs to keep the notebook quick\n",
    "\n",
    "# One short text summary per architecture\n",
    "for arch_name, arch_summary in arch_results.items():\n",
    "    print(f\"\\n{arch_name.upper()} Model:\")\n",
    "    test_accuracy = arch_summary['metrics']['accuracy']\n",
    "    print(f\"  Test Accuracy: {test_accuracy:.4f}\")\n",
    "    elapsed_seconds = arch_summary['training_time']\n",
    "    print(f\"  Training Time: {elapsed_seconds:.2f} seconds\")\n",
    "    last_val_accuracy = arch_summary['final_val_accuracy']\n",
    "    print(f\"  Final Val Accuracy: {last_val_accuracy:.4f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summarize the architecture runs with the evaluator's comparison helper\n",
    "comparison_df = evaluator.compare_models(arch_results)\n",
    "\n",
    "print(\"\\nModel Comparison Summary:\")\n",
    "print(comparison_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Results Dashboard"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reshape each architecture's results into the flat record handed to the dashboard\n",
    "dashboard_results = {\n",
    "    arch_name: {\n",
    "        'test_accuracy': arch_summary['metrics']['accuracy'],\n",
    "        'precision': arch_summary['metrics']['precision_macro'],\n",
    "        'recall': arch_summary['metrics']['recall_macro'],\n",
    "        'f1_score': arch_summary['metrics']['f1_macro'],\n",
    "        'training_time': arch_summary['training_time'],\n",
    "        'total_params': 100000,  # placeholder, not a measured parameter count\n",
    "    }\n",
    "    for arch_name, arch_summary in arch_results.items()\n",
    "}\n",
    "\n",
    "visualizer.create_results_dashboard(dashboard_results)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Error Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inspect the misclassified test samples, using the trainer's copy of the test inputs\n",
    "misclassified_pairs = evaluator.analyze_misclassifications(\n",
    "    trainer.preprocessor.X_test,\n",
    "    eval_results['y_true_classes'], eval_results['y_pred_classes'], eval_results['y_pred'],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot prediction confidence; note the argument order: predictions first, true classes second\n",
    "evaluator.plot_prediction_confidence(eval_results['y_pred'], eval_results['y_true_classes'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Summary and Conclusions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the report inputs, then hand them to the evaluator in one call\n",
    "report_inputs = dict(\n",
    "    model_name=\"Final_CNN_Model\",\n",
    "    y_true=eval_results['y_true_classes'],\n",
    "    y_pred=eval_results['y_pred_classes'],\n",
    "    y_pred_proba=eval_results['y_pred'],\n",
    "    training_time=100.0,  # placeholder, not a measured duration\n",
    ")\n",
    "evaluator.generate_evaluation_report(**report_inputs)\n",
    "\n",
    "print(\"\\nAnalysis completed! Check the results directory for saved plots and reports.\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}