In [4]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Alzheimer's Disease Classification Analysis"
   ]
  },
  {
   # Code cell: imports and one-time setup for the notebook.
   # It appends the absolute form of '../src' to sys.path before importing from
   # data_preprocessing, model and visualization (presumably the project modules
   # that live in that directory -- confirm), then issues `%matplotlib inline`.
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import sys\n",
    "import os\n",
    "\n",
    "# Add the src directory to the Python path\n",
    "sys.path.append(os.path.abspath('../src'))\n",
    "\n",
    "from data_preprocessing import load_data, preprocess_data\n",
    "from model import train_model, evaluate_model, get_feature_importance\n",
    "from visualization import (plot_correlation_matrix, plot_diagnosis_distribution,\n",
    "                           plot_age_distribution, plot_confusion_matrix,\n",
    "                           plot_feature_importance)\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load and Explore Data"
   ]
  },
  {
   # Code cell: resolve the dataset location, load it with load_data() and print
   # a first look at the result (head and info).
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the dataset. Set the ALZHEIMERS_DATA_PATH environment variable to\n",
    "# point at your own copy; the fallback is the original author's local path, so\n",
    "# existing setups keep working unchanged.\n",
    "file_path = os.environ.get(\n",
    "    \"ALZHEIMERS_DATA_PATH\",\n",
    "    \"C:/Users/Kanika Barik/.ms-ad/Downloads/archive/alzheimers_disease_data.csv\",\n",
    ")\n",
    "data = load_data(file_path)\n",
    "\n",
    "# Display the first few rows\n",
    "print(data.head())\n",
    "\n",
    "# Display basic information about the dataset.\n",
    "# Assuming load_data returns a pandas DataFrame, info() writes its report to\n",
    "# stdout itself and returns None, so it is called directly: wrapping it in\n",
    "# print() would append a stray None line to the output.\n",
    "data.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Exploratory Data Analysis (EDA)"
   ]
  },
  {
   # Code cell: text-only EDA. Prints the result of data.describe() and of
   # data.isnull().sum(), each preceded by a heading line.
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Descriptive Statistics\n",
    "print(\"\\nDescriptive Statistics:\")\n",
    "print(data.describe())\n",
    "\n",
    "# Check for missing values\n",
    "print(\"\\nMissing Values:\")\n",
    "print(data.isnull().sum())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data Visualization"
   ]
  },
  {
   # Code cell: calls plot_correlation_matrix(data) (imported from the
   # visualization module) and then plt.show() to render the figure.
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Correlation Matrix\n",
    "plot_correlation_matrix(data)\n",
    "plt.show()"
   ]
  },
  {
   # Code cell: calls plot_diagnosis_distribution(data) (imported from the
   # visualization module) and then plt.show() to render the figure.
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Diagnosis Distribution\n",
    "plot_diagnosis_distribution(data)\n",
    "plt.show()"
   ]
  },
  {
   # Code cell: calls plot_age_distribution(data) (imported from the
   # visualization module) and then plt.show() to render the figure.
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Age Distribution by Diagnosis\n",
    "plot_age_distribution(data)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data Preprocessing"
   ]
  },
  {
   # Code cell: unpacks the five values returned by preprocess_data(data) into
   # X_train, X_test, y_train, y_test and feature_names, then prints a
   # completion message. Later cells read these names.
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preprocess data\n",
    "X_train, X_test, y_train, y_test, feature_names = preprocess_data(data)\n",
    "print(\"Data preprocessing completed.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model Training and Evaluation"
   ]
  },
  {
   # Code cell: builds `model` via train_model(X_train, y_train), prints a
   # completion message, then unpacks evaluate_model(model, X_test, y_test)
   # into cm and y_pred. cm is consumed by the confusion-matrix cell; y_pred is
   # not read again in the cells visible in this notebook.
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train model\n",
    "model = train_model(X_train, y_train)\n",
    "print(\"Model training completed.\")\n",
    "\n",
    "# Evaluate model\n",
    "cm, y_pred = evaluate_model(model, X_test, y_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model Visualization"
   ]
  },
  {
   # Code cell: passes cm (assigned in the model evaluation cell) to
   # plot_confusion_matrix() and then calls plt.show() to render the figure.
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot confusion matrix\n",
    "plot_confusion_matrix(cm)\n",
    "plt.show()"
   ]
  },
  {
   # Code cell: stores get_feature_importance(model, feature_names) in
   # feature_importance, hands it to plot_feature_importance() and then calls
   # plt.show() to render the figure.
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot feature importance\n",
    "feature_importance = get_feature_importance(model, feature_names)\n",
    "plot_feature_importance(feature_importance)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Conclusion\n",
    "\n",
    "This notebook has performed a comprehensive analysis of the Alzheimer's disease dataset, including:\n",
    "1. Data loading and exploration\n",
    "2. Exploratory Data Analysis (EDA)\n",
    "3. Data visualization\n",
    "4. Data preprocessing\n",
    "5. Model training and evaluation\n",
    "6. Model visualization\n",
    "\n",
    "The Random Forest Classifier has been used to predict Alzheimer's disease diagnosis. Review the classification report, confusion matrix, and feature importance plot to understand the model's performance and the most influential factors in the diagnosis prediction."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}


{'cells': [{'cell_type': 'markdown',
   'metadata': {},
   'source': ["# Alzheimer's Disease Classification Analysis"]},
  {'cell_type': 'code',
   'execution_count': None,
   'metadata': {},
   'outputs': [],
   'source': ['import pandas as pd\n',
    'import numpy as np\n',
    'import matplotlib.pyplot as plt\n',
    'import seaborn as sns\n',
    'import sys\n',
    'import os\n',
    '\n',
    '# Add the src directory to the Python path\n',
    "sys.path.append(os.path.abspath('../src'))\n",
    '\n',
    'from data_preprocessing import load_data, preprocess_data\n',
    'from model import train_model, evaluate_model, get_feature_importance\n',
    'from visualization import (plot_correlation_matrix, plot_diagnosis_distribution,\n',
    '                           plot_age_distribution, plot_confusion_matrix,\n',
    '                           plot_feature_importance)\n',
    '\n',
    '%matplotlib inline']},
  {'cell_type': 'markdown',
   'metadata': {},
   'source': ['## Load a