In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Correlation Analysis\n",
    "\n",
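    "For each of AAPL, GOOGL and MSFT, this notebook merges sentiment-scored financial news with the stock's price data, then reports the correlation between sentiment and closing price, lagged correlations for lags 1 to 5, a Granger causality test, and a correlation heatmap."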
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from src.data.data_loader import load_news_data, load_stock_data, merge_data\n",
    "from src.utils.sentiment_analyzer import apply_sentiment_analysis\n",
    "from src.utils.visualizer import plot_sentiment_vs_price, plot_correlation_heatmap\n",
    "from src.models.correlation_analysis import calculate_correlation, calculate_lagged_correlation, perform_granger_causality_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Analysis parameters, kept in one place so the lag loop and the Granger test stay in sync.\n",
    "NEWS_CSV_PATH = '../data/financial_news.csv'\n",
    "SYMBOLS = ['AAPL', 'GOOGL', 'MSFT']\n",
    "# Largest lag for the lagged-correlation loop; also the last argument of the\n",
    "# Granger test (presumably its maximum lag -- confirm against the helper).\n",
    "MAX_LAG = 5\n",
    "HEATMAP_COLUMNS = ['sentiment', 'Open', 'High', 'Low', 'Close', 'Volume']\n",
    "\n",
    "# Load the news once and add sentiment scores.\n",
    "news_df = load_news_data(NEWS_CSV_PATH)\n",
    "news_df = apply_sentiment_analysis(news_df)\n",
    "\n",
    "# The date range of the news is reused as the date range for the stock data.\n",
    "start_date = news_df['date'].min()\n",
    "end_date = news_df['date'].max()\n",
    "\n",
    "for symbol in SYMBOLS:\n",
    "    print(f\"Analyzing {symbol}...\")\n",
    "    stock_df = load_stock_data(symbol, start_date, end_date)\n",
    "    # NOTE(review): the same, unfiltered news_df is merged with every symbol's prices;\n",
    "    # confirm whether the news should be restricted to the current ticker first.\n",
    "    merged_df = merge_data(news_df, stock_df)\n",
    "\n",
    "    # Plot sentiment vs price\n",
    "    plot_sentiment_vs_price(merged_df)\n",
    "\n",
    "    # Correlation between the 'sentiment' and 'Close' columns\n",
    "    # NOTE(review): raw 'Close' levels are used here and in the Granger test below;\n",
    "    # consider returns/differences unless the helpers already handle that -- confirm.\n",
    "    correlation = calculate_correlation(merged_df, 'sentiment', 'Close')\n",
    "    print(f\"Correlation between sentiment and stock price: {correlation}\")\n",
    "\n",
    "    # Lagged correlations for lags 1..MAX_LAG\n",
    "    for lag in range(1, MAX_LAG + 1):\n",
    "        lagged_corr = calculate_lagged_correlation(merged_df, 'sentiment', 'Close', lag)\n",
    "        print(f\"Lagged correlation (lag={lag}): {lagged_corr}\")\n",
    "\n",
    "    # Granger causality test, driven by the same MAX_LAG as the loop above\n",
    "    granger_results = perform_granger_causality_test(merged_df, 'sentiment', 'Close', MAX_LAG)\n",
    "    print(\"Granger Causality Test Results:\")\n",
    "    print(granger_results)\n",
    "\n",
    "    # Plot correlation heatmap\n",
    "    plot_correlation_heatmap(merged_df, HEATMAP_COLUMNS)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

