In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "view-in-github"
   },
   "source": [
    "<a href=\"https://colab.research.google.com/github/yourusername/market-research-v1/blob/main/notebooks/research/market_patterns.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "market_patterns_title"
   },
   "source": [
    "# Market Pattern Research - Indian Stock Market (2022)\n",
    "\n",
    "## Objective\n",
    "Research and identify recurring patterns in Indian stock market data including:\n",
    "- Seasonal patterns in NIFTY 50 and major stocks\n",
    "- Daily, weekly, and monthly trading patterns\n",
    "- Sector rotation patterns\n",
    "- Pre/Post earnings patterns\n",
    "- Festival and event-based patterns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "install_requirements"
   },
   "outputs": [],
   "source": [
    "# Install required packages\n",
    "!pip install yfinance pandas numpy matplotlib seaborn plotly ta-lib scipy statsmodels\n",
    "!pip install nsepy  # For Indian stock data\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "import_libraries"
   },
   "outputs": [],
   "source": [
    "import yfinance as yf\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import plotly.graph_objects as go\n",
    "import plotly.express as px\n",
    "from plotly.subplots import make_subplots\n",
    "from datetime import datetime, timedelta\n",
    "from scipy import stats\n",
    "import talib as ta\n",
    "from statsmodels.tsa.seasonal import seasonal_decompose\n",
    "from nsepy import get_history\n",
    "from nsepy.symbols import all_symbols\n",
    "\n",
    "# Set style for plots\n",
    "plt.style.use('seaborn-v0_8')\n",
    "sns.set_palette(\"husl\")\n",
    "\n",
    "# Configuration\n",
    "START_DATE = '2020-01-01'\n",
    "END_DATE = '2022-12-31'\n",
    "ANALYSIS_PERIOD = '2022-01-01'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "data_collection_header"
   },
   "source": [
    "## Data Collection - Indian Stocks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "fetch_indian_market_data"
   },
   "outputs": [],
   "source": [
    "# Indian market symbols with Yahoo Finance tickers\n",
    "INDIAN_STOCKS = {\n",
    "    'NIFTY50': '^NSEI',\n",
    "    'SENSEX': '^BSESN',\n",
    "    'RELIANCE': 'RELIANCE.NS',\n",
    "    'TCS': 'TCS.NS',\n",
    "    'INFY': 'INFY.NS',\n",
    "    'HDFCBANK': 'HDFCBANK.NS',\n",
    "    'ICICIBANK': 'ICICIBANK.NS',\n",
    "    'HINDUNILVR': 'HINDUNILVR.NS',\n",
    "    'ITC': 'ITC.NS',\n",
    "    'SBIN': 'SBIN.NS',\n",
    "    'BHARTIARTL': 'BHARTIARTL.NS',\n",
    "    'ASIANPAINT': 'ASIANPAINT.NS',\n",
    "    'MARUTI': 'MARUTI.NS',\n",
    "    'KOTAKBANK': 'KOTAKBANK.NS',\n",
    "    'LT': 'LT.NS'\n",
    "}\n",
    "\n",
    "# Sector classification\n",
    "SECTORS = {\n",
    "    'Banking': ['HDFCBANK.NS', 'ICICIBANK.NS', 'SBIN.NS', 'KOTAKBANK.NS'],\n",
    "    'IT': ['TCS.NS', 'INFY.NS'],\n",
    "    'FMCG': ['HINDUNILVR.NS', 'ITC.NS'],\n",
    "    'Energy': ['RELIANCE.NS'],\n",
    "    'Auto': ['MARUTI.NS'],\n",
    "    'Telecom': ['BHARTIARTL.NS'],\n",
    "    'Paint': ['ASIANPAINT.NS'],\n",
    "    'Infrastructure': ['LT.NS']\n",
    "}\n",
    "\n",
    "def fetch_stock_data(symbol, start_date, end_date):\n",
    "    \"\"\"Fetch stock data using yfinance\"\"\"\n",
    "    try:\n",
    "        stock = yf.Ticker(symbol)\n",
    "        data = stock.history(start=start_date, end=end_date)\n",
    "        data['Symbol'] = symbol\n",
    "        return data\n",
    "    except Exception as e:\n",
    "        print(f\"Error fetching data for {symbol}: {e}\")\n",
    "        return None\n",
    "\n",
    "# Fetch data for all stocks\n",
    "print(\"Fetching Indian market data...\")\n",
    "market_data = {}\n",
    "\n",
    "for name, symbol in INDIAN_STOCKS.items():\n",
    "    print(f\"Fetching {name} ({symbol})...\")\n",
    "    data = fetch_stock_data(symbol, START_DATE, END_DATE)\n",
    "    if data is not None and not data.empty:\n",
    "        market_data[name] = data\n",
    "        print(f\"✓ {name}: {len(data)} records\")\n",
    "    else:\n",
    "        print(f\"✗ Failed to fetch {name}\")\n",
    "\n",
    "print(f\"\\nSuccessfully fetched data for {len(market_data)} stocks\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "seasonal_patterns_header"
   },
   "source": [
    "## 1. Seasonal Pattern Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "seasonal_analysis"
   },
   "outputs": [],
   "source": [
    "def analyze_seasonal_patterns(data, stock_name):\n",
    "    \"\"\"Analyze seasonal patterns in stock data\"\"\"\n",
    "    df = data.copy()\n",
    "    df['Returns'] = df['Close'].pct_change()\n",
    "    df['Month'] = df.index.month\n",
    "    df['DayOfWeek'] = df.index.dayofweek\n",
    "    df['DayOfMonth'] = df.index.day\n",
    "    df['Quarter'] = df.index.quarter\n",
    "    \n",
    "    # Monthly patterns\n",
    "    monthly_returns = df.groupby('Month')['Returns'].agg(['mean', 'std', 'count'])\n",
    "    monthly_returns['mean_pct'] = monthly_returns['mean'] * 100\n",
    "    \n",
    "    # Day of week patterns\n",
    "    dow_returns = df.groupby('DayOfWeek')['Returns'].agg(['mean', 'std', 'count'])\n",
    "    dow_returns['mean_pct'] = dow_returns['mean'] * 100\n",
    "    dow_returns.index = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']\n",
    "    \n",
    "    # Quarterly patterns\n",
    "    quarterly_returns = df.groupby('Quarter')['Returns'].agg(['mean', 'std', 'count'])\n",
    "    quarterly_returns['mean_pct'] = quarterly_returns['mean'] * 100\n",
    "    \n",
    "    return {\n",
    "        'monthly': monthly_returns,\n",
    "        'daily': dow_returns,\n",
    "        'quarterly': quarterly_returns,\n",
    "        'raw_data': df\n",
    "    }\n",
    "\n",
    "# Analyze NIFTY 50 seasonal patterns\n",
    "if 'NIFTY50' in market_data:\n",
    "    nifty_patterns = analyze_seasonal_patterns(market_data['NIFTY50'], 'NIFTY50')\n",
    "    \n",
    "    # Create visualization\n",
    "    fig, axes = plt.subplots(2, 2, figsize=(15, 12))\n",
    "    fig.suptitle('NIFTY 50 - Seasonal Pattern Analysis (2020-2022)', fontsize=16, fontweight='bold')\n",
    "    \n",
    "    # Monthly returns\n",
    "    axes[0, 0].bar(range(1, 13), nifty_patterns['monthly']['mean_pct'], \n",
    "                   color=['red' if x < 0 else 'green' for x in nifty_patterns['monthly']['mean_pct']])\n",
    "    axes[0, 0].set_title('Average Monthly Returns')\n",
    "    axes[0, 0].set_xlabel('Month')\n",
    "    axes[0, 0].set_ylabel('Average Return (%)')\n",
    "    axes[0, 0].grid(True, alpha=0.3)\n",
    "    \n",
    "    # Day of week returns\n",
    "    axes[0, 1].bar(range(5), nifty_patterns['daily']['mean_pct'], \n",
    "                   color=['red' if x < 0 else 'green' for x in nifty_patterns['daily']['mean_pct']])\n",
    "    axes[0, 1].set_title('Average Daily Returns by Day of Week')\n",
    "    axes[0, 1].set_xlabel('Day of Week')\n",
    "    axes[0, 1].set_ylabel('Average Return (%)')\n",
    "    axes[0, 1].set_xticks(range(5))\n",
    "    axes[0, 1].set_xticklabels(['Mon', 'Tue', 'Wed', 'Thu', 'Fri'])\n",
    "    axes[0, 1].grid(True, alpha=0.3)\n",
    "    \n",
    "    # Quarterly returns\n",
    "    axes[1, 0].bar(range(1, 5), nifty_patterns['quarterly']['mean_pct'], \n",
    "                   color=['red' if x < 0 else 'green' for x in nifty_patterns['quarterly']['mean_pct']])\n",
    "    axes[1, 0].set_title('Average Quarterly Returns')\n",
    "    axes[1, 0].set_xlabel('Quarter')\n",
    "    axes[1, 0].set_ylabel('Average Return (%)')\n",
    "    axes[1, 0].grid(True, alpha=0.3)\n",
    "    \n",
    "    # Monthly volatility\n",
    "    axes[1, 1].bar(range(1, 13), nifty_patterns['monthly']['std'] * 100, \n",
    "                   color='orange', alpha=0.7)\n",
    "    axes[1, 1].set_title('Monthly Volatility (Std Dev)')\n",
    "    axes[1, 1].set_xlabel('Month')\n",
    "    axes[1, 1].set_ylabel('Volatility (%)')\n",
    "    axes[1, 1].grid(True, alpha=0.3)\n",
    "    \n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
    "    \n",
    "    # Print summary statistics\n",
    "    print(\"\\n\" + \"=\"*50)\n",
    "    print(\"NIFTY 50 SEASONAL PATTERN SUMMARY\")\n",
    "    print(\"=\"*50)\n",
    "    \n",
    "    print(\"\\nBest Performing Months:\")\n",
    "    best_months = nifty_patterns['monthly'].sort_values('mean_pct', ascending=False).head(3)\n",
    "    for month, data in best_months.iterrows():\n",
    "        month_name = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', \n",
    "                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][month-1]\n",
    "        print(f\"  {month_name}: {data['mean_pct']:.2f}% (±{data['std']*100:.2f}%)\")\n",
    "    \n",
    "    print(\"\\nWorst Performing Months:\")\n",
    "    worst_months = nifty_patterns['monthly'].sort_values('mean_pct', ascending=True).head(3)\n",
    "    for month, data in worst_months.iterrows():\n",
    "        month_name = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', \n",
    "                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][month-1]\n",
    "        print(f\"  {month_name}: {data['mean_pct']:.2f}% (±{data['std']*100:.2f}%)\")\n",
    "    \n",
    "    print(\"\\nBest Day of Week:\")\n",
    "    best_day = nifty_patterns['daily'].sort_values('mean_pct', ascending=False).iloc[0]\n",
    "    print(f\"  {nifty_patterns['daily'].sort_values('mean_pct', ascending=False).index[0]}: {best_day['mean_pct']:.3f}%\")\n",
    "    \n",
    "    print(\"\\nWorst Day of Week:\")\n",
    "    worst_day = nifty_patterns['daily'].sort_values('mean_pct', ascending=True).iloc[0]\n",
    "    print(f\"  {nifty_patterns['daily'].sort_values('mean_pct', ascending=True).index[0]}: {worst_day['mean_pct']:.3f}%\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "intraday_patterns_header"
   },
   "source": [
    "## 2. Intraday and Volume Patterns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "volume_patterns"
   },
   "outputs": [],
   "source": [
    "def analyze_volume_patterns(data, stock_name):\n",
    "    \"\"\"Analyze volume and price relationship patterns\"\"\"\n",
    "    df = data.copy()\n",
    "    df['Returns'] = df['Close'].pct_change()\n",
    "    df['Volume_MA'] = df['Volume'].rolling(window=20).mean()\n",
    "    df['Volume_Ratio'] = df['Volume'] / df['Volume_MA']\n",
    "    df['Price_Range'] = (df['High'] - df['Low']) / df['Close'] * 100\n",
    "    \n",
    "    # Volume-Return correlation\n",
    "    volume_return_corr = df['Volume'].corr(abs(df['Returns']))\n",
    "    \n",
    "    # High volume days analysis\n",
    "    high_volume_days = df[df['Volume_Ratio'] > 2.0]  # Days with 2x average volume\n",
    "    \n",
    "    return {\n",
    "        'volume_return_correlation': volume_return_corr,\n",
    "        'high_volume_days': high_volume_days,\n",
    "        'avg_return_high_volume': high_volume_days['Returns'].mean() * 100,\n",
    "        'avg_return_normal': df[df['Volume_Ratio'] <= 2.0]['Returns'].mean() * 100,\n",
    "        'data': df\n",
    "    }\n",
    "\n",
    "# Analyze volume patterns for major stocks\n",
    "volume_analysis = {}\n",
    "major_stocks = ['NIFTY50', 'RELIANCE', 'TCS', 'HDFCBANK']\n",
    "\n",
    "for stock in major_stocks:\n",
    "    if stock in market_data:\n",
    "        volume_analysis[stock] = analyze_volume_patterns(market_data[stock], stock)\n",
    "\n",
    "# Visualize volume patterns\n",
    "if volume_analysis:\n",
    "    fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n",
    "    fig.suptitle('Volume-Price Relationship Analysis', fontsize=16, fontweight='bold')\n",
    "    \n",
    "    for i, (stock, analysis) in enumerate(list(volume_analysis.items())[:4]):\n",
    "        row, col = i // 2, i % 2\n",
    "        \n",
    "        # Scatter plot of volume vs absolute returns\n",
    "        df = analysis['data'].dropna()\n",
    "        axes[row, col].scatter(df['Volume'] / 1e6, abs(df['Returns']) * 100, \n",
    "                              alpha=0.5, s=20)\n",
    "        axes[row, col].set_title(f'{stock} - Volume vs |Returns|')\n",
    "        axes[row, col].set_xlabel('Volume (Millions)')\n",
    "        axes[row, col].set_ylabel('Absolute Returns (%)')\n",
    "        axes[row, col].grid(True, alpha=0.3)\n",
    "        \n",
    "        # Add correlation text\n",
    "        corr = analysis['volume_return_correlation']\n",
    "        axes[row, col].text(0.05, 0.95, f'Correlation: {corr:.3f}', \n",
    "                           transform=axes[row, col].transAxes, \n",
    "                           bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))\n",
    "    \n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
    "    \n",
    "    # Print volume analysis summary\n",
    "    print(\"\\n\" + \"=\"*60)\n",
    "    print(\"VOLUME-PRICE RELATIONSHIP ANALYSIS\")\n",
    "    print(\"=\"*60)\n",
    "    \n",
    "    for stock, analysis in volume_analysis.items():\n",
    "        print(f\"\\n{stock}:\")\n",
    "        print(f\"  Volume-Return Correlation: {analysis['volume_return_correlation']:.3f}\")\n",
    "        print(f\"  Average Return on High Volume Days: {analysis['avg_return_high_volume']:.3f}%\")\n",
    "        print(f\"  Average Return on Normal Days: {analysis['avg_return_normal']:.3f}%\")\n",
    "        print(f\"  High Volume Days Count: {len(analysis['high_volume_days'])}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "festival_patterns_header"
   },
   "source": [
    "## 3. Festival and Event-Based Patterns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "festival_analysis"
   },
   "outputs": [],
   "source": [
    "# Indian market holidays and festivals (2020-2022)\n",
    "INDIAN_FESTIVALS = {\n",
    "    '2020': {\n",
    "        'Diwali': '2020-11-14',\n",
    "        'Holi': '2020-03-10',\n",
    "        'Dussehra': '2020-10-25',\n",
    "        'Eid': '2020-05-25'\n",
    "    },\n",
    "    '2021': {\n",
    "        'Diwali': '2021-11-04',\n",
    "        'Holi': '2021-03-29',\n",
    "        'Dussehra': '2021-10-15',\n",
    "        'Eid': '2021-05-14'\n",
    "    },\n",
    "    '2022': {\n",
    "        'Diwali': '2022-10-24',\n",
    "        'Holi': '2022-03-18',\n",
    "        'Dussehra': '2022-10-05',\n",
    "        'Eid': '2022-05-03'\n",
    "    }\n",
    "}\n",
    "\n",
    "def analyze_festival_patterns(data, stock_name):\n",
    "    \"\"\"Analyze stock performance around Indian festivals\"\"\"\n",
    "    df = data.copy()\n",
    "    df['Returns'] = df['Close'].pct_change()\n",
    "    \n",
    "    festival_analysis = {}\n",
    "    \n",
    "    for year, festivals in INDIAN_FESTIVALS.items():\n",
    "        for festival, date_str in festivals.items():\n",
    "            try:\n",
    "                festival_date = pd.to_datetime(date_str)\n",
    "                \n",
    "                # Get data around festival (5 days before and after)\n",
    "                start_window = festival_date - timedelta(days=10)\n",
    "                end_window = festival_date + timedelta(days=10)\n",
    "                \n",
    "                window_data = df[(df.index >= start_window) & (df.index <= end_window)]\n",
    "                \n",
    "                if len(window_data) > 0:\n",
    "                    pre_festival = window_data[window_data.index < festival_date]['Returns'].mean()\n",
    "                    post_festival = window_data[window_data.index > festival_date]['Returns'].mean()\n",
    "                    \n",
    "                    festival_key = f\"{festival}_{year}\"\n",
    "                    festival_analysis[festival_key] = {\n",
    "                        'pre_festival_return': pre_festival * 100,\n",
    "                        'post_festival_return': post_festival * 100,\n",
    "                        'date': festival_date,\n",
    "                        'data_points': len(window_data)\n",
    "                    }\n",
    "            except Exception as e:\n",
    "                print(f\"Error analyzing {festival} {year}: {e}\")\n",
    "    \n",
    "    return festival_analysis\n",
    "\n",
    "# Analyze festival patterns for NIFTY 50\n",
    "if 'NIFTY50' in market_data:\n",
    "    nifty_festival_patterns = analyze_festival_patterns(market_data['NIFTY50'], 'NIFTY50')\n",
    "    \n",
    "    if nifty_festival_patterns:\n",
    "        # Create visualization\n",
    "        festivals = list(nifty_festival_patterns.keys())\n",
    "        pre_returns = [nifty_festival_patterns[f]['pre_festival_return'] for f in festivals]\n",
    "        post_returns = [nifty_festival_patterns[f]['post_festival_return'] for f in festivals]\n",
    "        \n",
    "        x = np.arange(len(festivals))\n",
    "        width = 0.35\n",
    "        \n",
    "        fig, ax = plt.subplots(figsize=(14, 8))\n",
    "        bars1 = ax.bar(x - width/2, pre_returns, width, label='Pre-Festival (5 days)', alpha=0.8)\n",
    "        bars2 = ax.bar(x + width/2, post_returns, width, label='Post-Festival (5 days)', alpha=0.8)\n",
    "        \n",
    "        ax.set_xlabel('Festivals')\n",
    "        ax.set_ylabel('Average Returns (%)')\n",
    "        ax.set_title('NIFTY 50 - Festival Pattern Analysis (2020-2022)')\n",
    "        ax.set_xticks(x)\n",
    "        ax.set_xticklabels(festivals, rotation=45, ha='right')\n",
    "        ax.legend()\n",
    "        ax.grid(True, alpha=0.3)\n",
    "        ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)\n",
    "        \n",
    "        # Add value labels on bars\n",
    "        for bar in bars1:\n",
    "            height = bar.get_height()\n",
    "            ax.annotate(f'{height:.2f}%', xy=(bar.get_x() + bar.get_width() / 2, height),\n",
    "                       xytext=(0, 3), textcoords=\"offset points\", ha='center', va='bottom', fontsize=8)\n",
    "        \n",
    "        for bar in bars2:\n",
    "            height = bar.get_height()\n",
    "            ax.annotate(f'{height:.2f}%', xy=(bar.get_x() + bar.get_width() / 2, height),\n",
    "                       xytext=(0, 3), textcoords=\"offset points\", ha='center', va='bottom', fontsize=8)\n",
    "        \n",
    "        plt.tight_layout()\n",
    "        plt.show()\n",
    "        \n",
    "        # Print festival analysis summary\n",
    "        print(\"\\n\" + \"=\"*60)\n",
    "        print(\"NIFTY 50 - FESTIVAL PATTERN ANALYSIS\")\n",
    "        print(\"=\"*60)\n",
    "        \n",
    "        # Calculate average pre and post festival returns\n",
    "        avg_pre = np.mean(pre_returns)\n",
    "        avg_post = np.mean(post_returns)\n",
    "        \n",
    "        print(f\"\\nOverall Pattern:\")\n",
    "        print(f\"  Average Pre-Festival Return (5 days): {avg_pre:.3f}%\")\n",
    "        print(f\"  Average Post-Festival Return (5 days): {avg_post:.3f}%\")\n",
    "        print(f\"  Pre vs Post Difference: {avg_pre - avg_post:.3f}%\")\n",
    "        \n",
    "        # Identify best performing festivals\n",
    "        festival_performance = [(f, nifty_festival_patterns[f]['pre_festival_return']) \n",
    "                               for f in festivals]\n",
    "        festival_performance.sort(key=lambda x: x[1], reverse=True)\n",
    "        \n",
    "        print(f\"\\nBest Pre-Festival Performers:\")\n",
    "        for festival, return_pct in festival_performance[:3]:\n",
    "            print(f\"  {festival}: {return_pct:.3f}%\")\n",
    "    else:\n",
    "        print(\"No festival pattern data available for analysis.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "momentum_patterns_header"
   },
   "source": [
    "## 4. Momentum and Reversal Patterns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "momentum_analysis"
   },
   "outputs": [],
   "source": [
    "def analyze_momentum_patterns(data, stock_name):\n",
    "    \"\"\"Analyze momentum and mean reversion patterns\"\"\"\n",
    "    df = data.copy()\n",
    "    df['Returns'] = df['Close'].pct_change()\n",
    "    \n",
    "    # Calculate various lookback periods\n",
    "    for period in [1, 3, 5

    not completed