In [None]:
{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "sector_analysis_title"
      },
      "source": [
        "# Indian Stock Market Sector Analysis Research\n",
        "## Market Research System v1.0 - 2022\n",
        "\n",
        "**Objective**: Comprehensive analysis of Indian stock market sectors including:\n",
        "- Sector performance comparison\n",
        "- Inter-sector correlation analysis\n",
        "- Sector rotation patterns\n",
        "- Economic impact on different sectors\n",
        "\n",
        "**Data Sources**:\n",
        "- NSE/BSE stock data via Yahoo Finance\n",
        "- Indian economic indicators\n",
        "- Sector-wise indices (Nifty sectoral indices)\n",
        "\n",
        "**Last Updated**: December 2022"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "setup_section"
      },
      "source": [
        "## 1. Setup and Dependencies"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "install_dependencies"
      },
      "outputs": [],
      "source": [
        "# Install required packages into the environment of the running kernel\n",
        "# (`%pip` targets the kernel's interpreter, `!pip` may hit a different one).\n",
        "# `warnings` ships with the Python standard library and is not an installable\n",
        "# package: asking pip for it makes the whole command fail, so scipy and\n",
        "# scikit-learn listed on the same line would not get installed either.\n",
        "%pip install yfinance pandas numpy matplotlib seaborn plotly ta-lib-binary\n",
        "%pip install scipy scikit-learn"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "import_libraries"
      },
      "outputs": [],
      "source": [
        "import yfinance as yf\n",
        "import pandas as pd\n",
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "import seaborn as sns\n",
        "import plotly.graph_objects as go\n",
        "import plotly.express as px\n",
        "from plotly.subplots import make_subplots\n",
        "import talib as ta\n",
        "from datetime import datetime, timedelta\n",
        "import warnings\n",
        "from scipy import stats\n",
        "from sklearn.preprocessing import StandardScaler\n",
        "from sklearn.cluster import KMeans\n",
        "\n",
        "# Silence library warnings so they do not clutter the cell outputs\n",
        "warnings.filterwarnings(action='ignore')\n",
        "\n",
        "# Plot styling: matplotlib style sheet first, then the seaborn palette\n",
        "plt.style.use('seaborn-v0_8')\n",
        "sns.set_palette(\"husl\")\n",
        "\n",
        "# Confirmation banner with the wall-clock time of this run\n",
        "print(\"Libraries imported successfully!\")\n",
        "print(f\"Analysis Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "data_definition"
      },
      "source": [
        "## 2. Indian Market Sector Definitions and Stock Universe"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "define_sectors"
      },
      "outputs": [],
      "source": [
        "# Indian Stock Market Sectors with major stocks (2022 focus).\n",
        "# Each entry maps a short sector key to:\n",
        "#   'stocks': Yahoo Finance tickers of NSE-listed constituents ('.NS' suffix)\n",
        "#   'index' : Yahoo Finance ticker of the matching Nifty sectoral index\n",
        "#   'name'  : display name used in printed summaries\n",
        "INDIAN_SECTORS = {\n",
        "    'IT': {\n",
        "        # NOTE(review): LTI and Mindtree merged into LTIMindtree (LTIM.NS) in Nov 2022;\n",
        "        # confirm Yahoo Finance still serves history for the two legacy tickers.\n",
        "        'stocks': ['TCS.NS', 'INFY.NS', 'HCLTECH.NS', 'WIPRO.NS', 'TECHM.NS',\n",
        "                  'LTI.NS', 'MINDTREE.NS', 'MPHASIS.NS', 'LTTS.NS', 'COFORGE.NS'],\n",
        "        'index': '^CNXIT',\n",
        "        'name': 'Information Technology'\n",
        "    },\n",
        "    'Banking': {\n",
        "        'stocks': ['HDFCBANK.NS', 'ICICIBANK.NS', 'KOTAKBANK.NS', 'AXISBANK.NS', 'SBIN.NS',\n",
        "                  'INDUSINDBK.NS', 'BANDHANBNK.NS', 'FEDERALBNK.NS', 'PNB.NS', 'BANKBARODA.NS'],\n",
        "        # Nifty Bank is published on Yahoo Finance as ^NSEBANK (there is no ^CNXBANK)\n",
        "        'index': '^NSEBANK',\n",
        "        'name': 'Banking & Financial Services'\n",
        "    },\n",
        "    'Pharma': {\n",
        "        # Aurobindo Pharma trades as AUROPHARMA; Cadila Healthcare (CADILAHC) was\n",
        "        # renamed Zydus Lifesciences (ZYDUSLIFE) in 2022.\n",
        "        'stocks': ['SUNPHARMA.NS', 'DRREDDY.NS', 'CIPLA.NS', 'DIVISLAB.NS', 'BIOCON.NS',\n",
        "                  'LUPIN.NS', 'AUROPHARMA.NS', 'ZYDUSLIFE.NS', 'TORNTPHARM.NS', 'GLENMARK.NS'],\n",
        "        'index': '^CNXPHARMA',\n",
        "        'name': 'Pharmaceuticals'\n",
        "    },\n",
        "    'FMCG': {\n",
        "        'stocks': ['HINDUNILVR.NS', 'ITC.NS', 'NESTLEIND.NS', 'BRITANNIA.NS', 'DABUR.NS',\n",
        "                  'GODREJCP.NS', 'COLPAL.NS', 'MARICO.NS', 'UBL.NS', 'TATACONSUM.NS'],\n",
        "        'index': '^CNXFMCG',\n",
        "        'name': 'Fast Moving Consumer Goods'\n",
        "    },\n",
        "    'Auto': {\n",
        "        'stocks': ['MARUTI.NS', 'TATAMOTORS.NS', 'M&M.NS', 'BAJAJ-AUTO.NS', 'HEROMOTOCO.NS',\n",
        "                  'EICHERMOT.NS', 'TVSMOTOR.NS', 'ASHOKLEY.NS', 'BALKRISIND.NS', 'MRF.NS'],\n",
        "        'index': '^CNXAUTO',\n",
        "        'name': 'Automobile'\n",
        "    },\n",
        "    'Metals': {\n",
        "        # COALINDIA.NS is listed under both Metals and Energy, so the two sector\n",
        "        # averages share one constituent.\n",
        "        'stocks': ['TATASTEEL.NS', 'JSWSTEEL.NS', 'HINDALCO.NS', 'VEDL.NS', 'SAIL.NS',\n",
        "                  'JINDALSTEL.NS', 'NMDC.NS', 'COALINDIA.NS', 'NATIONALUM.NS', 'MOIL.NS'],\n",
        "        'index': '^CNXMETAL',\n",
        "        'name': 'Metals & Mining'\n",
        "    },\n",
        "    'Energy': {\n",
        "        'stocks': ['RELIANCE.NS', 'ONGC.NS', 'IOC.NS', 'BPCL.NS', 'HINDPETRO.NS',\n",
        "                  'GAIL.NS', 'POWERGRID.NS', 'NTPC.NS', 'COALINDIA.NS', 'OIL.NS'],\n",
        "        'index': '^CNXENERGY',\n",
        "        'name': 'Energy & Utilities'\n",
        "    },\n",
        "    'Realty': {\n",
        "        # Kolte-Patil Developers trades under the symbol KOLTEPATIL (no hyphen)\n",
        "        'stocks': ['DLF.NS', 'GODREJPROP.NS', 'OBEROIRLTY.NS', 'PRESTIGE.NS', 'SOBHA.NS',\n",
        "                  'PHOENIXLTD.NS', 'BRIGADE.NS', 'MAHLIFE.NS', 'SUNTECK.NS', 'KOLTEPATIL.NS'],\n",
        "        'index': '^CNXREALTY',\n",
        "        'name': 'Real Estate'\n",
        "    }\n",
        "}\n",
        "\n",
        "# Market Indices for comparison (label -> Yahoo Finance ticker)\n",
        "MARKET_INDICES = {\n",
        "    'NIFTY50': '^NSEI',\n",
        "    'SENSEX': '^BSESN',\n",
        "    'NIFTY_NEXT50': '^NSMIDCP',\n",
        "    'NIFTY_SMALLCAP': '^CNXSC'\n",
        "}\n",
        "\n",
        "print(\"Sector definitions loaded:\")\n",
        "for sector, data in INDIAN_SECTORS.items():\n",
        "    print(f\"- {data['name']}: {len(data['stocks'])} stocks\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "data_collection"
      },
      "source": [
        "## 3. Data Collection and Preprocessing"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "fetch_data_function"
      },
      "outputs": [],
      "source": [
        "def fetch_stock_data(symbols, start_date, end_date, progress=True):\n",
        "    \"\"\"\n",
        "    Download price history for several tickers, tolerating per-symbol failures.\n",
        "\n",
        "    Args:\n",
        "        symbols: sequence of Yahoo Finance ticker symbols\n",
        "        start_date, end_date: date bounds forwarded to ``Ticker.history``\n",
        "        progress: when True, print one progress line per symbol\n",
        "\n",
        "    Returns:\n",
        "        Tuple ``(data, failed)``: ``data`` maps every accepted symbol to its\n",
        "        history frame; ``failed`` lists the symbols that raised, came back\n",
        "        empty, or had 10 rows or fewer.\n",
        "    \"\"\"\n",
        "    fetched = {}\n",
        "    failed_symbols = []\n",
        "\n",
        "    for i, symbol in enumerate(symbols):\n",
        "        if progress:\n",
        "            print(f\"Fetching {symbol} ({i+1}/{len(symbols)})\")\n",
        "\n",
        "        try:\n",
        "            history = yf.Ticker(symbol).history(start=start_date, end=end_date, auto_adjust=True)\n",
        "            # Minimum data requirement: a non-empty frame with more than 10 rows\n",
        "            has_enough_rows = not history.empty and len(history) > 10\n",
        "        except Exception as e:\n",
        "            print(f\"Error fetching {symbol}: {str(e)}\")\n",
        "            failed_symbols.append(symbol)\n",
        "        else:\n",
        "            if has_enough_rows:\n",
        "                fetched[symbol] = history\n",
        "            else:\n",
        "                failed_symbols.append(symbol)\n",
        "\n",
        "    if failed_symbols:\n",
        "        print(f\"Failed to fetch data for: {failed_symbols}\")\n",
        "\n",
        "    return fetched, failed_symbols\n",
        "\n",
        "def calculate_returns(price_data, period='daily'):\n",
        "    \"\"\"\n",
        "    Compute simple percentage returns at the requested frequency.\n",
        "\n",
        "    'weekly' and 'monthly' first resample to the last observation of each\n",
        "    week / month; 'daily' and any unrecognised value use the data as given.\n",
        "    \"\"\"\n",
        "    # Resampling rule for the period; None means \"keep the native frequency\"\n",
        "    rule = 'W' if period == 'weekly' else ('M' if period == 'monthly' else None)\n",
        "    prices = price_data if rule is None else price_data.resample(rule).last()\n",
        "    return prices.pct_change()\n",
        "\n",
        "print(\"Data fetching functions defined successfully!\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "set_analysis_period"
      },
      "outputs": [],
      "source": [
        "# Set analysis period (2022 focus); the window starts in 2020 for extra history\n",
        "START_DATE, END_DATE = '2020-01-01', '2022-12-31'\n",
        "ANALYSIS_YEAR = 2022\n",
        "\n",
        "print(\n",
        "    f\"Analysis Period: {START_DATE} to {END_DATE}\",\n",
        "    f\"Primary Focus Year: {ANALYSIS_YEAR}\",\n",
        "    sep=\"\\n\",\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "fetch_market_indices"
      },
      "outputs": [],
      "source": [
        "# Download the broad-market benchmark indices listed in MARKET_INDICES\n",
        "print(\"Fetching market indices data...\")\n",
        "indices_symbols = [*MARKET_INDICES.values()]\n",
        "indices_data, failed_indices = fetch_stock_data(\n",
        "    indices_symbols, start_date=START_DATE, end_date=END_DATE\n",
        ")\n",
        "\n",
        "print(f\"Successfully fetched {len(indices_data)} market indices\")\n",
        "if failed_indices:\n",
        "    print(f\"Failed indices: {failed_indices}\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "fetch_sector_data"
      },
      "outputs": [],
      "source": [
        "# Fetch sector-wise stock data.\n",
        "# Result layout: sector_data[sector_key] = {\n",
        "#     'stocks': {symbol: history frame, plus an optional '<sector_key>_INDEX' entry},\n",
        "#     'failed': symbols that could not be fetched,\n",
        "#     'info'  : the sector definition from INDIAN_SECTORS\n",
        "# }\n",
        "print(\"Fetching sector-wise stock data...\")\n",
        "sector_data = {}\n",
        "all_stocks = []\n",
        "\n",
        "for sector_name, sector_info in INDIAN_SECTORS.items():\n",
        "    print(f\"\\nFetching {sector_info['name']} sector data...\")\n",
        "    stocks = sector_info['stocks']\n",
        "    all_stocks.extend(stocks)\n",
        "\n",
        "    # Fetch individual stock data\n",
        "    stock_data, failed_stocks = fetch_stock_data(stocks, START_DATE, END_DATE)\n",
        "\n",
        "    # Fetch sector index data; it is stored next to the stocks under '<sector_key>_INDEX'.\n",
        "    # Catch Exception rather than using a bare except so a kernel interrupt still\n",
        "    # stops the cell, and report the reason instead of hiding it.\n",
        "    try:\n",
        "        index_data = yf.Ticker(sector_info['index']).history(start=START_DATE, end=END_DATE, auto_adjust=True)\n",
        "        if not index_data.empty:\n",
        "            stock_data[f\"{sector_name}_INDEX\"] = index_data\n",
        "        else:\n",
        "            print(f\"No index data returned for {sector_name} ({sector_info['index']})\")\n",
        "    except Exception as e:\n",
        "        print(f\"Could not fetch index data for {sector_name}: {e}\")\n",
        "\n",
        "    sector_data[sector_name] = {\n",
        "        'stocks': stock_data,\n",
        "        'failed': failed_stocks,\n",
        "        'info': sector_info\n",
        "    }\n",
        "\n",
        "    # The count below includes the sector index entry when it was fetched\n",
        "    print(f\"Sector {sector_name}: {len(stock_data)} securities fetched, {len(failed_stocks)} failed\")\n",
        "\n",
        "print(\"\\nTotal data collection complete!\")\n",
        "print(f\"Total sectors: {len(sector_data)}\")\n",
        "print(f\"Total unique stocks attempted: {len(set(all_stocks))}\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "sector_performance"
      },
      "source": [
        "## 4. Sector Performance Analysis"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "calculate_sector_returns"
      },
      "outputs": [],
      "source": [
        "def calculate_sector_metrics(sector_data_dict, year_focus=2022):\n",
        "    \"\"\"\n",
        "    Calculate comprehensive sector performance metrics.\n",
        "\n",
        "    For each sector the constituents' closing prices are averaged per day into\n",
        "    one sector price series; return, volatility, Sharpe ratio (no risk-free\n",
        "    rate is subtracted) and drawdown are derived from that series.\n",
        "\n",
        "    Args:\n",
        "        sector_data_dict: mapping of sector key -> dict with a 'stocks' mapping\n",
        "            (symbol -> DataFrame with a 'Close' column and a DatetimeIndex) and\n",
        "            an 'info' dict holding the display 'name'.\n",
        "        year_focus: calendar year used for the year-specific metrics. The\n",
        "            column names keep their '_2022' suffix whatever year is passed.\n",
        "\n",
        "    Returns:\n",
        "        DataFrame with one row per sector and one column per metric (numeric\n",
        "        dtypes, plus the text column 'sector_name'). Sectors without usable\n",
        "        prices, or without data in ``year_focus``, are left out.\n",
        "    \"\"\"\n",
        "    sector_metrics = {}\n",
        "\n",
        "    for sector_name, data in sector_data_dict.items():\n",
        "        stocks_data = data['stocks']\n",
        "\n",
        "        if not stocks_data:\n",
        "            continue\n",
        "\n",
        "        # Collect constituent closes. Entries whose key contains 'INDEX' hold the\n",
        "        # sector index itself (a different price scale) and must not be averaged\n",
        "        # in with the stocks; create_sector_returns_matrix filters the same way.\n",
        "        closes = pd.DataFrame()\n",
        "        for symbol, stock_data in stocks_data.items():\n",
        "            if 'Close' in stock_data.columns and 'INDEX' not in symbol:\n",
        "                closes[symbol] = stock_data['Close']\n",
        "\n",
        "        if closes.empty:\n",
        "            continue\n",
        "\n",
        "        # Sector series = plain average of price levels across constituents.\n",
        "        # NOTE(review): averaging prices lets high-priced stocks dominate; averaging\n",
        "        # per-stock returns would be a true equal weighting - confirm the intent.\n",
        "        sector_close = closes.mean(axis=1)\n",
        "        sector_returns = sector_close.pct_change().dropna()\n",
        "\n",
        "        # Restrict prices and returns to the focus year\n",
        "        year_close = sector_close[sector_close.index.year == year_focus]\n",
        "        year_returns = sector_returns[sector_returns.index.year == year_focus]\n",
        "\n",
        "        if year_close.empty or year_returns.empty:\n",
        "            # Nothing to measure for this year; skip instead of failing on .iloc[0]\n",
        "            print(f\"Skipping {sector_name}: no data for {year_focus}\")\n",
        "            continue\n",
        "\n",
        "        # Annualised with 252 trading days per year\n",
        "        year_volatility = year_returns.std() * np.sqrt(252)\n",
        "\n",
        "        metrics = {\n",
        "            'sector_name': data['info']['name'],\n",
        "            'total_return_2022': (year_close.iloc[-1] / year_close.iloc[0] - 1) * 100,\n",
        "            'total_return_period': (sector_close.iloc[-1] / sector_close.iloc[0] - 1) * 100,\n",
        "            'volatility_2022': year_volatility * 100,\n",
        "            'volatility_period': sector_returns.std() * np.sqrt(252) * 100,\n",
        "            # NaN when the year's volatility is zero or undefined (avoids inf / division warnings)\n",
        "            'sharpe_ratio_2022': (year_returns.mean() * 252) / year_volatility if year_volatility > 0 else np.nan,\n",
        "            'max_drawdown_2022': calculate_max_drawdown(year_close),\n",
        "            'avg_daily_return_2022': year_returns.mean() * 100,\n",
        "            'positive_days_2022': (year_returns > 0).sum() / len(year_returns) * 100,\n",
        "            # Number of stocks that actually entered the average\n",
        "            'stocks_count': closes.shape[1],\n",
        "            'data_points': len(sector_close)\n",
        "        }\n",
        "\n",
        "        sector_metrics[sector_name] = metrics\n",
        "\n",
        "    # Transposing the mixed-type frame leaves every column as object dtype, which\n",
        "    # silently disables .round() downstream; infer_objects restores numeric dtypes.\n",
        "    return pd.DataFrame(sector_metrics).T.infer_objects()\n",
        "\n",
        "def calculate_max_drawdown(price_series):\n",
        "    \"\"\"\n",
        "    Calculate maximum drawdown.\n",
        "\n",
        "    Returns the most negative gap between the series and its running peak,\n",
        "    as a percentage of that peak (0 when the series never falls below an\n",
        "    earlier high).\n",
        "    \"\"\"\n",
        "    rolling_max = price_series.expanding().max()\n",
        "    drawdown = (price_series - rolling_max) / rolling_max\n",
        "    return drawdown.min() * 100\n",
        "\n",
        "# Build the metrics table for the focus year and print a ranked summary\n",
        "print(\"Calculating sector performance metrics...\")\n",
        "sector_performance = calculate_sector_metrics(sector_data, year_focus=ANALYSIS_YEAR)\n",
        "\n",
        "print(\"\\nSector Performance Summary (2022):\")\n",
        "print(\"=\" * 60)\n",
        "# Best-performing sector first\n",
        "performance_display = (\n",
        "    sector_performance[['sector_name', 'total_return_2022', 'volatility_2022', 'sharpe_ratio_2022']]\n",
        "    .round(2)\n",
        "    .sort_values('total_return_2022', ascending=False)\n",
        ")\n",
        "print(performance_display.to_string())"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "visualize_sector_performance"
      },
      "outputs": [],
      "source": [
        "# Visualize sector performance: 2x2 dashboard of the 2022 metrics\n",
        "fig, axes = plt.subplots(2, 2, figsize=(16, 12))\n",
        "(ax1, ax2), (ax3, ax4) = axes\n",
        "fig.suptitle('Indian Stock Market Sector Analysis - 2022', fontsize=16, fontweight='bold')\n",
        "\n",
        "# 1. Total Returns 2022 (negative bars in red, the rest in green)\n",
        "returns_sorted = sector_performance.sort_values('total_return_2022', ascending=True)\n",
        "colors = ['red' if x < 0 else 'green' for x in returns_sorted['total_return_2022']]\n",
        "ax1.barh(y=returns_sorted.index, width=returns_sorted['total_return_2022'], color=colors, alpha=0.7)\n",
        "ax1.set_title('Sector Returns 2022 (%)')\n",
        "ax1.set_xlabel('Return (%)')\n",
        "\n",
        "# 2. Risk-Return Scatter, one labelled point per sector\n",
        "ax2.scatter(sector_performance['volatility_2022'], sector_performance['total_return_2022'],\n",
        "            s=100, alpha=0.7, c=range(len(sector_performance)), cmap='viridis')\n",
        "for idx, sector in enumerate(sector_performance.index):\n",
        "    point = (sector_performance.loc[sector, 'volatility_2022'],\n",
        "             sector_performance.loc[sector, 'total_return_2022'])\n",
        "    ax2.annotate(sector, point, xytext=(5, 5), textcoords='offset points', fontsize=8)\n",
        "ax2.set_xlabel('Volatility 2022 (%)')\n",
        "ax2.set_ylabel('Return 2022 (%)')\n",
        "ax2.set_title('Risk-Return Profile')\n",
        "\n",
        "# 3. Sharpe Ratios (same red / green convention as panel 1)\n",
        "sharpe_sorted = sector_performance.sort_values('sharpe_ratio_2022', ascending=True)\n",
        "colors_sharpe = ['red' if x < 0 else 'green' for x in sharpe_sorted['sharpe_ratio_2022']]\n",
        "ax3.barh(y=sharpe_sorted.index, width=sharpe_sorted['sharpe_ratio_2022'], color=colors_sharpe, alpha=0.7)\n",
        "ax3.set_title('Sharpe Ratios 2022')\n",
        "ax3.set_xlabel('Sharpe Ratio')\n",
        "\n",
        "# 4. Maximum Drawdowns\n",
        "dd_sorted = sector_performance.sort_values('max_drawdown_2022', ascending=True)\n",
        "ax4.barh(y=dd_sorted.index, width=dd_sorted['max_drawdown_2022'], color='red', alpha=0.7)\n",
        "ax4.set_title('Maximum Drawdowns 2022 (%)')\n",
        "ax4.set_xlabel('Max Drawdown (%)')\n",
        "\n",
        "# Shared cosmetics: dashed zero line on the signed bar charts, light grid everywhere\n",
        "ax1.axvline(x=0, color='black', linestyle='--', alpha=0.5)\n",
        "ax3.axvline(x=0, color='black', linestyle='--', alpha=0.5)\n",
        "ax1.grid(True, alpha=0.3)\n",
        "ax2.grid(True, alpha=0.3)\n",
        "ax3.grid(True, alpha=0.3)\n",
        "ax4.grid(True, alpha=0.3)\n",
        "\n",
        "plt.tight_layout()\n",
        "plt.show()\n",
        "\n",
        "# Print detailed performance table, best-performing sector first\n",
        "print(\"\\nDetailed Sector Performance Metrics (2022):\")\n",
        "print(\"=\" * 80)\n",
        "detailed_cols = ['sector_name', 'total_return_2022', 'volatility_2022',\n",
        "                 'sharpe_ratio_2022', 'max_drawdown_2022', 'positive_days_2022']\n",
        "detailed_display = (\n",
        "    sector_performance[detailed_cols]\n",
        "    .round(2)\n",
        "    .sort_values('total_return_2022', ascending=False)\n",
        ")\n",
        "print(detailed_display.to_string())"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "correlation_analysis"
      },
      "source": [
        "## 5. Inter-Sector Correlation Analysis"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "calculate_correlations"
      },
      "outputs": [],
      "source": [
        "def create_sector_returns_matrix(sector_data_dict, year_focus=2022):\n",
        "    \"\"\"\n",
        "    Build a date x sector table of daily returns for correlation analysis.\n",
        "\n",
        "    Each sector's return is the daily percentage change of the average closing\n",
        "    price of its stocks; entries whose key contains 'INDEX' are ignored.\n",
        "    Pass a falsy ``year_focus`` (e.g. None) to keep the whole period. Dates\n",
        "    with a missing value in any sector are dropped from the result.\n",
        "    \"\"\"\n",
        "    returns_matrix = pd.DataFrame()\n",
        "\n",
        "    for sector_name, data in sector_data_dict.items():\n",
        "        constituents = data['stocks']\n",
        "        if not constituents:\n",
        "            continue\n",
        "\n",
        "        # Closing prices of the sector's stocks, one column per symbol\n",
        "        close_prices = pd.DataFrame()\n",
        "        for symbol, frame in constituents.items():\n",
        "            if 'Close' not in frame.columns or 'INDEX' in symbol:\n",
        "                continue\n",
        "            close_prices[symbol] = frame['Close']\n",
        "\n",
        "        if close_prices.empty:\n",
        "            continue\n",
        "\n",
        "        # Daily return of the averaged price series\n",
        "        daily = close_prices.mean(axis=1).pct_change().dropna()\n",
        "\n",
        "        # Keep only the focus year when one is given\n",
        "        if year_focus:\n",
        "            daily = daily[daily.index.year == year_focus]\n",
        "\n",
        "        returns_matrix[sector_name] = daily\n",
        "\n",
        "    return returns_matrix.dropna()\n",
        "\n",
        "# Build the return matrices: focus year only, and the whole download window\n",
        "print(\"Creating sector returns matrix...\")\n",
        "sector_returns_2022 = create_sector_returns_matrix(sector_data, year_focus=2022)\n",
        "sector_returns_all = create_sector_returns_matrix(sector_data, year_focus=None)\n",
        "\n",
        "# Report the size of each matrix and which sectors made it in\n",
        "print(f\"Sector returns matrix shape (2022): {sector_returns_2022.shape}\")\n",
        "print(f\"Sector returns matrix shape (All period): {sector_returns_all.shape}\")\n",
        "print(f\"Available sectors: {list(sector_returns_2022.columns)}\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "correlation_heatmap"
      },
      "outputs": [],
      "source": [
        "# Calculate and visualize correlation matrices\n",
        "if not sector_returns_2022.empty:\n",
        "    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))\n",
        "    \n",
        "    # 2022 Correlations\n",
        "    corr_2022 = sector_returns_2022.corr()\n",
        "    mask = np.triu(np.ones_like(corr_2022, dtype=bool))\n",
        "    sns.heatmap(corr_2022, mask=mask, annot=True, cmap='RdYlBu_r', center=0,\n",
        "                square=True, linewidths=0.5, cbar_kws={\"shrink\": .8}, ax=ax1)\n",
        "    ax1.set_title('Sector Correlations - 2022', fontsize=14, fontweight='bold')\n",
        "    \n",
        "    # All Period Correlations\n",
        "    if not sector_returns_all.empty:\n",
        "        corr_all = sector_returns_all.corr()\n",
        "        mask_all = np.triu(np.ones_like(corr_all, dtype=bool))\n",
        "        sns.heatmap(corr_all, mask=mask_all, annot=True, cmap='RdYlBu_r', center=0,\n",
        "                    square=True, linewidths=0.5, cbar_kws={\"shrink\": .8}, ax=ax2)\n",
        "        ax2.set_title('Sector Correlations - Full Period', fontsize=14, fontweight='bold')\n",
        "    \n",
        "    plt.tight_layout()\n",
        "    plt.show()\n",
        "    \n",
        "    # Print correlation insights\n",
        "    print(\"\\nCorrelation Analysis Insights (2022):\")\n",
        "    print(\"=\" * 50)\n",
        "    \n",
        "    # Find highest and lowest correlations\n",
        "    corr_values = corr_2022.values\n",
        "    np.fill_diagonal(corr_values, np.nan)  # Remove diagonal\n",
        "    \n",
        "    # Get sector pairs with highest/lowest correlations\n",
        "    corr_flat = pd.DataFrame(corr_2022.stack()).reset_index()\n",
        "    corr_flat.columns = ['Sector1', 'Sector2', 'Correlation']\n",
        "    corr_flat = corr_flat[corr_flat['Sector1']




        not completed