In [1]:
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup
import datetime
import logging
import os
import io

# --- Configuration ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# --- USER-EDITABLE PATH VARIABLE ---
# Define the output directory for the CSV files.
CUSTOM_OUTPUT_DIR = r"/content/output2"  # <-- EDIT THIS LINE TO YOUR DESIRED PATH


def _ftse_to_yahoo(symbol):
    """Convert a London TIDM as listed on Wikipedia into a Yahoo Finance symbol.

    Yahoo writes share classes with a hyphen and appends the ``.L`` exchange
    suffix, so ``BT.A`` becomes ``BT-A.L`` and a TIDM with a trailing dot such
    as ``BP.`` becomes ``BP.L``. Symbols that already end in ``.L`` are
    returned unchanged.
    """
    if symbol.upper().endswith('.L'):
        return symbol
    return f"{symbol.rstrip('.').replace('.', '-')}.L"


# --- Index Scraping Configurations ---
# One entry per index. Each entry tells the scraper which Wikipedia page and
# table to read, which columns hold the ticker and the company name, and how
# to turn the listed ticker into the symbol passed to yfinance ("clean_fn").
INDEX_CONFIG = {
    "sp500": {
        "name": "S&P 500",
        "url": "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies",
        "table_identifier": {'id': 'constituents'},
        "ticker_column": "Symbol",
        "name_column": "Security",
        "clean_fn": lambda s: s.replace('.', '-'),  # 'BRK.B' -> 'BRK-B'
    },
    "dowjones": {
        "name": "Dow Jones",
        "url": "https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average",
        "table_identifier": {'id': 'constituents'},
        "ticker_column": "Symbol",
        "name_column": "Company",
        "clean_fn": lambda s: s,  # Tickers are used as listed
    },
    "nasdaq100": {
        "name": "NASDAQ 100",
        "url": "https://en.wikipedia.org/wiki/Nasdaq-100",
        "table_identifier": {'id': 'constituents'},
        "ticker_column": "Ticker",
        "name_column": "Company",
        "clean_fn": lambda s: s.replace('.', '-'),
    },
    "nifty50": {
        "name": "Nifty 50",
        "url": "https://en.wikipedia.org/wiki/NIFTY_50",
        "table_identifier": {'id': 'constituents'},
        "ticker_column": "Symbol",
        "name_column": "Company name",
        "clean_fn": lambda s: f"{s}.NS",  # Append .NS for National Stock Exchange of India
    },
    "ftse100": {
        "name": "FTSE 100",
        "url": "https://en.wikipedia.org/wiki/FTSE_100_Index",
        "table_identifier": {'id': 'constituents'},
        "ticker_column": "Ticker",
        "name_column": "Company",
        "clean_fn": _ftse_to_yahoo,  # 'BT.A' -> 'BT-A.L', 'AAL' -> 'AAL.L'
    },
}

def get_tickers_from_wikipedia(config):
    """Scrape the constituent tickers and company names of one index from Wikipedia.

    Args:
        config: One entry of ``INDEX_CONFIG``. The keys ``name``, ``url``,
            ``table_identifier``, ``ticker_column``, ``name_column`` and
            ``clean_fn`` are read.

    Returns:
        A ``(tickers, ticker_name_map)`` tuple: the list of cleaned tickers
        and a dict mapping each cleaned ticker to its company name. Any
        failure (network error, missing table or column, no usable rows) is
        logged and reported as ``([], {})`` so the caller can skip the index.
    """
    name = config['name']
    url = config['url']
    ticker_col = config['ticker_column']
    name_col = config['name_column']
    logging.info(f"--- Scraping ticker list for {name} ---")
    try:
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'lxml')
        table = soup.find('table', config['table_identifier'])
        if table is None:
            logging.error(f"Could not find specified table on {url} for {name}.")
            return [], {}

        df = pd.read_html(io.StringIO(str(table)))[0]

        for role, column in (("Ticker", ticker_col), ("Name", name_col)):
            if column not in df.columns:
                logging.error(f"{role} column '{column}' not found in {name} table. Available columns: {df.columns.tolist()}")
                return [], {}

        # Rows without a ticker (blank or footnote rows) would otherwise be
        # stringified to the bogus symbol "nan" and sent to the price API.
        listed = df.dropna(subset=[ticker_col])
        raw_symbols = listed[ticker_col].astype(str).str.strip()
        has_symbol = raw_symbols != ''
        raw_symbols = raw_symbols[has_symbol]

        # Fall back to the listed ticker when the company name cell is empty,
        # instead of labelling the stock "nan".
        company_names = listed.loc[has_symbol, name_col]
        company_names = company_names.where(company_names.notna(), raw_symbols).astype(str).str.strip()

        # Map cleaned tickers to company names (a later duplicate ticker wins).
        ticker_name_map = dict(zip(raw_symbols.apply(config['clean_fn']), company_names))
        tickers = list(ticker_name_map.keys())

        if not tickers:
            logging.error(f"No usable tickers found in {name} table.")
            return [], {}

        logging.info(f"[SUCCESS] Fetched {len(tickers)} tickers for {name}. First 5: {tickers[:5]}")
        return tickers, ticker_name_map

    except Exception as e:
        # Best-effort boundary: one index failing must not stop the others.
        logging.error(f"Scraping tickers for {name} failed: {e}")
        return [], {}

def fetch_and_save_closing_prices(index_name, tickers, ticker_name_map, output_dir):
    """Download three years of daily Close/Volume data and save it as one CSV.

    All tickers are fetched in a single batch call. The CSV has one
    ``<company>_Close`` and one ``<company>_Volume`` column per stock and is
    written to ``<output_dir>/<index name with underscores>_stock_data.csv``.

    Args:
        index_name: Display name of the index; also used to build the file name.
        tickers: Symbols to download. An empty list is skipped with a warning.
        ticker_name_map: Maps ticker -> company name used in the column
            headers; tickers missing from the map keep the ticker as the name.
        output_dir: Directory for the CSV; created if it does not exist.

    Returns:
        None. Errors are logged rather than raised so that the remaining
        indices can still be processed.
    """
    if not tickers:
        logging.warning(f"[SKIP] No tickers provided for {index_name}.")
        return

    logging.info(f"=== Starting data fetch for {index_name} ({len(tickers)} stocks) ===")

    end_date = datetime.date.today()
    start_date = end_date - datetime.timedelta(days=3*365)  # Fetch 3 years of data

    try:
        # auto_adjust is passed explicitly: its default changed between
        # yfinance releases, and pinning it keeps the output reproducible.
        all_data = yf.download(tickers, start=start_date, end=end_date, progress=True, auto_adjust=True)

        if all_data.empty:
            logging.error(f"No data returned for any tickers in {index_name}.")
            return

        closes = all_data['Close']
        volumes = all_data['Volume']
        # A single-ticker download may come back with flat columns, in which
        # case these are Series; normalize to one column per ticker.
        if isinstance(closes, pd.Series):
            closes = closes.to_frame(tickers[0])
        if isinstance(volumes, pd.Series):
            volumes = volumes.to_frame(tickers[0])

        series_list = []
        skipped = []
        for ticker in tickers:
            # Failed downloads are either absent or present as an all-NaN
            # column; neither counts as fetched data.
            if ticker not in closes.columns or closes[ticker].isna().all():
                skipped.append(ticker)
                continue

            company_name = ticker_name_map.get(ticker, ticker)
            series_list.append(closes[ticker].rename(f"{company_name}_Close".strip()))

            if ticker in volumes.columns:
                volume = volumes[ticker]
            else:
                # Keep the Close/Volume pairing even when volume is unavailable.
                volume = pd.Series(float('nan'), index=all_data.index)
            series_list.append(volume.rename(f"{company_name}_Volume".strip()))

        if skipped:
            logging.warning(f"No price data for {len(skipped)} ticker(s) in {index_name}: {skipped}")

        if not series_list:
            logging.error(f"No valid ticker data to combine for {index_name}.")
            return

        combined_df = pd.concat(series_list, axis=1)
        combined_df.index.name = 'Date'
        logging.info(f"Finished fetching data for {index_name}.")
        # Every stock contributes a Close and a Volume column.
        logging.info(f"  Result shape: dates={combined_df.shape[0]}, successfully fetched stocks={combined_df.shape[1]//2}")
        logging.info(f"  Sample Head:\n{combined_df.head(2)}")

        # Save DataFrame as CSV
        os.makedirs(output_dir, exist_ok=True)
        filename = os.path.join(output_dir, f"{index_name.replace(' ', '_')}_stock_data.csv")
        combined_df.to_csv(filename)
        logging.info(f"[SAVED] Data saved to {filename}\n")

    except Exception as e:
        # Best-effort boundary: log and continue with the next index.
        logging.error(f"An error occurred during data download for {index_name}: {e}")

if __name__ == "__main__":
    # Make sure the destination folder is there before any index is processed.
    os.makedirs(CUSTOM_OUTPUT_DIR, exist_ok=True)

    # Scrape the constituents of each index, then download and save its prices.
    for index_key in ("sp500", "dowjones", "nasdaq100", "nifty50", "ftse100"):
        index_cfg = INDEX_CONFIG[index_key]
        symbols, names_by_symbol = get_tickers_from_wikipedia(index_cfg)
        fetch_and_save_closing_prices(
            index_cfg['name'],
            symbols,
            names_by_symbol,
            CUSTOM_OUTPUT_DIR,
        )

    logging.info("=== ALL PROCESSES COMPLETE ===")

2025-10-29 17:02:46,161 - INFO - --- Scraping ticker list for S&P 500 ---
2025-10-29 17:02:46,892 - INFO - [SUCCESS] Fetched 503 tickers for S&P 500. First 5: ['MMM', 'AOS', 'ABT', 'ABBV', 'ACN']
2025-10-29 17:02:46,894 - INFO - === Starting data fetch for S&P 500 (503 stocks) ===


YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  502 of 503 completed
2025-10-29 17:04:55,029 - ERROR - 
503 Failed downloads:
2025-10-29 17:04:55,031 - ERROR - ['KEYS', 'PWR', 'CTRA', 'BMY', 'VMC', 'ELV', 'HSY', 'IP', 'CVS', 'HBAN', 'SJM', 'KMX', 'BXP', 'LIN', 'FAST', 'CNC', 'WELL', 'OXY', 'UHS', 'MTD', 'DHR', 'POOL', 'MSCI', 'TAP', 'AEP', 'CAT', 'ATO', 'MDT', 'TGT', 'HAL', 'VST', 'V', 'AVGO', 'BLDR', 'ZTS', 'DD', 'AEE', 'ROL', 'MLM', 'GEN', 'PH', 'TDG', 'HOOD', 'ORLY', 'PEG', 'KLAC', 'LMT', 'ARE', 'IVZ', 'IQV', 'TER', 'BAC', 'FE', 'SOLV', 'AZO', 'IBM', 'TDY', 'UBER', 'ITW', 'NWSA', 'APTV', 'PKG', 'LOW', 'ECL', 'MHK', 'ESS', 'WDAY', 'HUM', 'GIS', 'KR', 'WMB', 'VLO', 'LDOS', 'HPE', 'PRU', 'AES', 'NOC', 'CAG', 'AIG', 'META', 'MGM', 'IBKR', 'IPG', 'FDX', 'AJG', 'ES', 'DVN', 'MSI', 'GS', 'AMZN', 'T', 'ADSK', 'DHI', 'CFG', 'STLD', 'INVH', 'EXPE', 'ICE', 'GRMN', 'AMAT', 'AME', 'NTAP', 'JPM', 'MAA', 'FOXA', 'EA', 'AOS', 'JKHY', 'GPN', 'ABT', 'ACN', 'NRG', 'SPG', 'LYV', 'BF-B', 'SYK', 'ROS

In [None]:
!pip install dash_bootstrap_components

In [None]:
import pandas as pd
from dash import Dash, html, dcc, Input, Output
import plotly.graph_objs as go
import dash_bootstrap_components as dbc

# ---------- CONFIG: Define file paths ----------
# Every index CSV sits in one folder and is named after the index, with
# spaces replaced by underscores and a "_stock_data.csv" suffix.
_DATA_DIR = r"C:\Users\prabh\DA2402"
file_paths = {
    index_name: _DATA_DIR + "\\" + index_name.replace(" ", "_") + "_stock_data.csv"
    for index_name in ("Dow Jones", "NASDAQ 100", "S&P 500", "FTSE 100", "Nifty 50")
}

# ---------- Data Processing ----------
def compute_pct_changes(df: pd.DataFrame, date_col: str = 'Date') -> dict:
    """Summarize each stock's latest close against earlier reference dates.

    Columns are expected in pairs named ``<stock>Close`` / ``<stock>Volume``.
    For every stock whose most recent close is not NaN, the result holds the
    last close plus, for the previous row and for 7, 30 and 365 calendar days
    before the last date, the percentage change in close and the absolute
    difference in volume.

    Args:
        df: Price table containing ``date_col`` and the Close/Volume columns.
        date_col: Name of the date column; parsed with ``pd.to_datetime``.

    Returns:
        Dict mapping the stock prefix (the column name without the trailing
        ``Close``) to a dict with the keys ``last_close`` and
        ``last_{day,week,month,year}_{pct,volume}``. An empty frame yields
        ``{}``. Volume entries are NaN when the stock has no Volume column.
    """
    if df.empty:
        return {}

    suffix = 'Close'
    df = df.copy()
    df[date_col] = pd.to_datetime(df[date_col])
    lookup = df.sort_values(date_col).set_index(date_col)
    last_date = lookup.index[-1]

    def get_prev(days):
        # Latest row at or before the target date; when the history is
        # shorter than the look-back window, use the oldest row.
        target = last_date - pd.Timedelta(days=days)
        prev = lookup[lookup.index <= target]
        return prev.iloc[-1] if not prev.empty else lookup.iloc[0]

    def pct_change(now, prev):
        # A zero base would divide by zero; report no change instead.
        return 100 * (now - prev) / prev if prev != 0 else 0

    # The reference rows are the same for every stock, so look them up once.
    reference_rows = {
        'last_day': lookup.iloc[-2] if len(lookup) > 1 else lookup.iloc[0],
        'last_week': get_prev(7),
        'last_month': get_prev(30),
        'last_year': get_prev(365),
    }

    result = {}
    for close_col in lookup.columns:
        if not close_col.endswith(suffix):
            continue
        # Strip only the trailing suffix: str.replace would also remove
        # "Close" from inside a company name such as "Close Brothers".
        stock = close_col[:-len(suffix)]

        last_close = lookup[close_col].iloc[-1]
        if pd.isna(last_close):
            continue

        volume_col = f'{stock}Volume'
        has_volume = volume_col in lookup.columns
        last_volume = lookup[volume_col].iloc[-1] if has_volume else float('nan')

        stats = {'last_close': last_close}
        for period, row in reference_rows.items():
            prev_volume = row[volume_col] if has_volume else float('nan')
            stats[f'{period}_pct'] = pct_change(last_close, row[close_col])
            stats[f'{period}_volume'] = abs(last_volume - prev_volume)
        result[stock] = stats
    return result

# ---------- Load All Datasets ----------
# Build the per-index statistics once at start-up. An index whose CSV is
# missing is kept with an empty result and a printed warning.
index_data = {}
for name, path in file_paths.items():
    try:
        frame = pd.read_csv(path)
    except FileNotFoundError:
        print(f"⚠ File not found: {path}")
        index_data[name] = {}
    else:
        index_data[name] = compute_pct_changes(frame)

# ---------- Visualization Helpers ----------
def get_top_bottom(data, period, top_n=5):
    """Return the worst and best performers for ``period``, sorted ascending.

    Args:
        data: Dict mapping stock name -> stats dict (as produced by
            ``compute_pct_changes``).
        period: Key of the stats value to rank by, e.g. ``'last_day_pct'``.
        top_n: Number of losers and number of gainers to return.

    Returns:
        List of ``(stock, stats)`` tuples ordered from lowest to highest
        ``period`` value: the ``top_n`` lowest followed by the ``top_n``
        highest. Stocks whose value is missing or NaN are ignored. When
        fewer than ``2 * top_n`` stocks qualify, each stock appears once
        rather than in both groups.
    """
    if top_n <= 0:
        # Guard: a slice of [-0:] would return the whole list.
        return []

    valid = [(stock, stats) for stock, stats in data.items() if pd.notna(stats.get(period))]
    ranked = sorted(valid, key=lambda item: item[1][period])

    if len(ranked) <= 2 * top_n:
        # The loser and gainer slices would overlap; return each stock once.
        return ranked
    return ranked[:top_n] + ranked[-top_n:]

def create_symmetrical_bar(period, data, title):
    """Build a horizontal bar chart of the top gainers and losers for a period.

    Args:
        period: Stats key holding the percentage change, e.g.
            ``'last_week_pct'``; the matching ``..._volume`` key supplies the
            volume difference shown on hover.
        data: Dict mapping stock name -> stats dict.
        title: Chart title.

    Returns:
        A ``go.Figure``. Bars are red for negative and green for non-negative
        changes, on an x-axis symmetric around zero. A placeholder figure
        titled "No data available" is returned when ``data`` is empty or no
        stock has a usable value for ``period``.
    """
    def empty_figure():
        return go.Figure().update_layout(title="No data available", paper_bgcolor='black', font=dict(color='white'))

    if not data:
        return empty_figure()

    top_bottom_stocks = get_top_bottom(data, period)
    if not top_bottom_stocks:
        # Every value for this period was missing; max() below would fail.
        return empty_figure()

    volume_key = period.replace('_pct', '_volume')

    def format_volume(vol):
        # Volume can be missing (NaN/None); int(NaN) would raise.
        return "N/A" if pd.isna(vol) else f"{int(vol):,}"

    stock_names = [stock.strip('_') for stock, _ in top_bottom_stocks]
    pct_changes = [stats[period] for _, stats in top_bottom_stocks]
    colors = ['#d62728' if pct < 0 else '#2ca02c' for pct in pct_changes]
    closes = [stats['last_close'] for _, stats in top_bottom_stocks]
    volumes = [stats.get(volume_key, 0) for _, stats in top_bottom_stocks]

    hover_texts = [
        f"<b>{name}</b><br>"
        f"Change: {pct:.2f}%<br>"
        f"Close: {close:,.2f}<br>"
        f"Volume Δ: {format_volume(vol)}<extra></extra>"
        for name, pct, close, vol in zip(stock_names, pct_changes, closes, volumes)
    ]

    text_labels = [f'{pct:.2f}%' for pct in pct_changes]

    fig = go.Figure(go.Bar(
        x=pct_changes,
        y=stock_names,
        orientation='h',
        marker=dict(color=colors, line=dict(color='white', width=1)),
        text=text_labels,
        textposition='outside',
        textfont=dict(size=12, color='white'),
        hoverinfo='text',
        hovertext=hover_texts
    ))

    # Symmetric range with 25% head-room for the outside labels; fall back to
    # +/-1 when every change is zero so the axis does not collapse to [0, 0].
    max_abs_change = max(abs(p) for p in pct_changes)
    axis_limit = max_abs_change * 1.25 or 1.0

    fig.update_layout(
        title=dict(text=title, x=0.5, font=dict(size=22, family='Arial Black')),
        xaxis=dict(
            title='Percentage Change (%)',
            range=[-axis_limit, axis_limit],
            showgrid=True,
            gridcolor='rgba(255,255,255,0.1)',
            zeroline=True,
            zerolinecolor='rgba(255,255,255,0.5)',
            zerolinewidth=2
        ),
        yaxis=dict(title='Stock', tickfont=dict(size=12)),
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        font=dict(color='white'),
        height=600,
        margin=dict(l=120, r=60, t=80, b=60),
        showlegend=False
    )
    return fig

# ---------- App Layout ----------
# Create the Dash application, loading the SLATE theme stylesheet from
# dash_bootstrap_components.
app = Dash(__name__, external_stylesheets=[dbc.themes.SLATE])

# One entry per time-period tab: the visible label, the tab id, the stats key
# to rank by, and the chart title suffix.
tabs_config = [
    {
        'label': label,
        'value': f'tab-{slug}',
        'period': f'last_{slug}_pct',
        'title': f'Top 5 Gainers & Losers - {cadence}',
    }
    for label, slug, cadence in (
        ('Last Day', 'day', 'Daily'),
        ('Last Week', 'week', 'Weekly'),
        ('Last Month', 'month', 'Monthly'),
        ('Last Year', 'year', 'Yearly'),
    )
]

def _build_layout():
    """Assemble the page: heading, index dropdown, summary cards, tabs, chart area."""

    def summary_card(heading, id_stem, tone_class, glow_color):
        # The gainer and loser cards share one structure; only the heading,
        # the element-id prefix and the accent colours differ.
        body = dbc.CardBody([
            html.H5(heading, className=f"card-title {tone_class}"),
            html.H3(id=f'{id_stem}-name'),
            html.P(id=f'{id_stem}-value', className=tone_class),
            html.P(id=f'{id_stem}-close', className="text-light mb-0"),
            html.P(id=f'{id_stem}-vol', className="text-info mb-0"),
        ])
        card = dbc.Card(
            [body],
            color="dark",
            outline=True,
            style={'borderRadius': '12px', 'boxShadow': f'0 0 10px {glow_color}'},
        )
        return dbc.Col(card)

    heading = html.H1(
        "🌍 Global Market Performance Dashboard",
        className="text-center text-info mb-4",
        style={'fontFamily': 'Trebuchet MS', 'fontWeight': 'bold'},
    )

    # Dropdown for index selection
    index_dropdown = dcc.Dropdown(
        id='index-selector',
        options=[{'label': index_name, 'value': index_name} for index_name in index_data],
        value='Dow Jones',
        clearable=False,
        style={'backgroundColor': '#222', 'color': 'black'},
    )
    selector_row = dbc.Row(
        [
            dbc.Col(
                [
                    html.Label("Select Stock Index:", style={'color': '#00FFFF', 'fontSize': '18px'}),
                    index_dropdown,
                ],
                width=6,
            ),
        ],
        className="mb-4 justify-content-center",
    )

    # Top Gainer / Loser cards
    cards_row = dbc.Row(
        [
            summary_card("Top Gainer", 'top-gainer', "text-success", '#00ff99'),
            summary_card("Top Loser", 'top-loser', "text-danger", '#ff4d4d'),
        ],
        className="mb-4",
    )

    # Tabs for time periods
    period_tabs = dcc.Tabs(
        id='performance-tabs',
        value='tab-day',
        children=[
            dcc.Tab(
                label=tab['label'],
                value=tab['value'],
                style={'padding': '12px', 'backgroundColor': '#222'},
                selected_style={'padding': '12px', 'fontWeight': 'bold', 'borderBottom': '3px solid #00FFFF'},
            )
            for tab in tabs_config
        ],
    )

    # Visualization container
    chart_area = html.Div(
        id='tab-content',
        style={'backgroundColor': '#212529', 'padding': '20px', 'borderRadius': '15px'},
    )

    return dbc.Container(
        [html.Br(), heading, selector_row, cards_row, period_tabs, chart_area],
        fluid=True,
    )


app.layout = _build_layout()

# ---------- Callback ----------
@app.callback(
    [Output('tab-content', 'children'),
     Output('top-gainer-name', 'children'),
     Output('top-gainer-value', 'children'),
     Output('top-gainer-close', 'children'),
     Output('top-gainer-vol', 'children'),
     Output('top-loser-name', 'children'),
     Output('top-loser-value', 'children'),
     Output('top-loser-close', 'children'),
     Output('top-loser-vol', 'children')],
    [Input('index-selector', 'value'),
     Input('performance-tabs', 'value')]
)
def update_dashboard(selected_index, tab_value):
    """Refresh the chart and both summary cards for the chosen index and period.

    Args:
        selected_index: Index name chosen in the dropdown (a key of ``index_data``).
        tab_value: ``value`` of the selected tab (see ``tabs_config``); an
            unknown value falls back to the first tab.

    Returns:
        A 9-tuple matching the callback outputs: the chart component, then
        name / percentage change / close / volume difference for the top
        gainer and for the top loser. The eight text fields are ``"-"`` when
        the index has no usable data for the period.
    """
    change_dict = index_data.get(selected_index, {})
    config = next((t for t in tabs_config if t['value'] == tab_value), tabs_config[0])
    period = config['period']
    volume_key = period.replace('_pct', '_volume')
    title = f"{selected_index} — {config['title']}"

    # Filter before plotting so that an index whose values are all missing
    # gets the "No data available" placeholder instead of raising.
    valid_data = {k: v for k, v in change_dict.items() if pd.notna(v.get(period))}

    fig = create_symmetrical_bar(period, valid_data, title)
    graph = dcc.Graph(figure=fig, style={'height': '650px'})

    if not valid_data:
        return (graph,) + ("-",) * 8

    def summarize(entry):
        stock, stats = entry
        volume = stats.get(volume_key)
        # Volume can be missing (NaN/None); int(NaN) would raise.
        volume_text = "N/A" if pd.isna(volume) else f"{int(volume):,}"
        return (
            stock.strip('_'),
            # Explicit sign: a hard-coded "+" would render "+-1.23%" when
            # even the best performer is down.
            f"{stats[period]:+.2f}%",
            f"Close: {stats['last_close']:,.2f}",
            f"Volume Δ: {volume_text}",
        )

    top_gainer = max(valid_data.items(), key=lambda x: x[1][period])
    top_loser = min(valid_data.items(), key=lambda x: x[1][period])

    return (graph, *summarize(top_gainer), *summarize(top_loser))


# Run the app with debug mode on, but only when this file is executed
# directly rather than imported.
if __name__ == '__main__':
    app.run(debug=True)