In [3]:
import plotly.graph_objects as go

# Describe the single S&P 500 marker first, then hand it to the figure.
# Coordinates are those of New York City (approximate S&P 500 location).
nyc_trace = go.Scattergeo(
    lon=[-74.0060],
    lat=[40.7128],
    mode='markers+text',  # draw the marker and its text label together
    marker={
        'size': 10,
        'color': 'red',
        # NOTE(review): a colorscale normally only matters when marker.color
        # is numeric; with the constant 'red' it is presumably inert. It is
        # carried along unchanged so the trace definition stays the same.
        'colorscale': 'Viridis',
    },
    text=['S&P 500'],
    textfont={'size': 12, 'color': "red"},
    textposition="top center",  # label sits above the marker
)
fig = go.Figure(data=[nyc_trace])

# Geo layout: the orthographic projection is what produces the globe look.
fig.update_geos(
    projection_type='orthographic',
    showcountries=True,
    countrycolor='#333333',
    showocean=True,
    oceancolor='#a1c2fa',  # light blue ocean
    landcolor='#e0e0e0',
    bgcolor='#f4f4f4',  # background of the map area
)

# Title and margins (extra room at the top so the title is not clipped).
fig.update_layout(
    title_text='S&P 500 Location (New York) on a 3D Globe',
    title_font_size=24,
    margin=dict(r=0, t=50, l=0, b=0),
)

# Render the interactive figure with Plotly's default renderer
# (where it appears depends on the environment the code runs in).
fig.show()


In [4]:
import plotly.graph_objects as go
import pandas as pd # We need pandas to create the DataFrame (df) used in the plot

# --- 1. Define the missing DataFrame (df) ---
# Sample data for a handful of major global indices. Market_Cap_Billion is the
# numeric column that drives both the marker size and the marker colour below.
data = {
    'Index_Name': ['S&P 500 (USA)', 'FTSE 100 (UK)', 'Nikkei 225 (Japan)', 'Hang Seng (HK)'],
    'Latitude': [40.7128, 51.5074, 35.6895, 22.3193],
    'Longitude': [-74.0060, -0.1278, 139.6917, 114.1694],
    'Market_Cap_Billion': [45000, 3500, 6000, 5000],
}
df = pd.DataFrame(data)

# Name the pieces that are reused so the trace definition reads cleanly.
market_cap = df['Market_Cap_Billion']
hover_labels = df['Index_Name'] + '<br>Market Cap: $' + market_cap.astype(str) + 'B'

# --- 2. Colorization Implementation ---
index_trace = go.Scattermapbox(
    lon=df['Longitude'],
    lat=df['Latitude'],
    mode='markers+text',
    marker={
        'size': market_cap / 500,   # marker size scales with market cap
        'color': market_cap,        # marker colour scales with market cap
        'colorscale': 'Electric',   # alternatives to try: 'Viridis', 'Plasma'
        'showscale': True,          # show the colour bar
        'colorbar': {'title': "Market Cap ($B)", 'orientation': "h"},
    },
    text=df['Index_Name'],
    textfont={'size': 10, 'color': 'black'},
    textposition="top right",
    hoverinfo='text',               # hover shows only the custom hovertext
    hovertext=hover_labels,
)
fig = go.Figure(data=[index_trace])

# Mapbox layout: the 'open-street-map' style is used here without any token.
fig.update_layout(
    title_text='Global Stock Market Indices by Market Cap (Mapbox)',
    mapbox={
        'style': 'open-street-map',
        'zoom': 1.2,
        'center': {'lon': 20, 'lat': 30},
    },
    margin=dict(r=0, t=50, l=0, b=0),
)

# Render the interactive figure with Plotly's default renderer
# (where it appears depends on the environment the code runs in).
fig.show()


In [7]:
import pandas as pd
import plotly.express as px  # Import the high-level express library



# --- 1. Load and Prepare Your Actual Data ---
# Point this at the CSV holding one row per index. The later steps in this
# cell read the columns 'Ticker', 'City', 'Index Name', 'Latitude', 'Longitude'.
CSV_FILE_PATH = r'D:\Stock-Market-Indices\data\stock_indices_locations_final.csv'
try:
    df = pd.read_csv(CSV_FILE_PATH)
except FileNotFoundError as exc:
    # Raise instead of calling exit(): inside a Jupyter/IPython kernel exit()
    # requests a kernel shutdown (losing all state), whereas an exception just
    # stops this cell and shows the message.
    raise FileNotFoundError(f"Error: The file '{CSV_FILE_PATH}' was not found.") from exc

# --- 2. Data Cleaning ---
# Rebind rather than mutate in place, so the step is explicit about producing
# a new frame without rows that lack coordinates.
df = df.dropna(subset=['Latitude', 'Longitude'])
# NOTE(review): nothing below in this cell regenerates Latitude/Longitude, so
# rows whose city is overridden keep the coordinates that came from the CSV.

# --- 3. THE DEFINITIVE FIX: Manual Override Dictionary ---
# Ticker -> "City, Country" label. An entry here takes precedence over the
# 'City' value read from the CSV (see get_correct_city).
TICKER_TO_CITY_OVERRIDE = {
    # Asia-Pacific Corrections
    "TPX.F": "Tokyo, Japan",
    "^KSE": "Karachi, Pakistan",
    "^CSE": "Colombo, Sri Lanka",
    "^VNINDEX": "Ho Chi Minh City, Vietnam",
    # European Corrections
    "^CROBEX": "Zagreb, Croatia",
    "^PX": "Prague, Czech Republic",
    "^ATG": "Athens, Greece",
    "^BUX": "Budapest, Hungary",
    "^OBX": "Oslo, Norway",
    "^WIG20": "Warsaw, Poland",
    "^BELEX15": "Belgrade, Serbia",
    "^SMSI": "Madrid, Spain",
    "^SLI": "Zurich, Switzerland",
    # Americas Corrections
    "^SPBLPGPT": "Lima, Peru",
    "^IBC": "Caracas, Venezuela",
    # ETF tickers given an explicit location
    "AIA": "Hong Kong", # S&P Asia 50 ETF
    "IEV": "Zurich, Switzerland", # S&P Europe 350 ETF
    "ILF": "New York, USA", # S&P Latin America 40 ETF is US-based
}

# --- 4. A New, More Robust Location Assignment Logic ---
# We regenerate the 'City' column based on our new two-layered approach.
def get_correct_city(row):
    """Pick the city label for one index row.

    The manual override table wins when it has an entry for the row's
    'Ticker'; otherwise the row's own 'City' value is returned unchanged.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with a 'Ticker' key
            and, for tickers without an override, a 'City' key.

    Returns:
        The overriding city label, or ``row['City']`` as a fallback.
    """
    symbol = row['Ticker']
    has_override = symbol in TICKER_TO_CITY_OVERRIDE
    return TICKER_TO_CITY_OVERRIDE[symbol] if has_override else row['City']

# Resolve every row's city label (override first, CSV value otherwise).
df['Corrected_City'] = df.apply(get_correct_city, axis="columns")

# --- 5. Assign Regions based on the CORRECTED City ---
def assign_region(city_string):
    """Classify a "City, Country" label into a coarse world region.

    Country keywords are matched as whole words (case-insensitively), not as
    raw substrings. Plain substring tests misfile cities whose own name happens
    to contain a keyword: 'usa' inside "Lausanne", 'uk' inside "Fukuoka",
    'peru' inside "Perugia".

    Regions are tried in the order Americas, Europe & EMEA, Asia-Pacific; the
    first region with a matching keyword wins.

    Args:
        city_string: Location label such as "Oslo, Norway" or "Hong Kong".
            Any non-string value (e.g. NaN) is classified as 'Other'.

    Returns:
        One of 'Americas', 'Europe & EMEA', 'Asia-Pacific' or 'Other'.
    """
    import re  # function-scope import: this cell does not import re at the top

    if not isinstance(city_string, str):
        return 'Other'
    city_lower = city_string.lower()

    region_keywords = (
        ('Americas', [
            'usa', 'canada', 'mexico', 'brazil', 'argentina', 'chile',
            'colombia', 'peru', 'venezuela',
        ]),
        ('Europe & EMEA', [
            'uk', 'germany', 'france', 'switzerland', 'spain', 'italy',
            'netherlands', 'belgium', 'sweden', 'denmark', 'finland', 'norway',
            'poland', 'austria', 'greece', 'ireland', 'portugal', 'hungary',
            'czech', 'south africa', 'egypt', 'morocco', 'israel',
            'saudi arabia', 'turkey', 'serbia', 'croatia',
            # Spelled out because they used to match only as substrings of
            # 'czech' / 'uk'; listing them keeps those labels in this region.
            'czechia', 'ukraine',
        ]),
        ('Asia-Pacific', [
            'japan', 'china', 'hong kong', 'india', 'australia', 'south korea',
            'taiwan', 'singapore', 'indonesia', 'malaysia', 'thailand',
            'philippines', 'pakistan', 'new zealand', 'sri lanka', 'vietnam',
        ]),
    )

    for region, keywords in region_keywords:
        # \b on both sides: the keyword must stand alone as a word/phrase.
        pattern = r'\b(?:' + '|'.join(re.escape(word) for word in keywords) + r')\b'
        if re.search(pattern, city_lower):
            return region
    return 'Other'

# Tag every row with the region derived from its corrected city label.
df['Region'] = df['Corrected_City'].apply(assign_region)

# --- 6. Aggregate Data by the CORRECTED Location ---
# NOTE(review): Latitude/Longitude are still the values read from the CSV, so
# a city whose rows carry different coordinates ends up in several groups, and
# an overridden city is grouped under its pre-override coordinates.
location_groups = df.groupby(
    by=[
        'Corrected_City',
        'Latitude',
        'Longitude',
        'Region',
    ]
)
def format_hover_names(names, limit=10):
    """Join index names into an HTML hover label, truncating long lists.

    Args:
        names: pandas Series of name strings (it must support ``len`` and,
            when longer than ``limit``, ``.head``).
        limit: Maximum number of names to spell out.

    Returns:
        The names joined with '<br>'. When there are more than ``limit``
        names, only the first ``limit`` are listed, followed by a
        '... and N more.' line giving the number left out.
    """
    overflow = len(names) - limit
    if overflow <= 0:
        return '<br>'.join(names)
    listed = '<br>'.join(names.head(limit))
    return listed + f'<br>... and {overflow} more.'
# One output row per location group: how many indices it hosts, a (possibly
# truncated) hover label, and the complete list of index names.
named_aggregations = {
    'Index_Count': ('Ticker', 'size'),
    'Hover_Text': ('Index Name', format_hover_names),
    'Full_List': ('Index Name', list),
}
aggregated_df = location_groups.agg(**named_aggregations).reset_index()

# --- 7. Create the Plot ---
# Bubble map: one bubble per location, sized by index count, coloured by region.
fig = px.scatter_mapbox(
    data_frame=aggregated_df,
    lat="Latitude",
    lon="Longitude",
    color="Region",
    size="Index_Count",
    size_max=50,
    hover_name="Corrected_City",
    # Order matters: the hovertemplate below reads Hover_Text as customdata[0]
    # and Index_Count as customdata[1].
    hover_data={"Hover_Text": True, "Index_Count": True},
    title="Global Stock Market Indices: Major Financial Centers (Corrected)",
    zoom=1.2,
    center={"lat": 30, "lon": 20},
)
fig.update_traces(
    hovertemplate="<b>%{hovertext}</b><br><br>Number of Indices: %{customdata[1]}<br>--------------------<br>%{customdata[0]}<extra></extra>"
)
fig.update_layout(
    mapbox_style="carto-positron",
    margin=dict(r=0, t=50, l=0, b=0),
    legend_title_text='Region',
)
fig.show()

# --- 8. Print the Corrected Detailed List ---
banner = "=" * 60
print("\n" + banner)
print("--- Detailed Lists for Major Financial Hubs (Corrected Data) ---")
print(banner)

# A "major hub" is any location hosting more than one index; largest first.
hosts_several = aggregated_df['Index_Count'] > 1
major_hubs = aggregated_df[hosts_several].sort_values('Index_Count', ascending=False)
for _, hub in major_hubs.iterrows():
    print(f"\n📍 {hub['Corrected_City']} ({hub['Index_Count']} Indices):")
    for idx_name in hub['Full_List']:
        print(f"   - {idx_name}")


--- Detailed Lists for Major Financial Hubs (Corrected Data) ---

📍 New York, USA (28 Indices):
   - Dow Jones Global Titans 50
   - MSCI World
   - S&P Global 100
   - S&P Global 1200
   - The Global Dow
   - MSCI EAFE
   - S&P Latin America 40
   - MASI index (Morocco)
   - Dow Jones Industrial Average
   - Dow Jones Transportation Average
   - Dow Jones Utility Average
   - Nasdaq Composite
   - Nasdaq-100
   - Russell 1000
   - Russell 2000
   - Russell 3000
   - Russell MidCap
   - S&P 100
   - S&P 500
   - S&P MidCap 400
   - S&P SmallCap 600
   - Wilshire 5000
   - FTSE 350 Index (UK)
   - Amex Oil Index (Energy)
   - PHLX Semiconductor Sector (Electronics)
   - HUI Gold Index (Metals)
   - Philadelphia Gold and Silver Index (Metals)
   - Palisades Water Index (ZWI)

📍 Shanghai, China (4 Indices):
   - SSE Composite Index (China)
   - SZSE Component Index (China)
   - CSI 300 Index (China)
   - SSE 50 Index (China)

📍 London, UK (4 Indices):
   - FTSE All-World
   - FTSE 100 In

In [10]:
import pandas as pd
import plotly.express as px
import requests
import time
import logging

# --- 1. Configuration ---
# Root logger at INFO level; each record is "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# CSV read by main(); only its 'Index Name' and 'Ticker' columns are used there.
CSV_FILE_PATH = r'D:\Stock-Market-Indices\data\stock_indices_locations_final.csv'

# --- 2. THE DEFINITIVE FIX: A Comprehensive Manual Override Dictionary ---
# This is now our single source of truth for ALL locations.
# Keys are ticker symbols looked up from the CSV 'Ticker' column; values are
# the location label assigned to that index ("City, Country", or just the
# city for Hong Kong and Singapore). main() only keeps CSV rows whose ticker
# appears here, so an index missing from this map is left out of the plot.
# assign_region() derives the region from the country words in the value.
TICKER_TO_CITY_TRUTH_MAP = {
    # --- Americas ---
    "EFA": "New York, USA", "ILF": "New York, USA", "IWR": "New York, USA", "IJR": "New York, USA",
    "^DJGT": "New York, USA", "URTH": "New York, USA", "^SPG100": "New York, USA", "^SPG1200": "New York, USA",
    "^GDOW": "New York, USA", "^DJI": "New York, USA", "^DJT": "New York, USA", "^DJU": "New York, USA",
    "^IXIC": "New York, USA", "^NDX": "New York, USA", "^RUI": "New York, USA", "^RUT": "New York, USA",
    "^RUA": "New York, USA", "^OEX": "New York, USA", "^GSPC": "New York, USA", "^MID": "New York, USA",
    "^W5000": "New York, USA", "^XOI": "New York, USA", "^SOX": "New York, USA", "^HUI": "New York, USA",
    "^XAU": "New York, USA", "PHO": "New York, USA", "^VIX": "Chicago, USA",
    "^GSPTSE": "Toronto, Canada", "^JX": "Toronto, Canada", "XMI.TO": "Toronto, Canada",
    "^MXX": "Mexico City, Mexico", "^BVSP": "São Paulo, Brazil", "^MERV": "Buenos Aires, Argentina",
    "^IPSA": "Santiago, Chile", "^ICOLCAP": "Bogotá, Colombia", "^SPBLPGPT": "Lima, Peru",
    "^IBC": "Caracas, Venezuela",
    # --- Europe, Middle East & Africa (EMEA) ---
    "VWRA.L": "London, UK", "^FTSE": "London, UK", "^FTMC": "London, UK", "^NMX": "London, UK", "^FTAS": "London, UK",
    "^GDAXI": "Frankfurt, Germany", "^MDAXI": "Frankfurt, Germany", "^TECDAX": "Frankfurt, Germany",
    "^FCHI": "Paris, France", "^CN20": "Paris, France", "^SBF120": "Paris, France",
    "IEV": "Zurich, Switzerland", "^STOXX50E": "Zurich, Switzerland", "^STOXX": "Zurich, Switzerland", "^SSMI": "Zurich, Switzerland", "^SLI": "Zurich, Switzerland",
    "^AEX": "Amsterdam, Netherlands", "^AMX": "Amsterdam, Netherlands",
    "FTSEMIB.MI": "Milan, Italy", "^ISEQ": "Dublin, Ireland",
    "^IBEX": "Madrid, Spain", "^SMSI": "Madrid, Spain",
    "PSI20.LS": "Lisbon, Portugal", "^BFX": "Brussels, Belgium",
    "^ATX": "Vienna, Austria", "^OMX": "Stockholm, Sweden",
    "^OMXC25": "Copenhagen, Denmark", "^OMXH25": "Helsinki, Finland",
    "^OBX": "Oslo, Norway", "^ATG": "Athens, Greece",
    "^BUX": "Budapest, Hungary", "^WIG20": "Warsaw, Poland",
    "^PX": "Prague, Czech Republic", "XU100.IS": "Istanbul, Turkey",
    "^J203": "Johannesburg, South Africa", "^CASE30": "Cairo, Egypt",
    "^MASI": "Casablanca, Morocco", "^TA125": "Tel Aviv, Israel", "^TA35": "Tel Aviv, Israel",
    "^TASI.SR": "Riyadh, Saudi Arabia", "^BELEX15": "Belgrade, Serbia", "^CROBEX": "Zagreb, Croatia",
    # --- Asia-Pacific (APAC) ---
    "AIA": "Hong Kong", "^HSI": "Hong Kong",
    "TPX.F": "Tokyo, Japan", "^N225": "Tokyo, Japan",
    "000001.SS": "Shanghai, China", "000300.SS": "Shanghai, China", "000016.SS": "Shanghai, China",
    "399001.SZ": "Shenzhen, China",
    "^BSESN": "Mumbai, India", "^NSEI": "Mumbai, India", "^NSMIDCP": "Mumbai, India",
    "^AXJO": "Sydney, Australia", "^AORD": "Sydney, Australia", "^AXKO": "Sydney, Australia",

    "^KS11": "Seoul, South Korea", "^TWII": "Taipei, Taiwan",
    "^STI": "Singapore", "^JKSE": "Jakarta, Indonesia",
    "^KLSE": "Kuala Lumpur, Malaysia", "^SET.BK": "Bangkok, Thailand",
    "^PSEI": "Manila, Philippines", "^KSE": "Karachi, Pakistan",
    "^CSE": "Colombo, Sri Lanka", "^VNINDEX": "Ho Chi Minh City, Vietnam",
    "^NZ50": "Wellington, New Zealand"
}

# --- 3. Geolocation Logic with Cache ---
# city label -> (lat, lon) tuple; (None, None) records a failed lookup.
GEO_CACHE = {}


def get_lat_lon(city, timeout=10.0):
    """Geocode a city label through the Nominatim search API, with caching.

    Every lookup outcome, including failure, is stored in ``GEO_CACHE``, so
    each city triggers at most one HTTP request and callers may read the
    cache directly after calling this function.

    Args:
        city: Free-text query, e.g. "Oslo, Norway".
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests`` waits indefinitely, so one stalled connection would
            hang the whole run.

    Returns:
        ``(lat, lon)`` as floats, or ``(None, None)`` when the request fails,
        the response cannot be parsed, or no result is found. Errors are
        logged rather than raised.
    """
    if city in GEO_CACHE:
        return GEO_CACHE[city]

    coords = (None, None)
    try:
        resp = requests.get(
            'https://nominatim.openstreetmap.org/search',
            params={'q': city, 'format': 'json', 'limit': 1},
            headers={'User-Agent': 'GlobalIndexMapper/2.0'},
            timeout=timeout,
        )
        resp.raise_for_status()
        results = resp.json()
        if results:
            # Convert to float so downstream plotting and grouping get numbers
            # whether the JSON payload carries them as strings or as numbers.
            coords = (float(results[0]['lat']), float(results[0]['lon']))
    except Exception as e:
        # Deliberate best-effort: one bad city must not abort the whole batch.
        logging.error(f"Geolocation failed for {city}: {e}")

    GEO_CACHE[city] = coords
    return coords

# --- 4. Main Data Rebuilding and Plotting ---
def main():
    """Rebuild the index table from the truth map, geocode it, plot it, and print hub lists.

    Steps:
      1. Read ``CSV_FILE_PATH`` for the 'Index Name' and 'Ticker' columns.
      2. Keep the rows whose ticker is in ``TICKER_TO_CITY_TRUTH_MAP`` and
         label them with the mapped city; other tickers are logged and skipped.
      3. Geocode each unique city via ``get_lat_lon`` and drop cities that
         could not be resolved.
      4. Assign a region, aggregate per city, draw the bubble map, and print
         every city hosting more than one index.

    Returns:
        None. If the CSV is missing or no row survives filtering/geocoding,
        a message is emitted and the function returns early. It does not call
        ``exit()``, which in a Jupyter kernel would shut the kernel down.
    """
    def _hover_text(names, limit=10):
        # First `limit` names joined with <br>, plus a "... and N more." tail.
        shown = '<br>'.join(names.head(limit))
        extra = len(names) - limit
        return shown + (f'<br>... and {extra} more.' if extra > 0 else '')

    try:
        # Load original data just for the Index Names and Tickers
        df_original = pd.read_csv(CSV_FILE_PATH)
    except FileNotFoundError:
        print(f"Error: The file '{CSV_FILE_PATH}' was not found.")
        return

    # Keep only tickers the truth map knows, and report the rest instead of
    # dropping them silently.
    is_mapped = df_original['Ticker'].isin(list(TICKER_TO_CITY_TRUTH_MAP))
    skipped = df_original.loc[~is_mapped, 'Ticker']
    if not skipped.empty:
        logging.warning(
            f"Skipping {len(skipped)} ticker(s) missing from TICKER_TO_CITY_TRUTH_MAP: {skipped.tolist()}"
        )

    df = (
        df_original.loc[is_mapped, ['Index Name', 'Ticker']]
        .reset_index(drop=True)
        .assign(City=lambda frame: frame['Ticker'].map(TICKER_TO_CITY_TRUTH_MAP))
    )
    if df.empty:
        logging.error("No ticker in the CSV matches TICKER_TO_CITY_TRUTH_MAP; nothing to plot.")
        return

    # Fetch fresh coordinates for our clean city data
    unique_cities = df['City'].unique()
    logging.info(f"Fetching coordinates for {len(unique_cities)} unique cities...")
    for city in unique_cities:
        if city in GEO_CACHE:
            continue  # already resolved in this session: no request, no wait
        get_lat_lon(city)
        time.sleep(1)  # Be polite to the API

    # get_lat_lon stores an entry for every city it is asked about (success or
    # failure), so the direct cache lookups below cannot miss. to_numeric
    # coerces string coordinates to floats and failed lookups to NaN.
    df = (
        df.assign(
            Latitude=pd.to_numeric(df['City'].map(lambda c: GEO_CACHE[c][0]), errors='coerce'),
            Longitude=pd.to_numeric(df['City'].map(lambda c: GEO_CACHE[c][1]), errors='coerce'),
        )
        .dropna(subset=['Latitude', 'Longitude'])  # Drop any city that failed geocoding
        .assign(Region=lambda frame: frame['City'].apply(assign_region))
    )
    if df.empty:
        logging.error("Geocoding failed for every city; nothing to plot.")
        return

    # One row per city: index count, hover label, and the full list of names.
    aggregated_df = df.groupby(['City', 'Latitude', 'Longitude', 'Region']).agg(
        Index_Count=('Ticker', 'size'),
        Hover_Text=('Index Name', _hover_text),
        Full_List=('Index Name', list)
    ).reset_index()

    # Create the plot
    fig = px.scatter_mapbox(
        data_frame=aggregated_df, lat="Latitude", lon="Longitude", color="Region",
        size="Index_Count", size_max=50, hover_name="City",
        # Order matters: the hovertemplate reads Hover_Text as customdata[0]
        # and Index_Count as customdata[1].
        hover_data={"Hover_Text": True, "Index_Count": True},
        title="Global Stock Market Indices: Major Financial Centers (Corrected)",
        zoom=1.2, center={"lat": 30, "lon": 20}
    )
    fig.update_traces(hovertemplate="<b>%{hovertext}</b><br><br>Number of Indices: %{customdata[1]}<br>--------------------<br>%{customdata[0]}<extra></extra>")
    fig.update_layout(mapbox_style="carto-positron", margin={"r":0,"t":50,"l":0,"b":0}, legend_title_text='Region')
    fig.show()

    # Print the corrected detailed list
    print("\n" + "="*60)
    print("--- Detailed Lists for Major Financial Hubs (Final Corrected Data) ---")
    print("="*60)
    major_hubs = aggregated_df[aggregated_df['Index_Count'] > 1].sort_values('Index_Count', ascending=False)
    for _, row in major_hubs.iterrows():
        print(f"\n📍 {row['City']} ({row['Index_Count']} Indices):")
        for idx_name in row['Full_List']:
            print(f"   - {idx_name}")

def assign_region(city_string):
    """Classify a "City, Country" label into a coarse world region.

    Country keywords are matched as whole words (case-insensitively), not as
    raw substrings. Plain substring tests misfile cities whose own name happens
    to contain a keyword: 'usa' inside "Lausanne", 'uk' inside "Fukuoka",
    'peru' inside "Perugia".

    Regions are tried in the order Americas, Europe & EMEA, Asia-Pacific; the
    first region with a matching keyword wins.

    Args:
        city_string: Location label such as "Oslo, Norway" or "Hong Kong".
            Any non-string value (e.g. NaN) is classified as 'Other'.

    Returns:
        One of 'Americas', 'Europe & EMEA', 'Asia-Pacific' or 'Other'.
    """
    import re  # function-scope import: this cell does not import re at the top

    if not isinstance(city_string, str):
        return 'Other'
    city_lower = city_string.lower()

    americas = ['usa', 'canada', 'mexico', 'brazil', 'argentina', 'chile', 'colombia', 'peru', 'venezuela']
    emea = [
        'uk', 'germany', 'france', 'switzerland', 'spain', 'italy', 'netherlands',
        'belgium', 'sweden', 'denmark', 'finland', 'norway', 'poland', 'austria',
        'greece', 'ireland', 'portugal', 'hungary', 'czech', 'south africa',
        'egypt', 'morocco', 'israel', 'saudi arabia', 'turkey', 'serbia', 'croatia',
        # Spelled out because they used to match only as substrings of
        # 'czech' / 'uk'; listing them keeps those labels in this region.
        'czechia', 'ukraine',
    ]
    apac = [
        'japan', 'china', 'hong kong', 'india', 'australia', 'south korea',
        'taiwan', 'singapore', 'indonesia', 'malaysia', 'thailand',
        'philippines', 'pakistan', 'new zealand', 'sri lanka', 'vietnam',
    ]

    for region, keywords in (('Americas', americas), ('Europe & EMEA', emea), ('Asia-Pacific', apac)):
        # \b on both sides: the keyword must stand alone as a word/phrase.
        pattern = r'\b(?:' + '|'.join(re.escape(word) for word in keywords) + r')\b'
        if re.search(pattern, city_lower):
            return region
    return 'Other'

# Entry point: run the full load -> geocode -> plot -> print pipeline when this
# code executes as the main module. assign_region is defined above this line,
# so it already exists by the time main() calls it.
if __name__ == "__main__":
    main()

2025-10-26 16:25:51,207 - INFO - Fetching coordinates for 54 unique cities...



--- Detailed Lists for Major Financial Hubs (Final Corrected Data) ---

📍 New York, USA (26 Indices):
   - Dow Jones Global Titans 50
   - MSCI World
   - S&P Global 100
   - S&P Global 1200
   - The Global Dow
   - MSCI EAFE
   - S&P Latin America 40
   - Dow Jones Industrial Average
   - Dow Jones Transportation Average
   - Dow Jones Utility Average
   - Nasdaq Composite
   - Nasdaq-100
   - Russell 1000
   - Russell 2000
   - Russell 3000
   - Russell MidCap
   - S&P 100
   - S&P 500
   - S&P MidCap 400
   - S&P SmallCap 600
   - Wilshire 5000
   - Amex Oil Index (Energy)
   - PHLX Semiconductor Sector (Electronics)
   - HUI Gold Index (Metals)
   - Philadelphia Gold and Silver Index (Metals)
   - Palisades Water Index (ZWI)

📍 London, UK (5 Indices):
   - FTSE All-World
   - FTSE 100 Index (UK)
   - FTSE MID 250 Index (UK)
   - FTSE 350 Index (UK)
   - FTSE All-Share Index (UK)

📍 Zurich, Switzerland (5 Indices):
   - EURO STOXX 50
   - STOXX Europe 600
   - S&P Europe 350
   - S