# Dependencies

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
from dash import Dash, dcc, html, Input, Output

# Import data

In [2]:
# Read the two raw Excel workbooks used throughout the notebook.
constituents_path = 'data/s&p500.xlsx'
holders_path = 'data/sp500_top10_holders.xlsx'

sp500 = pd.read_excel(constituents_path)
sp500_top10_holders = pd.read_excel(holders_path)

In [3]:
# Both workbooks carry their real column labels in the first data row:
# promote that row to the header and keep only the rows below it.
def _first_row_as_header(frame):
    """Set ``frame``'s columns from its first row and return the remaining rows."""
    frame.columns = frame.iloc[0]
    return frame[1:]


sp500 = _first_row_as_header(sp500)
sp500_top10_holders = _first_row_as_header(sp500_top10_holders)

In [4]:
# Preview the first five rows of sp500 after the header fix.
sp500.head()

Unnamed: 0,symbol,isin,shortName,longName,quoteType,address1,city,state,zip,country,...,returnOnEquity,freeCashflow,operatingCashflow,earningsGrowth,revenueGrowth,grossMargins,ebitdaMargins,operatingMargins,financialCurrency,longBusinessSummary
1,NVDA,US67066G1040,NVIDIA Corporation,NVIDIA Corporation,EQUITY,2788 San Tomas Expressway,Santa Clara,CA,95051,United States,...,1.23767,33725874176,48663998464,1.68,1.224,0.75976,0.6353,0.62057,USD,NVIDIA Corporation provides graphics and compu...
2,META,US30303M1027,"Meta Platforms, Inc.","Meta Platforms, Inc.",EQUITY,1 Meta Way,Menlo Park,CA,94025,United States,...,0.35375,32011874304,78421999616,0.732,0.221,0.81491,0.49921,0.38105,USD,"Meta Platforms, Inc. engages in the developmen..."
3,AVGO,US11135F1012,Broadcom Inc.,Broadcom Inc.,EQUITY,3421 Hillview Ave,Palo Alto,CA,94304,United States,...,0.12509,25984124928,19185999872,1.881,0.164,0.74713,0.4904,0.31765,USD,"Broadcom Inc. designs, develops, and supplies ..."
4,V,US92826C8394,Visa Inc.,Visa Inc.,EQUITY,PO Box 8999,San Francisco,CA,94128-8999,United States,...,0.48548,14696625152,20213000192,0.202,0.096,0.97803,0.69832,0.66865,USD,Visa Inc. operates as a payment technology com...
5,UNH,US91324P1021,UnitedHealth Group Incorporated,UnitedHealth Group Incorporated,EQUITY,UnitedHealth Group Center,Minnetonka,MN,55343,United States,...,0.15584,-11781999616,9598999552,-0.22,0.064,0.23467,0.09034,0.07966,USD,UnitedHealth Group Incorporated operates as a ...


## Clean shareholders table

In [5]:
# Drop the rows whose 'class' is exactly 'Top mutual fund Holders', then remove
# exact duplicate rows. The comparison is case-sensitive, so the literal must
# match the spelling used in the workbook.
keep_row = sp500_top10_holders['class'] != 'Top mutual fund Holders'
sp500_top10_holders = sp500_top10_holders[keep_row].drop_duplicates()

# Only the last expression of a cell is rendered, so the shape is shown once,
# after both cleaning steps.
sp500_top10_holders.shape

(4710, 8)

## Merge Data

In [6]:
# Attach the company-level columns from sp500 to every holder row.
# validate='many_to_one' makes pandas raise a MergeError if a (symbol, isin)
# pair appears more than once in sp500, which would otherwise silently
# duplicate holder rows in the result.
merged_df = pd.merge(
    sp500_top10_holders,
    sp500,
    on=['symbol', 'isin'],
    how='inner',
    validate='many_to_one',
)
merged_df.head()

Unnamed: 0,isin,symbol,class,name,shares,date reported,%,value,shortName,longName,...,returnOnEquity,freeCashflow,operatingCashflow,earningsGrowth,revenueGrowth,grossMargins,ebitdaMargins,operatingMargins,financialCurrency,longBusinessSummary
0,US67066G1040,NVDA,Top Institutional Holders,Vanguard Group Inc,2.14B,"Jun 30, 2024",8.74,264902353368,NVIDIA Corporation,NVIDIA Corporation,...,1.23767,33725874176,48663998464,1.68,1.224,0.75976,0.6353,0.62057,USD,NVIDIA Corporation provides graphics and compu...
1,US67066G1040,NVDA,Top Institutional Holders,Blackrock Inc.,1.84B,"Jun 30, 2024",7.5,227168271024,NVIDIA Corporation,NVIDIA Corporation,...,1.23767,33725874176,48663998464,1.68,1.224,0.75976,0.6353,0.62057,USD,NVIDIA Corporation provides graphics and compu...
2,US67066G1040,NVDA,Top Institutional Holders,"FMR, LLC",1.04B,"Jun 30, 2024",4.26,128932216839,NVIDIA Corporation,NVIDIA Corporation,...,1.23767,33725874176,48663998464,1.68,1.224,0.75976,0.6353,0.62057,USD,NVIDIA Corporation provides graphics and compu...
3,US67066G1040,NVDA,Top Institutional Holders,State Street Corporation,981.49M,"Jun 30, 2024",4.0,121223675015,NVIDIA Corporation,NVIDIA Corporation,...,1.23767,33725874176,48663998464,1.68,1.224,0.75976,0.6353,0.62057,USD,NVIDIA Corporation provides graphics and compu...
4,US67066G1040,NVDA,Top Institutional Holders,"Geode Capital Management, LLC",534.55M,"Jun 30, 2024",2.18,66022706767,NVIDIA Corporation,NVIDIA Corporation,...,1.23767,33725874176,48663998464,1.68,1.224,0.75976,0.6353,0.62057,USD,NVIDIA Corporation provides graphics and compu...


In [7]:
# Show (rows, columns) of the merged frame.
merged_df.shape


(4710, 42)

## Companies not located in the US

In [8]:
# List the distinct values found in the 'country' column.
merged_df['country'].unique()

array(['United States', 'Ireland', 'Netherlands', 'Switzerland',
       'Bermuda', 'Singapore', 'United Kingdom', 'Canada'], dtype=object)

## Convert values

In [9]:
def convert_shares(value):
    """Convert an abbreviated share count such as '2.14B' into a float.

    Parameters
    ----------
    value : str, int, float or None
        A number optionally followed by a magnitude suffix: 'T' (1e12),
        'B' (1e9), 'M' (1e6) or 'K' (1e3). Suffixes are matched
        case-insensitively; surrounding whitespace and thousands separators
        (',') are ignored.

    Returns
    -------
    float
        The expanded number. ``None`` and empty strings yield NaN; values
        that are already numeric (including NaN) are passed through
        ``float`` unchanged, so re-applying the function to an already
        converted column is harmless.

    Raises
    ------
    ValueError
        If the numeric part of a string cannot be parsed by ``float``.
    """
    multipliers = {'T': 1e12, 'B': 1e9, 'M': 1e6, 'K': 1e3}

    if value is None:
        return float('nan')
    if not isinstance(value, str):
        # Already numeric (e.g. NaN for a missing cell, or a converted value).
        return float(value)

    text = value.strip().replace(',', '')
    if not text:
        return float('nan')

    suffix = text[-1].upper()
    if suffix in multipliers:
        return float(text[:-1]) * multipliers[suffix]
    return float(text)

# Replace the abbreviated 'shares' strings with their numeric equivalents.
shares_numeric = merged_df['shares'].map(convert_shares)
merged_df['shares'] = shares_numeric

In [10]:
# Columns that should be parsed as numbers.
columns_to_numeric = [
    # holder-level figures
    '%', 'value',
    # company size and balance sheet
    'marketCap', 'enterpriseValue', 'totalCash', 'totalCashPerShare',
    'ebitda', 'totalDebt', 'quickRatio', 'currentRatio', 'totalRevenue', 'debtToEquity',
    # per-share figures, returns and cash flow
    'revenuePerShare', 'returnOnAssets', 'returnOnEquity', 'freeCashflow', 'operatingCashflow',
    # growth and margins
    'earningsGrowth', 'revenueGrowth', 'grossMargins', 'ebitdaMargins', 'operatingMargins',
]

# Parse every listed column; errors='coerce' turns unparseable entries into NaN
# instead of raising.
numeric_part = merged_df[columns_to_numeric].apply(
    lambda column: pd.to_numeric(column, errors='coerce')
)
merged_df[columns_to_numeric] = numeric_part

# Preview the converted frame.
merged_df.head()


Unnamed: 0,isin,symbol,class,name,shares,date reported,%,value,shortName,longName,...,returnOnEquity,freeCashflow,operatingCashflow,earningsGrowth,revenueGrowth,grossMargins,ebitdaMargins,operatingMargins,financialCurrency,longBusinessSummary
0,US67066G1040,NVDA,Top Institutional Holders,Vanguard Group Inc,2140000000.0,"Jun 30, 2024",8.74,264902353368,NVIDIA Corporation,NVIDIA Corporation,...,1.23767,33725870000.0,48664000000.0,1.68,1.224,0.75976,0.6353,0.62057,USD,NVIDIA Corporation provides graphics and compu...
1,US67066G1040,NVDA,Top Institutional Holders,Blackrock Inc.,1840000000.0,"Jun 30, 2024",7.5,227168271024,NVIDIA Corporation,NVIDIA Corporation,...,1.23767,33725870000.0,48664000000.0,1.68,1.224,0.75976,0.6353,0.62057,USD,NVIDIA Corporation provides graphics and compu...
2,US67066G1040,NVDA,Top Institutional Holders,"FMR, LLC",1040000000.0,"Jun 30, 2024",4.26,128932216839,NVIDIA Corporation,NVIDIA Corporation,...,1.23767,33725870000.0,48664000000.0,1.68,1.224,0.75976,0.6353,0.62057,USD,NVIDIA Corporation provides graphics and compu...
3,US67066G1040,NVDA,Top Institutional Holders,State Street Corporation,981490000.0,"Jun 30, 2024",4.0,121223675015,NVIDIA Corporation,NVIDIA Corporation,...,1.23767,33725870000.0,48664000000.0,1.68,1.224,0.75976,0.6353,0.62057,USD,NVIDIA Corporation provides graphics and compu...
4,US67066G1040,NVDA,Top Institutional Holders,"Geode Capital Management, LLC",534550000.0,"Jun 30, 2024",2.18,66022706767,NVIDIA Corporation,NVIDIA Corporation,...,1.23767,33725870000.0,48664000000.0,1.68,1.224,0.75976,0.6353,0.62057,USD,NVIDIA Corporation provides graphics and compu...


# Treemap

## Shareholder -> Company -> Details

In [11]:
# Static treemap: holder ('name') at the outer level, company ('symbol') inside,
# with both tile size and tile colour driven by the 'value' column.
treemap_options = dict(
    path=['name', 'symbol'],
    values='value',
    color='value',
    hover_data=['shares'],
    color_continuous_scale='RdBu',
)
fig = px.treemap(merged_df, **treemap_options)

# Fix the rendered size of the figure (pixels).
fig.update_layout(width=1000, height=800)
fig.show()

# Treemap 2

In [12]:
# plotly.express (px) and the Dash names used here are imported once in the
# notebook's dependency cell, so they are not re-imported in this cell.
app = Dash(__name__)

# Page layout: a title, a radio selector for the grouping level, and the graph
# that the callback fills in.
app.layout = html.Div([
    html.H4("Interactive TreeMap with Dash"),
    html.P("Filter options:"),
    dcc.RadioItems(
        id='filter-options',
        value='shareholder',
        options=['shareholder', 'sector', 'industry', 'country'],
    ),
    dcc.Graph(id="graph"),
], style={'backgroundColor': 'white'})

# Maps each radio-button option to the merged_df column used as the treemap's
# grouping level.
GROUP_COLUMN_BY_MODE = {
    'shareholder': 'name',
    'sector': 'sector',
    'industry': 'industry',
    'country': 'country',
}


@app.callback(
    Output("graph", "figure"),
    Input("filter-options", "value"))
def generate_chart(mode):
    """Build the treemap for the grouping selected in the radio buttons.

    Parameters
    ----------
    mode : str
        One of the keys of ``GROUP_COLUMN_BY_MODE`` ('shareholder', 'sector',
        'industry' or 'country').

    Returns
    -------
    plotly.graph_objects.Figure
        Treemap with the hierarchy "sp500" -> selected grouping -> symbol,
        sized and coloured by the 'value' column.

    Raises
    ------
    ValueError
        If ``mode`` is not a supported option.
    """
    try:
        group_column = GROUP_COLUMN_BY_MODE[mode]
    except KeyError:
        raise ValueError(f"Unsupported filter option: {mode!r}") from None

    # The grouping column is passed straight to `path`, so the shared
    # merged_df is only read here and never modified by the callback.
    fig = px.treemap(
        merged_df,
        path=[px.Constant("sp500"), group_column, 'symbol'],
        values='value',
        color='value',
        hover_data=['shares'],
        title=f"Values grouped by {mode}",
        color_continuous_scale='RdBu'
    )

    return fig

if __name__ == '__main__':
    # Dash.run replaces the deprecated run_server alias, which newer Dash
    # releases no longer provide.
    app.run(debug=True)

In [None]:
# TODO add context - Later
# TODO separate notebook into 2 files: preprocessing and treemap - Gui
# TODO make it appear in a separate window/browser - Gui
# TODO Shareholder -> sector - Gui
# TODO Shareholder -> company - Gui
# TODO change color scheme - Iva
# TODO remove white border from squares - Iva 
# TODO card color different from background -  Iva
