# Dependencies

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from dash import Dash, dcc, html, Input, Output

# Import data

In [None]:
# Load both Excel workbooks from the local data/ directory.
SP500_WORKBOOK = 'data/s&p500.xlsx'
HOLDERS_WORKBOOK = 'data/sp500_top10_holders.xlsx'

sp500 = pd.read_excel(SP500_WORKBOOK)
sp500_top10_holders = pd.read_excel(HOLDERS_WORKBOOK)

In [None]:
# Fix index and column names
def _promote_first_row_to_header(frame):
    """Return a copy of *frame* whose first data row has become the header.

    The first row supplies the column labels and is then dropped. Unlike a
    plain ``frame.columns = frame.iloc[0]`` followed by ``frame[1:]``, the
    result has a row index restarting at 0 and a column axis without the
    stray name inherited from the promoted row's label.

    Args:
        frame: DataFrame whose first row holds the real column names.

    Returns:
        A new DataFrame; *frame* itself is left unmodified.
    """
    # reset_index returns a new object, so relabelling it cannot touch `frame`.
    promoted = frame.iloc[1:].reset_index(drop=True)
    # A plain list (rather than the row Series) keeps the column axis unnamed.
    promoted.columns = frame.iloc[0].tolist()
    return promoted


sp500 = _promote_first_row_to_header(sp500)
sp500_top10_holders = _promote_first_row_to_header(sp500_top10_holders)

In [None]:
# Preview the first five rows of the company table.
sp500.head(n=5)

## Clean shareholders table

In [None]:
# Drop the rows whose 'class' column equals 'Top mutual fund Holders'.
# NOTE(review): the comparison is case-sensitive - confirm the label in the
# workbook really uses this exact capitalisation.
sp500_top10_holders = sp500_top10_holders[sp500_top10_holders['class'] != 'Top mutual fund Holders']
# A bare expression in the middle of a cell is never displayed, so print the
# intermediate shape explicitly.
print(f"Shape after dropping mutual fund holders: {sp500_top10_holders.shape}")

# Remove duplicated rows
sp500_top10_holders = sp500_top10_holders.drop_duplicates()
sp500_top10_holders.shape

## Merge Data

In [None]:
# Join the holders table (left) with the company table (right) on the two
# identifier columns they share, then preview the result.
join_keys = ['symbol', 'isin']
merged_df = pd.merge(left=sp500_top10_holders, right=sp500, on=join_keys)
merged_df.head()

In [None]:
# Show the (rows, columns) size of the merged table.
# Optional extra check: merged_df[['isin', 'symbol']].nunique()
merged_df.shape


## Companies not located in the US

In [None]:
# List the distinct values found in the 'country' column.
country_column = merged_df['country']
country_column.unique()

## Convert values

In [None]:
# Multipliers for the magnitude suffixes accepted by convert_shares.
_SUFFIX_MULTIPLIERS = {
    'K': 1e3,
    'M': 1e6,
    'B': 1e9,
    'T': 1e12,
}


def convert_shares(value):
    """Convert an abbreviated quantity such as ``'1.5B'`` to a float.

    A trailing ``K``, ``M``, ``B`` or ``T`` (case-insensitive) scales the
    number by 1e3, 1e6, 1e9 or 1e12 respectively. Text without a suffix is
    parsed as a plain number.

    Args:
        value: Text to parse, or a value that is already numeric. ``None``
            and NaN are treated as missing data.

    Returns:
        The quantity as a float, or NaN when *value* is missing.

    Raises:
        ValueError: If the text is not a number with an optional suffix.
    """
    if value is None:
        return float('nan')
    if not isinstance(value, str):
        # Already numeric (including NaN from empty cells): pass through so
        # the conversion can be re-applied to an already converted column.
        return float(value)

    text = value.strip()
    # text[-1:] is '' for empty input, which falls through to float('') and
    # raises ValueError instead of failing with an IndexError.
    multiplier = _SUFFIX_MULTIPLIERS.get(text[-1:].upper())
    if multiplier is None:
        return float(text)
    return float(text[:-1]) * multiplier

# Replace the textual 'shares' column with its numeric equivalent.
share_counts = merged_df['shares']
merged_df['shares'] = share_counts.apply(convert_shares)

In [None]:
# Columns that must hold numbers for the charts below, grouped by theme.
columns_to_numeric = [
    # holding size
    '%', 'value',
    # company valuation and balance sheet
    'marketCap', 'enterpriseValue', 'totalCash', 'totalCashPerShare',
    'ebitda', 'totalDebt', 'quickRatio', 'currentRatio', 'totalRevenue',
    'debtToEquity', 'revenuePerShare',
    # returns and cash flow
    'returnOnAssets', 'returnOnEquity', 'freeCashflow', 'operatingCashflow',
    # growth and margins
    'earningsGrowth', 'revenueGrowth', 'grossMargins', 'ebitdaMargins',
    'operatingMargins',
]

# Coerce one column at a time; entries that cannot be parsed become NaN.
for column_name in columns_to_numeric:
    merged_df[column_name] = pd.to_numeric(merged_df[column_name], errors='coerce')

# Preview the first rows of the converted table.
merged_df.head()


# Treemap

## Shareholder -> Company -> Details

In [None]:
# Treemap nested by 'name' and then 'symbol'; both the tile size and the tile
# colour are driven by the 'value' column.
treemap_settings = {
    'path': ['name', 'symbol'],
    'values': 'value',
    'color': 'value',
    'hover_data': ['shares'],
    'color_continuous_scale': 'RdBu',
}
fig = px.treemap(merged_df, **treemap_settings)

# Render at a fixed 1000 x 800 size.
fig.update_layout(width=1000, height=800)
fig.show()

# Treemap 2

In [39]:
import plotly.express as px

# Maps each radio-button option to the merged_df column used for grouping.
FILTER_COLUMNS = {
    'shareholder': 'name',
    'sector': 'sector',
    'industry': 'industry',
    'country': 'country',
}
DEFAULT_FILTER = 'shareholder'

app = Dash(__name__)

app.layout = html.Div([
    html.H4("Interactive TreeMap with Dash"),
    html.P("Filter options:"),
    dcc.RadioItems(
        id='filter-options',
        value=DEFAULT_FILTER,
        options=list(FILTER_COLUMNS),
    ),
    dcc.Graph(id="graph"),
], style={'backgroundColor': 'white'})


@app.callback(
    Output("graph", "figure"),
    Input("filter-options", "value"))
def generate_chart(mode):
    """Build the treemap for the grouping selected in the radio buttons.

    Args:
        mode: One of the keys of ``FILTER_COLUMNS``. Any other value falls
            back to ``DEFAULT_FILTER``.

    Returns:
        A plotly figure nested as "sp500" -> selected grouping -> symbol,
        sized and coloured by the 'value' column.
    """
    if mode not in FILTER_COLUMNS:
        mode = DEFAULT_FILTER
    # Pass the grouping column straight to `path` rather than writing a
    # helper column into the shared merged_df: callbacks can run
    # concurrently, so they must not mutate global state.
    group_column = FILTER_COLUMNS[mode]

    fig = px.treemap(
        merged_df,
        path=[px.Constant("sp500"), group_column, 'symbol'],
        values='value',
        color='value',
        hover_data=['shares'],
        title=f"Values grouped by {mode}",
        color_continuous_scale='RdBu'
    )

    return fig


if __name__ == '__main__':
    # Newer Dash releases reject `run_server` in favour of `run`; fall back
    # to the old name only when `run` is not available.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)

In [None]:
# TODO add context
# TODO separate notebook into 2 files: preprocessing and treemap
# TODO make it appear in a separate window/browser
# TODO Shareholder -> sector
# TODO Shareholder -> company
# TODO change color scheme
# TODO remove white border from squares
# TODO card color different from background
