Imports

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf

import sys
from pathlib import Path
# import datetime as dt
# from enum import Enum

# import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px

import gc

# Make the local StockAnalyzer project importable (the `Company` module is imported from it later).
# Raw string: in a normal literal the backslashes form invalid escape sequences (\D, \V, \P, \S).
# NOTE(review): machine-specific absolute path — consider a configurable project root.
sys.path.append(str(Path(r'D:\Desktop\VS\VSCode\Python\StockAnalyzer').resolve()))

Setup

In [None]:
# Ticker universe used by every later cell.
Market_Tickers = ['AAPL', 'TSLA', 'NVDA', 'INTC', 'MSFT',
                  'PFE', 'JNJ', 'MRNA', 'ABT', 'AMGN',
                  'XOM', 'CVX', 'BP', 'SHEL', 'TTE',
                  'JPM', 'BAC', 'C', 'GS', 'MS',
                  'AMZN', 'WMT', 'COST', 'TGT', 'HD',
                  'TSM', 'TM', 'SFTBY', 'BABA'
                ]

start_date = "1995-01-01"

# One yfinance handle per symbol; later cells read `.info` from these.
ticker_data = {ticker: yf.Ticker(ticker) for ticker in Market_Tickers}

# auto_adjust=False is passed explicitly so the result keeps a separate "Adj Close"
# column; with auto-adjustment enabled that column is not returned and the
# selection below would raise a KeyError.
raw_download = yf.download(Market_Tickers, start=start_date, group_by='ticker', auto_adjust=False)

# Keep only the "Adj Close" field of every ticker.
# Columns stay a two-level (ticker, field) index, so `tickers_historical_data[ticker]`
# is a single-column DataFrame.
tickers_historical_data = raw_download.loc[:, (slice(None), ["Adj Close"])]
del raw_download

Analysis

In [None]:
# DailyReturns = pd.DataFrame({ticker: tickers_historical_data[ticker]["Close"].pct_change()*100 for ticker in Market_Tickers})

Calculate market cap

In [None]:
def calculate_market_caps(prices, ticker):
    """
    Estimate a market-cap series as price multiplied by a single share count.

    The share count is read once from ``ticker.info`` (``sharesOutstanding``,
    falling back to ``floatShares``) and applied unchanged to every row of
    ``prices``. No per-date adjustment of the share count (splits, buybacks)
    is performed here.

    :param prices: pandas Series or single-column DataFrame of prices; its index
                   is reused for the result.
    :param ticker: object exposing a dict-like ``info`` attribute and a ``ticker``
                   symbol attribute (e.g. a yfinance.Ticker).
    :return: pandas Series of market caps indexed like ``prices``, or None when
             neither share-count field is available.
    """
    # Read the info mapping a single time; both lookups below use the same object.
    info = ticker.info

    # Prefer sharesOutstanding, fall back to floatShares.
    shares_outstanding = info.get('sharesOutstanding')
    if shares_outstanding is None:
        shares_outstanding = info.get('floatShares')

    if shares_outstanding is None:
        print(f"Warning: Both sharesOutstanding and floatShares are missing for {ticker.ticker}. Cannot calculate market cap.")
        return None  # Return None if both are missing

    # Flatten so a single-column DataFrame (n, 1) and a Series (n,) are handled alike.
    adj_close_prices = prices.to_numpy().reshape(-1)

    return pd.Series(adj_close_prices * shares_outstanding, index=prices.index)


Calculate stock weights

In [None]:
def calculate_stock_weights(market_caps):
    """
    Express every stock's market cap as a fraction of the row total.

    :param market_caps: pandas DataFrame of market caps, one row per date and one column per stock.
    :return: pandas DataFrame of the same shape with each stock's weight on each date.
    """
    # Row-wise total over all stocks.
    row_totals = market_caps.sum(axis=1)

    # A total of exactly 0 would be a zero divisor; divide by 1 on those rows instead.
    # Entries that are NaN in the input remain NaN in the output.
    divisors = row_totals.mask(row_totals == 0, 1)

    # Divide each row by its own divisor (aligned on the index).
    return market_caps.divide(divisors, axis="index")

Build Market Caps history for each ticker

In [None]:
# Float-typed, NaN-filled frame: one column per ticker, one row per price date.
MarketCap_data = pd.DataFrame(index=tickers_historical_data.index, columns=Market_Tickers, dtype=float)

for ticker in Market_Tickers:
    print(ticker)
    ticker_market_caps = calculate_market_caps(tickers_historical_data[ticker],
                                               ticker_data[ticker])
    # calculate_market_caps returns None when no share count is available;
    # in that case the column is left as NaN instead of being filled with None objects.
    if ticker_market_caps is not None:
        MarketCap_data[ticker] = ticker_market_caps
    gc.collect()  # Force garbage collection to free memory

# market_daily_returns = DailyReturns.mean(axis=1,skipna=True)
# market_daily_returns[0] = 0

# data_len = market_daily_returns.__len__()

# market_trend = np.zeros(shape=market_daily_returns.shape)
# for i_day in range(1,len(market_daily_returns)):
#     market_trend[i_day] = (np.prod(1 + (market_daily_returns[0:i_day]/100)) - 1)*100


Calculate the full market index

In [None]:
Stock_Weights = calculate_stock_weights(MarketCap_data)

# Strip any timezone information so both frames carry a plain (naive) DatetimeIndex.
tickers_historical_data.index = pd.to_datetime(tickers_historical_data.index).tz_localize(None)
Stock_Weights.index = pd.to_datetime(Stock_Weights.index).tz_localize(None)

# The product below is positional, so select the price columns in the weights'
# column order; the downloaded frame is not guaranteed to list tickers in that order.
aligned_prices = tickers_historical_data[list(Stock_Weights.columns)].to_numpy()

# Weighted sum per day; NaN products (missing price or weight) contribute 0.
# `nan=` is passed by keyword: the second positional argument of np.nan_to_num is `copy`.
temp = np.nan_to_num(Stock_Weights.to_numpy() * aligned_prices, nan=0.0).sum(axis=1)

# First day on which the index has a positive value.
valid_positions = np.flatnonzero(temp > 0)
if valid_positions.size == 0:
    raise ValueError("The market index has no positive values; check the downloaded prices and market caps.")
first_value_pos = valid_positions[0]

# Daily percentage change relative to the PREVIOUS day's index value.
# The first day has no predecessor and is set to 0; a step touching a
# non-positive index value (no data) is NaN instead of dividing by zero.
index_values = temp[first_value_pos:]
previous_values = index_values[:-1]
current_values = index_values[1:]
has_data = (previous_values > 0) & (current_values > 0)

daily_change = np.zeros(index_values.shape)
with np.errstate(divide="ignore", invalid="ignore"):
    daily_change[1:] = np.where(has_data, (current_values - previous_values) / previous_values * 100, np.nan)

# Empirical CDF over the defined (finite) daily changes: 1/n, 2/n, ..., 1
daily_change_sorted = np.sort(daily_change[np.isfinite(daily_change)])
n = len(daily_change_sorted)
cumulative_probs = np.arange(1, n + 1) / n

# The return arrays can be shorter than the date index (days before the first
# valid value, dropped NaN steps); pad them with NaN so every column has one
# entry per date.
n_days = len(temp)
returns_column = np.full(n_days, np.nan)
returns_column[first_value_pos:] = daily_change
sorted_returns_column = np.full(n_days, np.nan)
sorted_returns_column[:n] = daily_change_sorted
cdf_column = np.full(n_days, np.nan)
cdf_column[:n] = cumulative_probs

world_idx_price = pd.DataFrame(
    {
        "price index": temp,
        # cumulative percentage change since the first valid day
        "index returns": ((temp / temp[first_value_pos]) - 1) * 100,
        "returns": returns_column,
        # NOTE: the two columns below are ordered by return size, not by date
        "sorted returns": sorted_returns_column,
        "CDF": cdf_column,
    },
    index=tickers_historical_data.index,
)

del temp

In [None]:
# Line chart of the world index: percentage change relative to the first valid day.
fig = go.Figure()
fig.add_trace(go.Scatter(x=world_idx_price.index,y=world_idx_price["index returns"],mode='lines',name="Market Trend"))
fig.update_layout(
    title="Daily Market Trend (Market-Cap-Weighted)",
    xaxis_title="Date",
    # "index returns" is cumulative since the first valid day, not an average daily return
    yaxis_title="Cumulative Return [%]"
)
fig.show()

Sector & Geographical trends

In [None]:
# Country name -> region label, written region by region.
Geographical_Grouping = {
    **dict.fromkeys(['United States'], 'North America'),
    **dict.fromkeys(['United Kingdom', 'France'], 'Europe'),
    **dict.fromkeys(['Taiwan', 'South Korea', 'China', 'Japan'], 'Asia'),
}

In [None]:
# Per-ticker labels (parallel to Market_Tickers) and the ticker lists per sector / region.
sectors = []
countries = []
regions = []
SectorGroups = {}
GeoGroups = {}
for ticker in Market_Tickers:
    # Read the info mapping once per ticker.
    info = ticker_data[ticker].info

    # Missing fields fall back to a placeholder label instead of raising KeyError.
    sector = info.get('sector') or 'Unknown'
    country = info.get('country')

    region = Geographical_Grouping.get(country)
    if region is None:
        print(f"Warning: no region mapping for country {country!r} ({ticker}); grouping under 'Other'.")
        region = 'Other'

    sectors.append(sector)
    countries.append(country)
    regions.append(region)

    # setdefault creates the list the first time a group is seen.
    GeoGroups.setdefault(region, []).append(ticker)
    SectorGroups.setdefault(sector, []).append(ticker)

In [None]:
# Collapse the per-ticker label lists into sorted arrays of distinct labels.
sectors, regions = (np.unique(labels) for labels in (sectors, regions))

In [None]:
def _weighted_index_stats(group_tickers):
    """
    Build a cap-weighted index for a group of tickers and its return statistics.

    Reads the notebook-level ``MarketCap_data`` and ``tickers_historical_data``.

    :param group_tickers: list of ticker symbols forming the group.
    :return: tuple ``(index_values, cumulative_returns, daily_change,
             daily_change_sorted, cdf)`` where
             - index_values: weighted price sum per date (one value per row of the price frame),
             - cumulative_returns: percentage change relative to the first positive index value,
             - daily_change: day-over-day percentage change starting at the first positive
               index value (first entry 0; NaN where a step touches a non-positive value),
             - daily_change_sorted: the finite daily changes in ascending order,
             - cdf: empirical cumulative probabilities 1/n ... 1 matching daily_change_sorted.
    :raises ValueError: if the index never takes a positive value.
    """
    weights = calculate_stock_weights(MarketCap_data[group_tickers])
    prices = tickers_historical_data[group_tickers]

    # NaN products (missing price or weight) contribute 0 to the daily sum.
    # `nan=` is passed by keyword: the second positional argument of np.nan_to_num is `copy`.
    index_values = np.nan_to_num(weights.to_numpy() * prices.to_numpy(), nan=0.0).sum(axis=1)

    valid_positions = np.flatnonzero(index_values > 0)
    if valid_positions.size == 0:
        raise ValueError(f"Index for {list(group_tickers)} has no positive values.")
    first_value_pos = valid_positions[0]

    cumulative_returns = ((index_values / index_values[first_value_pos]) - 1) * 100

    # Day-over-day change relative to the PREVIOUS day's value; steps that touch a
    # non-positive value are undefined (NaN) rather than a division by zero.
    active_values = index_values[first_value_pos:]
    previous_values = active_values[:-1]
    current_values = active_values[1:]
    has_data = (previous_values > 0) & (current_values > 0)

    daily_change = np.zeros(active_values.shape)
    with np.errstate(divide="ignore", invalid="ignore"):
        daily_change[1:] = np.where(has_data, (current_values - previous_values) / previous_values * 100, np.nan)

    daily_change_sorted = np.sort(daily_change[np.isfinite(daily_change)])
    n_valid = len(daily_change_sorted)
    cdf = np.arange(1, 1 + n_valid) / n_valid

    return index_values, cumulative_returns, daily_change, daily_change_sorted, cdf


# Date-indexed frames hold one value per date; the daily-change arrays and CDFs can
# differ in length per group, so they are kept in plain dicts keyed by group name.
Sector_idx_prices = pd.DataFrame(index=tickers_historical_data.index, columns=sectors, dtype=float)
Sector_idx_returns = pd.DataFrame(index=tickers_historical_data.index, columns=sectors, dtype=float)
Sector_idx_daily_chng = {}
Sector_idx_daily_chng_srt = {}
Sector_idx_cdf = {}

Geo_idx_prices = pd.DataFrame(index=tickers_historical_data.index, columns=regions, dtype=float)
Geo_idx_returns = pd.DataFrame(index=tickers_historical_data.index, columns=regions, dtype=float)
Geo_idx_daily_chng = {}
Geo_idx_daily_chng_srt = {}
Geo_idx_cdf = {}

for sector in sectors:
    idx_values, idx_returns, idx_change, idx_change_sorted, idx_cdf = _weighted_index_stats(SectorGroups[sector])

    Sector_idx_prices[sector] = idx_values
    Sector_idx_returns[sector] = idx_returns  # since the first day
    Sector_idx_daily_chng[sector] = idx_change
    Sector_idx_daily_chng_srt[sector] = idx_change_sorted
    Sector_idx_cdf[sector] = idx_cdf

for region in regions:
    idx_values, idx_returns, idx_change, idx_change_sorted, idx_cdf = _weighted_index_stats(GeoGroups[region])

    Geo_idx_prices[region] = idx_values
    Geo_idx_returns[region] = idx_returns
    Geo_idx_daily_chng[region] = idx_change
    Geo_idx_daily_chng_srt[region] = idx_change_sorted
    Geo_idx_cdf[region] = idx_cdf


Plot Sector & Geographical trends

In [None]:
# Sector indices: one line per sector, percentage change over time.
fig_sectors = go.Figure(
    data=[
        go.Scatter(x=Sector_idx_returns.index, y=Sector_idx_returns[sector_name], mode="lines", name=sector_name)
        for sector_name in sectors
    ]
)
fig_sectors.update_layout(
    title="Sector Trends",
    xaxis_title="Date",
    yaxis_title="Sector Trend [%]",
    legend_title="Sectors",
)
fig_sectors.show()

# Regional indices: one line per geographical market.
fig_geo = go.Figure(
    data=[
        go.Scatter(x=Geo_idx_returns.index, y=Geo_idx_returns[region_name], mode="lines", name=region_name)
        for region_name in regions
    ]
)
fig_geo.update_layout(
    title="Geographical Market Trends",
    xaxis_title="Date",
    yaxis_title="Geographical Market Trend [%]",
    legend_title="Geographical Markets",
)
fig_geo.show()

Test Company group

In [None]:
from Company import Company

# Think about creating a downloaded database that is stored in a csv file
# One Company object per ticker, each constructed with the shared start date.
Companies = {}
for symbol in Market_Tickers:
    Companies[symbol] = Company(symbol, start_date)

Calculate popularity distribution

In [None]:
# softmax_exp = {ticker: np.exp(Companies[ticker].trading_volume/Companies[ticker].shares_outstanding) for ticker in Market_Tickers}
# softmax_sum = np.sum(val for val in softmax_exp.values())

# Traded volume relative to shares outstanding, computed once per ticker and reused below.
relative_volume = {
    ticker: Companies[ticker].trading_volume / Companies[ticker].shares_outstanding
    for ticker in Market_Tickers
}

# Popularity = each ticker's relative volume as a fraction of the total over all tickers.
distribution_sum = sum(relative_volume.values())
for ticker in Market_Tickers:
    Companies[ticker].popularity = relative_volume[ticker] / distribution_sum

# Bar heights are looked up per ticker so they always line up with the x-axis labels.
fig_popularity = go.Figure()
fig_popularity.add_trace(go.Bar(x=Market_Tickers, y=[Companies[ticker].popularity for ticker in Market_Tickers],
                                name="Popularity distribution"))
fig_popularity.add_trace(go.Bar(x=Market_Tickers, y=[relative_volume[ticker] for ticker in Market_Tickers],
                                name="relative traded volume"))
fig_popularity.update_layout(
    title="Popularity distribution and relative traded volume",
    xaxis_title="Ticker",
    yaxis_title="Ratio",
)

fig_popularity.show()

Plot return distributions

In [None]:
# Histogram of the world index daily returns
fig_world_idx = px.histogram(
    x=world_idx_price["returns"].values,
    title="World index daily returns",
    labels={"x": "Returns [%]", "y": "Frequency"},
)
fig_world_idx.show()

# One histogram per sector index
for sector_name, sector_changes in Sector_idx_daily_chng.items():
    fig = px.histogram(
        x=sector_changes,
        title=f"{sector_name} sector index daily returns",
        labels={"x": f"{sector_name} sector index daily returns [%]", "y": "Frequency"},
    )
    fig.show()

# One histogram per geographical market index
for region_name, region_changes in Geo_idx_daily_chng.items():
    fig = px.histogram(
        x=region_changes,
        title=f"{region_name} index daily returns",
        labels={"x": f"{region_name} index daily returns [%]", "y": "Frequency"},
    )
    fig.show()

Plot CDFs

In [None]:
# World index: empirical CDF of the daily returns (sorted returns on x, cumulative probability on y)
fig = go.Figure()
fig.add_trace(go.Scatter(x=world_idx_price["sorted returns"],y=world_idx_price["CDF"],mode='lines',name="World index"))
fig.update_layout(
    title="Daily Market returns' CDF",
    xaxis_title="Daily returns [%]",
    yaxis_title="CDF"
)
fig.show()

# Sector indices: one CDF curve per sector
fig_sectors = go.Figure()
for sector in sectors:
    fig_sectors.add_trace(go.Scatter(x=Sector_idx_daily_chng_srt[sector], y=Sector_idx_cdf[sector], mode="lines", name=sector))

fig_sectors.update_layout(
    # these are distribution plots, not the trend plots the earlier titles described
    title="Daily sector returns' CDF",
    xaxis_title="Daily sector returns [%]",
    yaxis_title="CDF",
    legend_title="Sectors",
)
fig_sectors.show()

# Geographical market indices: one CDF curve per region
fig_geo = go.Figure()
for region in regions:
    fig_geo.add_trace(go.Scatter(x=Geo_idx_daily_chng_srt[region], y=Geo_idx_cdf[region], mode="lines", name=region))

fig_geo.update_layout(
    title="Daily geographical market returns' CDF",
    xaxis_title="Daily returns [%]",
    yaxis_title="CDF",
    legend_title="Geographical Markets",
)
fig_geo.show()