In [1]:
import pandas as pd
# import plotly as pl
import altair as al

al.data_transformers.disable_max_rows() # remove 5k rows limit
al.themes.enable('dark') # set dark theme

ThemeRegistry.enable('dark')

In [2]:
import yfinance as yf

def get_ticker_history(ticker, print_info = False):
    '''
    Retrieve Ticker history from yahoo finance.
    Argument ticker can be a list of tickers.
    '''
    if type(ticker)==list: # if list download data for every ticker.
        tickers = []
        for t in ticker:
            temp = get_ticker_history(t).reset_index()
            temp['ticker'] = t
            tickers.append(temp)
        return pd.concat(tickers, axis=0)

    ticke = yf.Ticker(ticker)

    # get stock info
    if print_info:
        print(ticke.info['shortName'])   

    # get historical market data
    hist = ticke.history(period="max")
    return hist



In [3]:
df = get_ticker_history(['SPY'])
df.to_csv('data.csv')
df.head(2)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Capital Gains,ticker
0,1993-01-29 00:00:00-05:00,25.047173,25.047173,24.92256,25.029371,1003200,0.0,0.0,0.0,SPY
1,1993-02-01 00:00:00-05:00,25.047181,25.207397,25.047181,25.207397,480500,0.0,0.0,0.0,SPY


In [4]:
## Line Chart data
df['date'] = df.Date
df['month_day'] = [d.month*100 + d.day for d in df.date]
df['year'] = [str(d.year) for d in df.date]
df['year2'] = df.year

# calculate expanding pct change by year
df['ytd_pct_chng'] = df.groupby('year')['Close'].apply(lambda x: x.div(x.iloc[0]).subtract(1)).values
print(f'Original df {df.shape}:\n',df.head(2),'\n')

## Correlation Chart data
corrMat = df.pivot(index=['month_day'], columns = ['year'])['Close'].corr().stack()
corrMat.index.names = ["year", "year2"]
corrMat = corrMat.reset_index().rename(columns={0:'corr'})
corrMat = corrMat[~corrMat[['year','year2']].apply(frozenset, axis=1).duplicated()] # remove duplicate pairs
corrMat
print(f'Correlation df {corrMat.shape}:\n',corrMat.head(),'\n')

## Horizontal Bar data
# calculate percent change
annual = df.groupby('year')['Close'].apply(lambda x: pd.Series.pct_change(x).sum()).reset_index()
print(f'Annual Change df {annual.shape}:\n',annual.head(3))

Original df (7673, 15):
                        Date       Open       High        Low      Close   
0 1993-01-29 00:00:00-05:00  25.047173  25.047173  24.922560  25.029371  \
1 1993-02-01 00:00:00-05:00  25.047181  25.207397  25.047181  25.207397   

    Volume  Dividends  Stock Splits  Capital Gains ticker   
0  1003200        0.0           0.0            0.0    SPY  \
1   480500        0.0           0.0            0.0    SPY   

                       date  month_day  year year2  ytd_pct_chng  
0 1993-01-29 00:00:00-05:00        129  1993  1993      0.000000  
1 1993-02-01 00:00:00-05:00        201  1993  1993      0.007113   

Correlation df (496, 3):
    year year2      corr
0  1993  1993  1.000000
1  1993  1994  0.497355
2  1993  1995  0.878285
3  1993  1996  0.826791
4  1993  1997  0.794767 

Annual Change df (31, 2):
    year     Close
0  1993  0.087365
1  1994  0.012183
2  1995  0.321471


In [24]:
correlated_years = corrMat[(corrMat.year2=='2023') & (corrMat.year!='2023')].sort_values(by='corr', key=abs, ascending = False)[['year','corr']].values[:2]
correlated_years[0][1]

-0.9257804669292313

In [7]:
## Line Chart
# define base chart
base = al.Chart(df,title="Every Year of SPY, 1993 - 2022").mark_line(interpolate='basis').encode(
    x=al.X('monthdate(date):O', title='',  axis=al.Axis(labelAngle=-45)),
    y=al.Y('ytd_pct_chng:Q', title='Percent Change YTD', axis=al.Axis(format='%')),
    detail='year',
    color = al.condition("datum.year == '2022'", al.value('red'), al.value('grey')),
    tooltip=['date',al.Tooltip('ytd_pct_chng', format=".0%")]
).properties(
    width=1100,
    height=500
)

# add highlight on hover selector
highlight = al.selection(type='single', on='mouseover',
                          fields=['year'], nearest=True)

points = base.mark_circle().encode(
    opacity=al.value(0)
).add_selection(
    highlight
)

# add hover highlight
lines = base.mark_line().encode(
    size=al.condition(~highlight, al.value(1), al.value(3))
)
# add 2022 color fill
line22 = base.mark_line().encode(
    size=al.condition(al.expr.datum['year'] == '2022', al.value(2), al.value(1))
)

spy_line = (points + lines + line22)
spy_line

   Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.
        combined and should be specified using "selection_point()".


In [8]:
## Correlation Map
# define base chart
corr = al.Chart(corrMat, title='Correlation by year').mark_rect().encode(
    x=al.X('year', title=None, sort='ascending', axis=al.Axis(orient="top",labelAngle=-45)),
    y=al.Y('year2', title=None, sort='descending'),
    color=al.Color('corr', legend=None),
    tooltip=['year','year2',al.Tooltip('corr', format=".0%")]
).properties(
    width=750,
    height=750
)
# add colored labels 
text = corr.mark_text(size=9).encode(
    al.Text('corr:Q', format=".0%"),
    color=al.condition(
        'datum.corr > 0',
        al.value('white'),
        al.value('black')
    )
)

# define year selector and add to correlation chart
year_selector = al.selection_single(fields=['year','year2'],on = 'mouseover',name='year_selector')
spy_corr = (corr + text).add_selection(
    year_selector
)
spy_corr



In [9]:
## Horizonthal Bar Chart

# define base
bar = al.Chart(annual, title='Annual Percent Change').mark_bar().encode(
    x=al.X('Close:Q',title='', axis=al.Axis(format=".0%", orient="top")),
    y=al.Y("year:O",sort='descending'),
    tooltip=['year',al.Tooltip('Close', format=".0%")],
    color=al.condition(
        'datum.Close > 0',
        al.value('#4c78a8'),
        al.value('red')
    )
).properties(width=250,height=750)

# add labels
text = bar.mark_text(
    align='left',
).encode(
    text=al.Text('Close:Q',format='.0%'),
    color=al.value('white')
)
bar = (bar+text)
bar

In [10]:
## Add crossfiltering

# vega expression for years selected in correlation map
year_selector_expr = '''
indexof(datum.year, year_selector.year) >= 0 || indexof(datum.year, year_selector.year2) >= 0
'''

# color conditions for line chart
color={
    'condition': [
        {"value":"white", "test": year_selector_expr},
        {"value":"red", "test": "datum.year == 2022"},
        {"value":"grey", "test": "datum.year != 2022"}
    ]
}

# add color interactions and custom legend to line chart
spy_line2 = spy_line+base.mark_line().encode(
    color=al.Color('year',
                   scale=al.Scale(domain=['2022'],range=['red']),
                   condition=color['condition'], 
                   legend=al.Legend(title='',orient='bottom-left',labelFontSize=18,symbolSize=400,symbolStrokeWidth=3,offset=40)),
    size=al.condition(year_selector_expr,al.value(2),al.value(1))
).add_selection(year_selector)

view = (spy_line2 & (spy_corr | bar)).configure_title(fontSize=18).configure(background='black')
view



In [35]:
# save to html
view.save('SPY History.html')
view.save('spy_history_vega.json')