In [1]:
import pandas as pd 
import numpy as np 
import datetime
import pandas_datareader.data as web
import yahoo_finance as yf
import plotly.graph_objects as go
from plotly.offline import iplot, init_notebook_mode
import cufflinks
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)


import random 
from ipywidgets import widgets
from chart_studio.widgets import GraphWidget
from scipy.stats import ttest_ind,probplot,bayes_mvs, linregress
from ipywidgets import interact,interact_manual,interactive_output


<IPython.core.display.Javascript object>

<h1>Symbol in Consumer Industry 

In [2]:
#get basic stock date from 2018-01-01 to today
start = datetime.datetime(2018,1,1)
end = datetime.date.today()
symbols = ['CPB','CL','COST','SJM','K','PEP','KO','HSY','KHC','WMT']

solar_df = web.get_data_yahoo(symbols, start,end)
df = solar_df.stack().reset_index().set_index('Date')
df['log_return']= np.log(df['Close']/df['Open'])

In [3]:
symbol_names ={'CPB': 'Campbell Soup Company',
 'CL': 'Colgate-Palmolive Company',
 'COST': 'Costco Wholesale Corporation',
 'SJM': 'J.M.Smucker Company',
 'K': 'Kellogg Company',
 'PEP': 'PepsiCo, Inc.',
 'KO': 'The Coca-Cola Company',
 'HSY': 'The Hershey Company',
 'KHC': 'The Kraft Heinz Company',
 'WMT': 'Walmart Inc.'}

In [4]:
#some essential widgets list
s = [(value,key) for key, value in symbol_names.items() ]
num = widgets.BoundedIntText(
    value=300,
    min=1,
    max=len(df),
    step=1,
    description='#Samples',
    disabled=False
)
start_date = widgets.DatePicker(
    description='Start Date',
    value= pd.to_datetime('2018-11-01'))
end_date = widgets.DatePicker(
    description = 'End Date',
    value= pd.to_datetime('2019-12-05'))


first_textbox = widgets.Dropdown(
    description='First Type:   ',
    value='Open',
    options=list(df.select_dtypes('number').columns)
)

second_textbox = widgets.Dropdown(
    description='Second Type:   ',
    value='Close',
    options=list(df.select_dtypes('number').columns)
)

symbol = widgets.Dropdown(
    description='Symbol:   ',
    value='WMT',
    options = s
)


<h2> Part1. Time Series Figure

In [5]:
trace1 = go.Scatter(x=df.loc[df.Symbols=='WMT'].index, y=df.loc[df.Symbols=='WMT']['Open'], opacity=0.75, name='Open Price',mode='markers+lines')
trace2 = go.Scatter(x=df.loc[df.Symbols=='WMT'].index, y=df.loc[df.Symbols=='WMT']['Close'], opacity=0.75, name='Close Price',mode='markers+lines')
g = go.FigureWidget(data=[trace1, trace2],
                    layout=go.Layout(height=600,width=1000,
                        title=dict(
                            text='Open and Close Price of Walmart'
                        ),
                          yaxis_title_text = 'Price(USD)',
                        barmode='overlay'
                                     
                    ))

In [6]:
def validate():
    if (start_date.value >= pd.Timestamp(start)) and (end_date.value <= pd.Timestamp(end)) and (symbol.value in symbols):
        return True
    else:
        return False
    

def response(change):

    if validate():
        start= pd.Timestamp(start_date.value)
        end = pd.Timestamp(end_date.value)
        filter_list = [i and j and k for i,j,k in
                          zip(df['Symbols'] == symbol.value, df.index >= start,
                              df.index <=end)]
        temp_df = df[filter_list]
        y1 = temp_df[first_textbox.value]
        y2 = temp_df[second_textbox.value]
        with g.batch_update():
            g.data[0].y = y1
            g.data[1].y = y2
            g.data[0].name = f'{first_textbox.value}'
            g.data[1].name = f'{second_textbox.value}'
            g.layout.barmode = 'overlay'
            g.layout.title = f'The {first_textbox.value} and {second_textbox.value} Price of {symbol_names[symbol.value]}(USD)'
            g.layout.yaxis.title = 'Price(USD)'





start_date.observe(response, names="value")
end_date.observe(response, names="value")
first_textbox.observe(response, names="value")
second_textbox.observe(response, names="value")
symbol.observe(response, names="value")

Please Select your interested symbol to display graphs

In [7]:
container = widgets.HBox(children=[start_date, end_date])
container2 = widgets.HBox([first_textbox, second_textbox])
widgets.VBox([container,
               container2,symbol,g])

VBox(children=(HBox(children=(DatePicker(value=Timestamp('2018-11-01 00:00:00'), description='Start Date'), Da…

<h2> Part 2.One Symbol Analysis

<h3> 2.1 Visualization

In [8]:
def one_symbol_visualizatoin(symbol,n):
    
    filter_idx  = random.sample(list(df.index.unique()),n)
    regre_df = df.loc[filter_idx]

    data1 =np.array(regre_df.loc[regre_df.Symbols==symbol]['log_return'])
    (osm, osr), (slope, intercept, r) = probplot(data1,fit=True)
    
    trace1=go.Histogram(
        x= data1,
        histnorm='percent',
        opacity=0.75,
        xaxis='x1',
        yaxis='y1'
    )
    trace2 = go.Scatter(x=osm,y=osr,mode='markers',opacity=0.6,name='data', xaxis='x2',yaxis='y2')
    trace3 = go.Scatter(x=osm,y=osm*slope+intercept,mode='lines',name='fit', xaxis='x2',yaxis='y2')

    axis=dict(
       showline=True,
       zeroline=False,
       showgrid=True,
       mirror=True,
       ticklen=4,
       gridcolor='#ffffff',
       tickfont=dict(size=10))
    
    layout = dict(
        width=1000,
        height=500,
        autosize=False,
        title=f'Histogram and Probability plot of the log return for {symbol_names[symbol]}',
        margin = dict(t=50),
        showlegend=False,
        xaxis1=dict(axis, **dict(domain=[0, 0.45], anchor='y1', showticklabels=True,title='Log Return(day)')),
        xaxis2=dict(axis, **dict(domain=[0.52, 0.95], anchor='y2', showticklabels=True,title='Theoretical quantiles')),
        #xaxis3=dict(axis, **dict(domain=[0, 1], anchor='y3')),
        yaxis1=dict(axis, **dict(domain=[0, 1], anchor='x1',title = "Percentage")),
        yaxis2=dict(axis, **dict(domain=[0, 1], anchor='x2', title='Ordered Values')))
    fig = dict(data=[trace1, trace2, trace3], layout=layout)
    iplot(fig)

Please Select your interested symbol and input a number $\leq 487$ for samples

In [9]:
out = interactive_output(one_symbol_visualizatoin, 
             {'symbol':  symbol, 
             'n':num})

u=widgets.HBox([symbol,num])
display(u,out)

HBox(children=(Dropdown(description='Symbol:   ', index=9, options=(('Campbell Soup Company', 'CPB'), ('Colgat…

Output()

<h3>2.2 Analysis

<h4>2.2.1 Confidence Interval

In [10]:
def confidence(symbol,confidence_level,n):
    filter_idx  = random.sample(list(df.index.unique()),n)
    regre_df = df.loc[filter_idx]
    log_return_data = np.array(df.loc[df.Symbols==symbol]['log_return'])
    mean_cntr, var_cntr, std_cntr = bayes_mvs(log_return_data,alpha=confidence_level)
    CI_mu = mean_cntr.minmax
    CI_var = var_cntr.minmax
    CI_mur = tuple([round(x,5) if isinstance(x, float) else x for x in CI_mu])
    CI_varr = tuple([round(x,5) if isinstance(x, float) else x for x in CI_var])
    trace = go.Table(header=dict(values=['Company',f'{str(confidence_level*100)}% Confidence Interval of \u03BC',
                                          f'{str(confidence_level*100)}% Confidence Interval of \u03C3']),
                     cells=dict(values= [[symbol_names[symbol]], [str(CI_mur)], [str(CI_varr)]]))
    layout = go.Layout(height=280,title=f'{str(confidence_level*100)}% Confidence Interval Analysis of {symbol_names[symbol]}')
    fig = go.Figure(data=trace,layout=layout)
    fig.show()

Please Select your interested symbol to display graphs

In [11]:
cl = widgets.BoundedFloatText(
    value=0.95,
    min=0,
    max=1.0,
    step = 0.005,
    description='Confidence Level',
    disabled=False
)

u =   widgets.HBox([symbol,cl,num])
out = interactive_output(confidence, 
             {'symbol':  symbol, 
             'confidence_level':cl,
              'n':num})

display(u,out)

HBox(children=(Dropdown(description='Symbol:   ', index=9, options=(('Campbell Soup Company', 'CPB'), ('Colgat…

Output()

<h4> 2.2.2 Regression Over Time

In [12]:
def one_symbol_regression(symbol):
    
    start_price  = df.loc[df.Symbols==symbol]['Open'][0]
    regre_df = df.loc[df.Symbols==symbol]
    log_return_vs_180102 = np.array(np.log(regre_df['Close']/start_price))
    accumulate_days = np.array((regre_df.index - start).days)
    
    slope, intercept, r_value, p_value, std_err  = linregress(accumulate_days,log_return_vs_180102 )
 
    
    trace1 = go.Scatter(x=accumulate_days,y=log_return_vs_180102,mode='markers',name='data', xaxis='x1',yaxis='y1')
    trace2 = go.Scatter(x=accumulate_days,y=intercept+slope*accumulate_days,mode='lines',name='fit',xaxis='x1',yaxis='y1')
    trace3 = go.Scatter(x=accumulate_days,y=intercept+slope*accumulate_days-log_return_vs_180102,mode='markers',name='residual',xaxis='x2',yaxis='y2')
    trace4 = go.Table(header=dict(values=['slope', 'intercept', 'R^2', 'p_value', 'std_err']),
                 cells=dict(values= [round(x,6) for x in [slope, intercept, (r_value)**2, p_value, std_err]]))
   
    axis=dict(
    showline=True,
    zeroline=False,
    showgrid=True,
    mirror=True,
    ticklen=4,
    gridcolor='#ffffff',
    tickfont=dict(size=10)
)
    layout2 = dict(
        width=950,
        height=800,
        autosize=False,
        title=f'Regression Model of log return of {symbol_names[symbol]} vs Time',
        margin = dict(t=100),
        showlegend=False,
        xaxis1=dict(axis, **dict(domain=[0, 1], anchor='y1', showticklabels=False)),
        xaxis2=dict(axis, **dict(domain=[0, 1], anchor='y2', showticklabels=True,title='Accumulated Datys')),
        #xaxis3=dict(axis, **dict(domain=[0, 1], anchor='y3')),
        yaxis1=dict(axis, **dict(domain=[0.4+0.05, 0.85], anchor='x1', hoverformat='.2f',title = f'Log return vs 2018-01-02')),
        yaxis2=dict(axis, **dict(domain=[0.0, 0.4], anchor='x2',  hoverformat='.2f',title='Residual')))
        #yaxis3=dict(axis, **dict(domain=[0.0, 0.21], anchor='x3', , hoverformat='.2f')))

    fig2 = dict(data=[trace1, trace2, trace3, trace4], layout=layout2)
    iplot(fig2)


Please Select your interested symbol

In [13]:
interact(one_symbol_regression,symbol=symbol)

interactive(children=(Dropdown(description='Symbol:   ', index=9, options=(('Campbell Soup Company', 'CPB'), (…

<function __main__.one_symbol_regression(symbol)>

<h2> Part 3. Two Symbol Analysis

### 1. If the two population means are equal?

In [14]:
def test_two_symbol_mean(symbol_1,symbol_2,num_of_sample1,num_of_sample2,alpha):
    import random
    s1 = list(df.loc[df.Symbols==symbol_1]['log_return'])
    s2 = list(df.loc[df.Symbols==symbol_2]['log_return'])
    data1 = random.sample(s1, num_of_sample1)
    data2 = random.sample(s2, num_of_sample2)
    mean1,mean2=  round(np.mean(data1),4),round(np.mean(data2),4)
    std1,std2 = round(np.std(data1,ddof=1),4),round(np.std(data2,ddof=1),4)
    
    print(f'The sample mean of {num_of_sample1} sample {symbol_names[symbol_1]} and {num_of_sample2} sample {symbol_names[symbol_2]}\n are {mean1} and {mean2}\n')
    print(f'The standard divation of {num_of_sample1} {symbol_names[symbol_1]} and {num_of_sample2} {symbol_names[symbol_2]}\n are {std1} and {std2}\n')
    plot_df = df.pivot(values='log_return',columns='Symbols')
    plot_df[[symbol_1,symbol_2]].iplot(kind='histogram',title=f"Distribution of {symbol_names[symbol_1]} and {symbol_names[symbol_2]}",xTitle='log return', yTitle='Count')

    t_statistic, p_value = ttest_ind(data1,data2,equal_var=False)
    print(f"Use t-test to test the equality of  {symbol_names[symbol_1]} and {symbol_names[symbol_2]}'s population means\n")
    print(f'the t_statisc value is {t_statistic}\n')
    print(f'the p value value is {p_value}\n')
    if p_value > alpha:
        print('Accept null hypothesis that the means are equal.')
    else:
        print('Reject the null hypothesis that the means are equal.')

          


Please select two your interested symbols and input Amounts $\leq 487$ , $\alpha\leq 1$

In [15]:
symbol_1= widgets.Dropdown(options=s,value='WMT',description='Symbol 1')
symbol_2 = widgets.Dropdown(options=s,value='COST',description='Symbol 2')
num1 = widgets.BoundedIntText(
    value=300,
    min=1,
    max=len(df),
    step=1,
    description='1 Amounts',
    disabled=False
)
num2= widgets.BoundedIntText(
    value=300,
    min=1,
    max=len(df),
    step=1,
    description='2 Amounts',
    disabled=False
)
a = widgets.BoundedFloatText(
    value=0.05,
    min=0,
    max=1.0,
    step = 0.005,
    description='Alpha(0-1)',
    disabled=False
)
u1 = widgets.HBox([symbol_1,symbol_2])
u2 = widgets.HBox([num1,num2])

out = interactive_output(test_two_symbol_mean, 
             {'symbol_1':  symbol_1, 
             'symbol_2':symbol_2,
             'num_of_sample1':num1, 
             'num_of_sample2':num2,
             'alpha':a})

display(u1,u2,a,out)

HBox(children=(Dropdown(description='Symbol 1', index=9, options=(('Campbell Soup Company', 'CPB'), ('Colgate-…

HBox(children=(BoundedIntText(value=300, description='1 Amounts', max=4920, min=1), BoundedIntText(value=300, …

BoundedFloatText(value=0.05, description='Alpha(0-1)', max=1.0, step=0.005)

Output()

### 2. Perform a Regression  Model

In [16]:
def regression(symbol_1,symbol_2,n):
    #from plotly.subplots import make_subplots
    from scipy.stats import linregress
    import random 
    filter_idx  = random.sample(list(df.index.unique()),n)
    regre_df = df.loc[filter_idx]
    plot_df= regre_df.pivot(values='log_return',columns='Symbols')
    data1 =np.array(regre_df.loc[regre_df.Symbols==symbol_1]['log_return'])
    data2 =np.array(regre_df.loc[regre_df.Symbols==symbol_2]['log_return'])
    slope, intercept, r_value, p_value, std_err  = linregress(data1,data2)
    trace1 = go.Scatter(x=data1,y=data2,mode='markers',name='data', xaxis='x1',yaxis='y1')
    trace2 = go.Scatter(x=data1,y=intercept+slope*data1,mode='lines',name='fit',xaxis='x1',yaxis='y1')
    trace3 = go.Scatter(x=data1,y=intercept+slope*data1-data2,mode='markers',name='residual',xaxis='x2',yaxis='y2')
    trace4 = go.Table(header=dict(values=['slope', 'intercept', 'r_value', 'p_value', 'std_err']),
                 cells=dict(values= [round(x,8) for x in [slope, intercept, (r_value)**2, p_value, std_err]]))
    
    axis=dict(
    showline=True,
    zeroline=False,
    showgrid=True,
    mirror=True,
    ticklen=4,
    gridcolor='#ffffff',
    tickfont=dict(size=10)
)
    layout2 = dict(
        width=950,
        height=800,
        autosize=False,
        title=f'Regression Model of log return of {symbol_names[symbol_1]} and {symbol_names[symbol_2]}',
        margin = dict(t=100),
        showlegend=False,
        xaxis1=dict(axis, **dict(domain=[0, 1], anchor='y1', showticklabels=False)),
        xaxis2=dict(axis, **dict(domain=[0, 1], anchor='y2', showticklabels=True,title=f'Log return of {symbol_names[symbol_1]}')),
        #xaxis3=dict(axis, **dict(domain=[0, 1], anchor='y3')),
        yaxis1=dict(axis, **dict(domain=[0.4+0.05, 0.85], anchor='x1', hoverformat='.2f',title = f'Log return of {symbol_names[symbol_2]}')),
        yaxis2=dict(axis, **dict(domain=[0.0, 0.4], anchor='x2',  hoverformat='.2f',title='Residual')))
        #yaxis3=dict(axis, **dict(domain=[0.0, 0.21], anchor='x3', , hoverformat='.2f')))

    fig2 = dict(data=[trace1, trace2, trace3, trace4], layout=layout2)
    iplot(fig2)


Please select two interestd symbols and a number $\leq 487$

In [17]:
out2 = interactive_output(regression, 
             {'symbol_1':  symbol_1, 
             'symbol_2':symbol_2,
             'n':num})

u3=widgets.HBox([symbol_1,symbol_2,num])
display(u3,out2)

HBox(children=(Dropdown(description='Symbol 1', index=9, options=(('Campbell Soup Company', 'CPB'), ('Colgate-…

Output()