In [None]:
import pandas as pd
import numpy as np
from datetime import datetime

import seaborn as sns
import matplotlib.pyplot as plt

import yfinance as yf

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_percentage_error as MAPE
#from prophet import Prophet
# from causalimpact import CausalImpact
# # import os


In [None]:
# Ticker universe for the analysis. Every mapping is
# {Yahoo Finance ticker: human-readable label}; only the keys are used to
# download and select data, the values are descriptive.

# Pharmaceutical companies
pharma_companies = {
    # 'BNTX': 'BioNTech SE',
    'PFE': 'Pfizer Inc.',
    'JNJ': 'Johnson & Johnson',
    'NVS': 'Novartis AG',
    # Roche's US-traded ADR. The bare symbol 'ROG' resolves to Rogers
    # Corporation on Yahoo Finance, which is not a pharma company.
    'RHHBY': 'Roche Holding AG',
    'MRK': 'Merck & Co., Inc.',
    'SNY': 'Sanofi',
    'GSK': 'GlaxoSmithKline plc',
    'AZN': 'AstraZeneca PLC',
    'LLY': 'Eli Lilly and Company',
    'ABBV': 'AbbVie Inc.',
}

# Consumer staples
consumer_staples = {
    'KO': 'The Coca-Cola Company (KO)',
    'CL': 'Colgate-Palmolive Company (CL)',
    'KMB': 'Kimberly-Clark Corporation (KMB)',
    'UL': 'Unilever (UL)',
}

# Retail
retail = {
    "AMZN": "Amazon.com, Inc.",
    "ITX.MC": "Industria de Diseño Textil, S.A.",
    "HNNMY": "Hennes & Mauritz AB",
    "MC.PA": "LVMH Moët Hennessy Louis Vuitton S.E.",
    "9983.T": "Fast Retailing Co., Ltd.",
}

# Global economy indicators
global_economy_indicators = {
    '^GSPC': 'S&P 500 Index',
    '^IXIC': 'NASDAQ Composite',
    '^FTSE': 'FTSE 100 (^FTSE)',
    '^DJI': 'Dow Jones Industrial Average',
    # The symbol 'MSCI' is the stock of MSCI Inc., not an index. URTH is the
    # iShares ETF that tracks the MSCI World Index and is used as its proxy.
    'URTH': 'iShares MSCI World ETF (proxy for the MSCI World Index)',
}


In [None]:
# [start, end] bounds of the pre- and post-intervention windows used by the
# causal-impact analysis.
pre_period = [pd.Timestamp(year=2017, month=1, day=1),
              pd.Timestamp(year=2020, month=2, day=1)]
post_period = [pd.Timestamp(year=2020, month=3, day=1),
               pd.Timestamp(year=2020, month=9, day=1)]

In [None]:
# Every ticker analysed in this notebook: the three company groups followed by
# the market indicators.
stocks = [
    *pharma_companies,
    *consumer_staples,
    *retail,
    *global_economy_indicators,
]

# Download daily prices from the start of the pre-period through one month
# past the last post-period timestamp, so the final post-period month is
# fetched in full.
stock_marker = yf.download(
    tickers=stocks,
    start=pre_period[0],
    end=post_period[1] + pd.DateOffset(months=1),
    # Newer yfinance releases default to auto_adjust=True, which drops the
    # separate "Adj Close" column that the next cell selects. Pin it to False
    # so the notebook behaves the same across yfinance versions.
    auto_adjust=False,
)

In [None]:
# Adjusted closing prices, one column per ticker.
adj_close = stock_marker["Adj Close"]

# Average the daily prices within each calendar month. 'MS' (month start)
# produces the same calendar-month bins as the 'M' alias, which is deprecated
# in pandas >= 2.2; only the bin label differs, and that label is reduced to
# 'YYYY-MM' on the next line anyway.
monthly_avg = adj_close.resample('MS').mean()
monthly_avg.index = monthly_avg.index.strftime('%Y-%m')
monthly_avg

In [None]:
def plot_stocks(stocks:list):
    """Plot the monthly average price of each given ticker on one chart.

    Reads the module-level ``monthly_avg`` frame, draws one seaborn line per
    entry of ``stocks`` (labelled with the ticker), marks ``"2020-03"`` with a
    dashed black vertical line, rotates the x tick labels and shows the figure.
    """
    months = monthly_avg.index
    for ticker in stocks:
        prices = monthly_avg[ticker]
        sns.lineplot(x=months, y=prices, label=ticker)

    # Vertical marker at March 2020.
    plt.axvline(x="2020-03", c="k", linestyle="--")
    plt.xticks(rotation=90)
    plt.show()


In [None]:
import plotly_express as px

def timeline_plot(df):
    """Show an interactive line chart with one trace per column of ``df``.

    The frame's index is used for the x axis. A dashed black vertical line is
    added at x='2020-03'; its y extent is given in paper coordinates (0 to 1).
    """
    # Layout shape describing the vertical marker line.
    marker_line = {
        'type': 'line',
        'xref': 'x',
        'yref': 'paper',
        'x0': '2020-03',
        'y0': 0,
        'x1': '2020-03',
        'y1': 1,
        'line': {'color': 'black', 'width': 2, 'dash': 'dash'},
    }

    figure = px.line(df, x=df.index, y=df.columns)
    figure.update_layout(shapes=[marker_line])
    figure.show()


In [None]:
# Monthly average prices of the pharma tickers.
timeline_plot(monthly_avg[list(pharma_companies)])

In [None]:
# Monthly average prices of the consumer-staples tickers.
timeline_plot(monthly_avg[list(consumer_staples)])

In [None]:
# Monthly average prices of the retail tickers.
timeline_plot(monthly_avg[list(retail)])

In [None]:
# Monthly average levels of the market indicators.
timeline_plot(monthly_avg[list(global_economy_indicators)])

In [None]:

# Pearson correlation of the monthly averages, restricted to the pre-period,
# then reduced to pharma tickers (rows) versus market indicators (columns).
pre_start, pre_end = (bound.strftime('%Y-%m') for bound in pre_period)
corr = monthly_avg.loc[pre_start:pre_end, :].corr()
corr = corr.loc[list(pharma_companies),
                list(global_economy_indicators)]

# Pearson correlations lie in [-1, 1]. Use the full range and centre the
# diverging colormap on zero so negative correlations are not clipped to the
# same colour as zero.
sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1, center=0)
plt.title("Pearson's Correlation Heatmap")
plt.show()


In [None]:
# Pharma-by-indicator correlation table, shown as this cell's output.
pharma_rows = list(pharma_companies)
indicator_cols = list(global_economy_indicators)
corr.loc[pharma_rows, indicator_cols]

In [None]:
# Display the monthly-average table as this cell's output.
monthly_avg

In [None]:
from causalimpact import CausalImpact

def causal_impact_analysis(ci_data, target, pre_period, post_period):
    """Fit a CausalImpact model, report its pre-period error, and plot it.

    Parameters
    ----------
    ci_data : pandas.DataFrame
        Time-indexed data passed unchanged to ``CausalImpact``. Must contain
        the ``target`` column.
    target : str
        Column of ``ci_data`` holding the observed response; used to score
        the model's pre-period predictions.
    pre_period, post_period : list
        ``[start, end]`` bounds forwarded to ``CausalImpact``.
        ``pre_period[1]`` is also the cut-off for the pre-period error.

    Returns
    -------
    CausalImpact
        The fitted model object, for further inspection by the caller.
    """
    ci = CausalImpact(ci_data, pre_period, post_period)
    ci.run()

    # Check model accuracy in the pre period: predictions versus observations
    # up to and including the end of the pre period.
    pre_end = pre_period[1]
    y_pred_pre = ci.inferences.loc[:pre_end, 'point_pred']
    y_pre = ci_data.loc[:pre_end, target]

    # sklearn's MAPE is a fraction (0.05 means 5%), so the '%' format spec is
    # used to scale it by 100 rather than appending a literal percent sign.
    mape = MAPE(y_pre, y_pred_pre)
    print(f'The mean absolute percentage error in the pre period is {mape:.2%}')

    # Model summary. Depending on the CausalImpact implementation, summary()
    # either returns the text or prints it itself and returns None; only print
    # a returned value so no stray "None" appears in the output.
    summary = ci.summary()
    if summary is not None:
        print(summary)

    # This graph shows the empirical and counterfactual data
    ci.plot(panels=['original', 'pointwise', 'cumulative'], figsize=(15, 5))

    return ci

In [None]:
# Causal-impact analysis for every pharma ticker, with the market indicators
# as the accompanying series.
banner = '=' * 10
for pharma in pharma_companies:
    print(f"{banner} {pharma} {banner}")
    # Response column first, then the indicators; rows with any NaN removed.
    columns = [pharma, *global_economy_indicators]
    tmp = monthly_avg[columns].dropna()
    # Turn the 'YYYY-MM' labels back into timestamps.
    tmp.index = pd.to_datetime(tmp.index, format="%Y-%m")
    test = causal_impact_analysis(tmp, pharma, pre_period, post_period)

In [None]:
# Causal-impact analysis for every consumer-staples ticker, with the market
# indicators as the accompanying series.
for company in consumer_staples:
    print(f"{'='*10} {company} {'='*10}")
    # Response column first, then the indicators. Rows with a missing value in
    # any series are dropped, matching how the pharma tickers are prepared, so
    # NaNs cannot reach the model or the pre-period error computation.
    tmp = monthly_avg[[company] + list(global_economy_indicators)].dropna()
    # Turn the 'YYYY-MM' labels back into timestamps.
    tmp.index = pd.to_datetime(tmp.index, format="%Y-%m")
    causal_impact_analysis(tmp, company, pre_period, post_period)

In [None]:
# Causal-impact analysis for every retail ticker, with the market indicators
# as the accompanying series.
for company in retail:
    print(f"{'='*10} {company} {'='*10}")
    # Response column first, then the indicators. Rows with a missing value in
    # any series are dropped, matching how the pharma tickers are prepared, so
    # NaNs cannot reach the model or the pre-period error computation.
    tmp = monthly_avg[[company] + list(global_economy_indicators)].dropna()
    # Turn the 'YYYY-MM' labels back into timestamps.
    tmp.index = pd.to_datetime(tmp.index, format="%Y-%m")
    causal_impact_analysis(tmp, company, pre_period, post_period)

In [None]:
# Causal-impact analysis for every market indicator, using the remaining
# indicators as the accompanying series.
for indicator in global_economy_indicators:
    print(f"{'='*10} {indicator} {'='*10}")
    # Response column first, followed by all the other indicators.
    x = [name for name in global_economy_indicators if name != indicator]
    order = [indicator] + x
    tmp = monthly_avg[order]
    # Turn the 'YYYY-MM' labels back into timestamps.
    tmp.index = pd.to_datetime(tmp.index, format="%Y-%m")
    # The target must be the indicator under analysis. The previous code passed
    # `company`, a variable left over from an earlier loop that is not a
    # column of `tmp`.
    causal_impact_analysis(tmp, indicator, pre_period, post_period)