In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**OBJECTIVE: ANALYZE APPLE VS MICROSOFT STOCK**

In [2]:
# Import libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from math import ceil
%matplotlib inline

In [3]:
pip install yfinance

In [4]:
pip install yahoofinancials

In [5]:
import yfinance as yf
import yahoofinancials


**APPLE STOCK ANALYSIS**

In [6]:
# Fetch the AAPL price history for the study window and display it.
apple_df = yf.download(
    'AAPL',
    start='2016-01-01',
    end='2021-08-28',
)
apple_df

In [7]:
# Display a copy of the frame with the index moved into a regular column.
# The result is not assigned, so apple_df itself is left unchanged.
apple_df.reset_index()

In [8]:
#Visualize the closing price history
plt.figure(figsize=(16,8))
plt.title('Close Price History')
# Plot against the frame's own index (as the Microsoft plot does) so the
# x-axis matches its 'Date' label instead of showing row positions.
plt.plot(apple_df['Close'])
plt.xlabel('Date',fontsize=18)
plt.ylabel('Close Price USD ($)',fontsize=18)
plt.show()

In [9]:
# Take the closing prices and their natural log.
# Note: despite its name, apple_returns holds log price levels; the daily
# log returns are obtained later by differencing this series.
apple_close = apple_df.loc[:, 'Close']
apple_returns = np.log(apple_close)
apple_returns.head(5)

In [10]:
# Augmented Dickey-Fuller test on the Apple log close prices, before differencing
from statsmodels.tsa.stattools import adfuller

X = apple_returns.values
result = adfuller(X)
print(f'ADF Statistic: {result[0]:f}')
print(f'p-value: {result[1]:f}')
print('Critical Values:')
for key, value in result[4].items():
    print(f'\t{key}: {value:.3f}')

# Decide by comparing the test statistic with the 5% critical value.
print(
    "Reject Ho - Time Series is Stationary"
    if result[0] < result[4]["5%"]
    else "Failed to Reject Ho - Time Series is Non-Stationary"
)

In [11]:
# First difference of the log close prices, i.e. the daily log returns.
# The first entry has no predecessor and is NaN (dropped with dropna() later).
apple_diff = apple_returns.diff()
plt.plot(apple_diff)
# Label the figure so it can be read on its own.
plt.title('AAPL Daily Log Returns')
plt.xlabel('Date')
plt.ylabel('Log Return')
plt.show()

In [12]:
# Augmented Dickey-Fuller test on the Apple series after first-order differencing
Y = apple_diff.dropna().values
result = adfuller(Y)
print(f'ADF Statistic: {result[0]:f}')
print(f'p-value: {result[1]:f}')
print('Critical Values:')
for key, value in result[4].items():
    print(f'\t{key}: {value:.3f}')

# Decide by comparing the test statistic with the 5% critical value.
print(
    "Reject Ho - Time Series is Stationary"
    if result[0] < result[4]["5%"]
    else "Failed to Reject Ho - Time Series is Non-Stationary"
)

In [13]:
# Display the differenced Apple series as computed (nothing has been dropped yet).
apple_diff

In [14]:
# Display the differenced series without missing values.
# The result is not assigned, so apple_diff itself is unchanged.
apple_diff.dropna()

In [15]:
# Summary statistics of the Apple log close prices.
apple_returns.describe()

In [16]:
from scipy import stats

# Descriptive statistics of the Apple log close prices via scipy.stats.describe.
stats.describe(apple_returns)

In [17]:
import scipy.stats as stats
import statsmodels.formula.api as smf
import statsmodels.stats.api as sms
from statsmodels.compat import lzip
from statsmodels.stats.stattools import jarque_bera

In [18]:
# Jarque-Bera normality test on the Apple closing prices; pair each returned
# value with its label for display.
name = [
    'Jarque-Bera test',
    'Chi-squared(2) p-value',
    'Skewness',
    'Kurtosis',
]
test = sms.jarque_bera(apple_close)
list(zip(name, test))

In [19]:
# Jarque-Bera normality test on the differenced Apple series (missing values
# dropped); pair each returned value with its label for display.
name = [
    'Jarque-Bera test',
    'Chi-squared(2) p-value',
    'Skewness',
    'Kurtosis',
]
test = sms.jarque_bera(apple_diff.dropna())
list(zip(name, test))

In [20]:
#  Generate a histogram of the Apple log close prices, scaled by area under the curve
from scipy.stats import norm, kurtosis, skew

n, bins, patches = plt.hist(apple_returns, bins = 50, density = True, edgecolor = 'black')

#  Compute the normal-curve parameters from the data instead of hard-coding
#  numbers copied from an earlier output, so they stay correct when the
#  downloaded data changes.
mean = apple_returns.mean()
sd = apple_returns.std()

#  Evaluate the normal density over the range actually covered by the data
x = np.linspace(apple_returns.min(), apple_returns.max(), 50)
y = norm.pdf(x, mean, sd)

#  Overlay the normal curve on the histogram
plt.plot(x, y, 'k')
plt.xlim([x[0], x[-1]])
plt.grid(True)
#  The plotted series is log(Close), so label the axis accordingly
plt.xlabel('Log Close Price')
plt.ylabel('Normalized Count')
plt.show()

In [21]:
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf

In [22]:
#PACF plot for apple returns
# Use the differenced (stationary) series: X holds the log price levels,
# whose PACF does not describe the returns modelled below.
plot_pacf(apple_diff.dropna(), lags=10);

In [23]:
# Fit on the log prices and let ARIMA apply the single difference (d=1) itself.
# Passing the already-differenced series together with d=1 would difference
# the data twice.
model = ARIMA(apple_returns, order=(3,1,0))

In [24]:
# Fit the Apple ARIMA model and print the fitted model's summary.
model_fit = model.fit()
print(model_fit.summary())

In [25]:
#First 50 Predictions for apple returns
# Plots the fitted model's predictions for positions 1 through 50.
# NOTE(review): dynamic=False presumably keeps the predictions in-sample and
# one-step-ahead; confirm against the statsmodels documentation.
# The trailing ';' suppresses the echoed return value.
model_fit.plot_predict(start=1, end=50, dynamic=False);

**MICROSOFT STOCK ANALYSIS**

In [26]:
# Fetch the MSFT price history for the same study window and display it.
microsoft_df = yf.download(
    'MSFT',
    start='2016-01-01',
    end='2021-08-28',
)
microsoft_df

In [27]:
# Line chart of the Microsoft closing price against the frame's index
plt.figure(figsize=(16, 8))
plt.plot(microsoft_df['Close'])
plt.title('Close Price History')
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.xlabel('Date', fontsize=18)
plt.show()

In [28]:
# Take the closing prices and their natural log.
# Note: despite its name, microsoft_returns holds log price levels; the daily
# log returns are obtained later by differencing this series.
microsoft_close = microsoft_df.loc[:, 'Close']
microsoft_returns = np.log(microsoft_close)
microsoft_returns.head(5)

In [29]:
# Augmented Dickey-Fuller test on the Microsoft log close prices, before differencing
T = microsoft_returns.values
result = adfuller(T)
print(f'ADF Statistic: {result[0]:f}')
print(f'p-value: {result[1]:f}')
print('Critical Values:')
for key, value in result[4].items():
    print(f'\t{key}: {value:.3f}')

# Decide by comparing the test statistic with the 5% critical value.
print(
    "Reject Ho - Time Series is Stationary"
    if result[0] < result[4]["5%"]
    else "Failed to Reject Ho - Time Series is Non-Stationary"
)

In [30]:
# First difference of the log close prices, i.e. the daily log returns.
# The first entry has no predecessor and is NaN (dropped with dropna() later).
microsoft_diff = microsoft_returns.diff()
plt.plot(microsoft_diff)
# Label the figure so it can be read on its own.
plt.title('MSFT Daily Log Returns')
plt.xlabel('Date')
plt.ylabel('Log Return')
plt.show()

In [31]:
# Augmented Dickey-Fuller test on the Microsoft series after first-order differencing
W = microsoft_diff.dropna().values
result = adfuller(W)
print(f'ADF Statistic: {result[0]:f}')
print(f'p-value: {result[1]:f}')
print('Critical Values:')
for key, value in result[4].items():
    print(f'\t{key}: {value:.3f}')

# Decide by comparing the test statistic with the 5% critical value.
print(
    "Reject Ho - Time Series is Stationary"
    if result[0] < result[4]["5%"]
    else "Failed to Reject Ho - Time Series is Non-Stationary"
)

In [32]:
# Display the differenced Microsoft series without missing values.
# The result is not assigned, so microsoft_diff itself is unchanged.
microsoft_diff.dropna()

In [33]:
# Summary statistics of the Microsoft log close prices.
microsoft_returns.describe()

In [34]:
# Descriptive statistics of the Microsoft log close prices via scipy.stats.describe.
stats.describe(microsoft_returns)

In [35]:
# Jarque-Bera normality test on the Microsoft closing prices; pair each
# returned value with its label for display.
name = [
    'Jarque-Bera test',
    'Chi-squared(2) p-value',
    'Skewness',
    'Kurtosis',
]
test_2 = sms.jarque_bera(microsoft_close)
list(zip(name, test_2))

In [36]:
# Jarque-Bera normality test on the differenced Microsoft series.
# The parallel Apple cell tests apple_diff.dropna(); test the same quantity
# here (not the log price levels) so the two stocks are comparable.
name = ['Jarque-Bera test', 'Chi-squared(2) p-value', 'Skewness', 'Kurtosis']
test_2 = sms.jarque_bera(microsoft_diff.dropna())
lzip(name, test_2)

In [37]:
#  Histogram of the Microsoft log close prices, scaled by area under the curve
n, bins, patches = plt.hist(microsoft_returns, bins = 50, density = True, edgecolor = 'black')

#  Compute the normal-curve parameters from the data instead of hard-coding
#  numbers copied from an earlier output.
mean_2 = microsoft_returns.mean()
sd_2 = microsoft_returns.std()

#  Evaluate the normal density over the range actually covered by the data;
#  the fixed [3.0, 5.0] range copied from the Apple cell clips this series.
x = np.linspace(microsoft_returns.min(), microsoft_returns.max(), 50)
y = norm.pdf(x, mean_2, sd_2)

#  Overlay the normal curve on the histogram
plt.plot(x, y, 'k')
plt.xlim([x[0], x[-1]])
plt.grid(True)
#  The plotted series is log(Close), so label the axis accordingly
plt.xlabel('Log Close Price')
plt.ylabel('Normalized Count')
plt.show()

In [38]:
#PACF for Microsoft Returns
# Use the differenced (stationary) series: T holds the log price levels,
# whose PACF does not describe the returns modelled below.
plot_pacf(microsoft_diff.dropna(), lags=10);

In [39]:
# Fit on the log prices and let ARIMA apply the single difference (d=1) itself.
# Passing the already-differenced series together with d=1 would difference
# the data twice.
model_2 = ARIMA(microsoft_returns, order=(2,1,0))

In [40]:
# Fit the Microsoft ARIMA model and print the fitted model's summary.
model_2_fit = model_2.fit()
print(model_2_fit.summary())

In [41]:
#First 50 Predictions for microsoft returns
# Plots the fitted model's predictions for positions 1 through 50.
# NOTE(review): dynamic=False presumably keeps the predictions in-sample and
# one-step-ahead; confirm against the statsmodels documentation.
# The trailing ';' suppresses the echoed return value.
model_2_fit.plot_predict(start=1, end=50, dynamic=False);

**COMPARISON**

In [42]:
# Put the two differenced series side by side, aligned on their shared index,
# with one labelled column per stock.
df = pd.concat(
    [apple_diff.dropna(), microsoft_diff.dropna()],
    axis=1,
    keys=['APPL', 'MSFT'],
)
df.head()

In [43]:
# Pairwise correlation matrix of the two columns of df.
df.corr()

In [44]:
# Scatter of the two differenced series against each other, one point per row of df
plt.scatter(df['APPL'], df['MSFT'])
plt.ylabel('MSFT')
plt.xlabel('APPL')
plt.show()