In [2]:
# importing all needed libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf

In [3]:
# Download daily price history for Microsoft (MSFT) and Sony (SONY) from Yahoo Finance.
msft = yf.Ticker("MSFT")
sony = yf.Ticker("SONY")

# The window is fixed by `start`/`end`, so the bar size must be requested through
# `interval`. `period` is a look-back range (e.g. "1mo"), not a bar size; combined with
# both `start` and `end` it is, depending on the yfinance version, ignored or rejected.
msftDF = msft.history(interval='1d', start='2010-1-1', end='2022-1-25')
sonyDF = sony.history(interval='1d', start='2010-1-1', end='2022-1-25')

# Persist the untouched downloads so the raw data can be inspected or reloaded later.
msftDF.to_csv("msft.csv")
sonyDF.to_csv("sony.csv")


In [13]:
# Cleaning: keep only the price/volume columns, place both series on a
# business-day calendar, and fill the gaps that calendar exposes.

# Drop the corporate-action columns first so they are never interpolated.
# errors="ignore" keeps this cell re-runnable: msftDF/sonyDF are reassigned in place,
# so on a second execution the columns are already gone and a plain drop would raise KeyError.
# NOTE(review): the original note claimed these columns are all zero; they are non-zero
# on dividend/split days - confirm they are not needed downstream.
msftDF = msftDF.drop(columns=["Dividends", "Stock Splits"], errors="ignore")
sonyDF = sonyDF.drop(columns=["Dividends", "Stock Splits"], errors="ignore")

# asfreq('B') conforms the index to every business day (Mon-Fri); business days with
# no observation in the download become all-NaN rows.
msftDF = msftDF.asfreq('B')
sonyDF = sonyDF.asfreq('B')

# Fill those NaN rows by linear interpolation, i.e. estimate each missing value
# from its neighbouring observations.
msftDF = msftDF.interpolate()
sonyDF = sonyDF.interpolate()

In [14]:
# Put both frames on one shared calendar: the union of their two date indexes.
# NOTE(review): a date present in only one series becomes an all-NaN row in the other,
# and this happens after interpolation - confirm the indexes really coincide.
common_date_range = msftDF.index.union(sonyDF.index)
msftDF, sonyDF = (frame.reindex(common_date_range) for frame in (msftDF, sonyDF))

In [17]:
# Z-score normalization
# Each column is centred on its own mean and divided by its own standard deviation,
# giving zero mean and unit variance, so the two companies' series share a common scale.
# The mean and std are taken over the entire column (the full sample).
msftDF_normalized = msftDF.sub(msftDF.mean()).div(msftDF.std())
sonyDF_normalized = sonyDF.sub(sonyDF.mean()).div(sonyDF.std())



In [23]:
# Stationarity transformation: first-order differencing (each row minus the previous row).
# The first row of each result is NaN because it has no predecessor.
# Note: this is applied to msftDF / sonyDF, not to the *_normalized frames.
msftDF_diff, sonyDF_diff = (frame.diff(periods=1) for frame in (msftDF, sonyDF))