In [2]:
import pandas as pd
import numpy as np
import FinanceDataReader as fdr

# Display the value of every top-level expression in a cell, not only the last one,
# so that several bare expressions in one cell each produce their own output.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Display floats with three decimal places.
pd.set_option('display.float_format', lambda x: '%.3f' % x)

# Show every column when a DataFrame is displayed (None removes the limit).
# The fully qualified key is used on purpose: the bare pattern 'max_columns' can match
# more than one option in newer pandas releases (e.g. 'styler.render.max_columns'),
# in which case set_option raises OptionError.
pd.set_option('display.max_columns', None)

## Data used

In [3]:
# Company names of the ten stocks used in this notebook, held as a pandas Series.
code_stock_name = pd.Series(
    np.array([
        'NETFLIX', 'Amazon', 'Apple', 'Microsoft', 'NVIDIA',
        'Tesla', 'Autodesk', 'NIKE', 'Pfizer', 'Disney',
    ])
)

# Single-column table of those names.
use_stock = code_stock_name.to_frame(name='stock_name')

# Label the rows 1..N instead of the default 0..N-1.
row_count = len(use_stock)
use_stock.index = np.arange(1, row_count + 1)

use_stock

Unnamed: 0,stock_name
1,NETFLIX
2,Amazon
3,Apple
4,Microsoft
5,NVIDIA
6,Tesla
7,Autodesk
8,NIKE
9,Pfizer
10,Disney


## load datasets
* Stock ticker symbols (NASDAQ/NYSE) were looked up on [Yahoo Finance](https://finance.yahoo.com/).
* Date range: '2015-01-01' to '2021-11-25'

In [4]:
# Date range requested for every ticker. Defined once so that all ten downloads are
# guaranteed to use the same window and it can be changed in a single place.
START_DATE = '2015-01-01'
END_DATE = '2021-11-25'

# Download one price DataFrame per ticker with FinanceDataReader.
# NOTE: the variable names (including the mixed _df / _DF / _dF suffixes) are kept
# exactly as they are because later cells refer to these names.
NETFLIX_df = fdr.DataReader("NFLX", START_DATE, END_DATE)
AMAZON_df = fdr.DataReader("AMZN", START_DATE, END_DATE)
APPLE_DF = fdr.DataReader("AAPL", START_DATE, END_DATE)
MICROSOFT_df = fdr.DataReader("MSFT", START_DATE, END_DATE)
NVIDIA_df = fdr.DataReader("NVDA", START_DATE, END_DATE)
TESLA_dF = fdr.DataReader("TSLA", START_DATE, END_DATE)
AUTODESK_DF = fdr.DataReader("ADSK", START_DATE, END_DATE)
NIKE_DF = fdr.DataReader("NKE", START_DATE, END_DATE)
PFIZER_DF = fdr.DataReader("PFE", START_DATE, END_DATE)
DISNEY_DF = fdr.DataReader("DIS", START_DATE, END_DATE)

# Show (rows, columns) of every frame to check whether they all cover the same days.
NETFLIX_df.shape
AMAZON_df.shape
APPLE_DF.shape
MICROSOFT_df.shape
NVIDIA_df.shape
TESLA_dF.shape
AUTODESK_DF.shape
NIKE_DF.shape
PFIZER_DF.shape
DISNEY_DF.shape

(1739, 6)

(1739, 6)

(1738, 6)

(1739, 6)

(1739, 6)

(1739, 6)

(1738, 6)

(1738, 6)

(1738, 6)

(1738, 6)

In [8]:
# The downloaded frames do not all have the same number of rows. Possible reasons:
#   1. Trading in a stock was suspended because of serious concerns about the company's
#      assets, operations, or other financial information.
#   2. A trading halt was imposed to correct an order imbalance.
#   3. The data provider omitted a specific day's data.
#   etc.
# The frames must therefore be aligned to a common set of dates before they are compared.

# In this project NETFLIX_df (NFLX) is used as the reference: every shorter frame is
# re-indexed onto NETFLIX_df's index. Dates missing from a frame become all-NaN rows,
# which are then forward-filled ("ffill") with the last available values.
# The filled result is assigned back to the *_df names; previously the fill was only
# applied to a temporary that was passed to .head(), so the NaN rows stayed in the data.
# NOTE: forward fill cannot fill a missing row that has no earlier row before it.
APPLE_df = APPLE_DF.reindex(NETFLIX_df.index).ffill()
AUTODESK_df = AUTODESK_DF.reindex(NETFLIX_df.index).ffill()
NIKE_df = NIKE_DF.reindex(NETFLIX_df.index).ffill()
PFIZER_df = PFIZER_DF.reindex(NETFLIX_df.index).ffill()
DISNEY_df = DISNEY_DF.reindex(NETFLIX_df.index).ffill()

# Check: is the shape now the same for every aligned frame?
# TODO: remove this check from the output later.
APPLE_df.shape
AUTODESK_df.shape
NIKE_df.shape
PFIZER_df.shape
DISNEY_df.shape

# Preview the first rows of each aligned, forward-filled frame.
APPLE_df.head()
AUTODESK_df.head()
NIKE_df.head()
PFIZER_df.head()
DISNEY_df.head()


(1739, 6)

(1739, 6)

(1739, 6)

(1739, 6)

(1739, 6)

Unnamed: 0_level_0,Close,Open,High,Low,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-02,27.33,27.85,27.86,26.84,212820000.0,-0.009
2015-01-05,26.56,27.07,27.16,26.35,257140000.0,-0.028
2015-01-06,26.57,26.64,26.86,26.16,263190000.0,0.0
2015-01-07,26.94,26.8,27.05,26.67,160420000.0,0.014
2015-01-08,27.97,27.31,28.04,27.18,237460000.0,0.038


Unnamed: 0_level_0,Close,Open,High,Low,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-02,59.53,60.06,60.49,58.95,901930.0,-0.009
2015-01-05,58.66,59.15,59.44,58.46,2130000.0,-0.015
2015-01-06,57.5,58.89,59.22,57.31,1940000.0,-0.02
2015-01-07,57.38,57.78,58.52,57.1,1220000.0,-0.002
2015-01-08,58.8,57.87,59.13,57.74,1310000.0,0.025


Unnamed: 0_level_0,Close,Open,High,Low,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-02,47.52,48.27,48.48,47.05,4990000.0,-0.012
2015-01-05,46.75,47.26,47.27,46.56,6890000.0,-0.016
2015-01-06,46.48,46.95,47.08,46.03,7580000.0,-0.006
2015-01-07,47.44,46.8,47.65,46.55,7260000.0,0.021
2015-01-08,48.53,47.83,48.55,47.81,6000000.0,0.023


Unnamed: 0_level_0,Close,Open,High,Low,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-02,29.7,29.64,30.13,29.59,15530000.0,0.006
2015-01-05,29.54,29.72,29.78,29.4,23520000.0,-0.005
2015-01-06,29.78,29.64,30.2,29.5,27960000.0,0.008
2015-01-07,30.19,30.07,30.21,29.94,19210000.0,0.014
2015-01-08,30.81,30.66,30.94,30.54,46650000.0,0.02


Unnamed: 0_level_0,Close,Open,High,Low,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-02,93.75,94.91,95.28,92.85,5870000.0,-0.005
2015-01-05,92.38,93.28,93.35,91.78,7790000.0,-0.015
2015-01-06,91.89,92.57,93.19,91.16,6800000.0,-0.005
2015-01-07,92.83,92.72,93.15,92.1,6590000.0,0.01
2015-01-08,93.79,93.87,94.3,93.56,7580000.0,0.01


In [6]:
# Count the number of distinct (non-NaN) values in each column of the aligned Apple frame.
APPLE_df.nunique()

Close     1475
Open      1474
High      1473
Low       1483
Volume    1643
Change     621
dtype: int64