In [2]:
# import libraries
import pandas as pd

#### Load each ticker's pickle file into its own DataFrame

In [3]:
# Symbols for which a daily-data pickle file exists under pharma-data/day/.
tickers = [
    'LLY', 'NVO', 'JNJ', 'MRK', 'ABBV', 'ROG.SW',
    'NVS', 'AZN', 'PFE', 'AMGN', 'PPH', 'IHE', 'PJP',
]

# Maps "<symbol>_df" -> the DataFrame unpickled from that symbol's file.
dfs = {}

for symbol in tickers:
    pkl_path = f"pharma-data/day/{symbol}.pkl"
    frame_key = f"{symbol}_df"
    dfs[frame_key] = pd.read_pickle(pkl_path)

#### Sanity Check
##### Print the shape and first five rows of every DataFrame to confirm each file loaded correctly

In [4]:
# Walk every (label, DataFrame) pair and print the label, the frame's shape,
# and its first five rows so each load can be checked by eye.
for name, frame in dfs.items():
    print(f"First 5 rows of {name}")
    print(frame.shape)
    print(frame.head())
    print("\n")


First 5 rows of LLY_df
(2501, 6)
                 Open       High        Low      Close  Adj Close   Volume
Date                                                                      
2014-03-13  59.240002  59.330002  58.230000  58.340000  47.321117  4103700
2014-03-14  58.310001  58.980000  58.230000  58.869999  47.751007  4201800
2014-03-17  59.119999  59.290001  58.650002  58.869999  47.751007  3016200
2014-03-18  58.910000  59.700001  58.810001  59.400002  48.180893  4082800
2014-03-19  59.349998  59.740002  58.630001  59.049999  47.897015  2865900


First 5 rows of NVO_df
(2501, 6)
                 Open       High        Low      Close  Adj Close   Volume
Date                                                                      
2014-03-13  23.195000  23.219999  22.750000  22.855000  15.467833  4562200
2014-03-14  22.610001  22.889999  22.584999  22.775000  15.413692  5663000
2014-03-17  22.879999  23.100000  22.860001  22.905001  15.501674  2049200
2014-03-18  22.955000  23.209999

#### Check for null values

In [5]:
# Print the per-column count of null entries for every loaded DataFrame.
for name, frame in dfs.items():
    null_counts = frame.isnull().sum()
    print(f"Null values in {name}")
    print(null_counts)
    print("\n")

# Observation: the printed counts show no null values in any column; the
# author attributes this to yfinance having already dealt with them.

Null values in LLY_df
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64


Null values in NVO_df
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64


Null values in JNJ_df
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64


Null values in MRK_df
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64


Null values in ABBV_df
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64


Null values in ROG.SW_df
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64


Null values in NVS_df
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64


Null values in AZN_df
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume    

In [6]:
# Show the (rows, columns) shape of the LLY frame as the cell's output.
dfs["LLY_df"].shape

(2501, 6)

In [7]:
# Show the dtype of each column in the LLY frame as the cell's output.
dfs["LLY_df"].dtypes

Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object