## Data Collection

In [1]:
import pandas as pd
from alpha_vantage.timeseries import TimeSeries
import matplotlib.pyplot as plt
from alpha_api import api_key
import json
from pprint import pprint

# Alpha Vantage TimeSeries client, created once with the key imported from
# alpha_api; getStockdf uses this object for every get_daily request.
ts = TimeSeries(key=api_key)

In [2]:
# Ticker symbols for the three groups of stocks studied in this notebook.

# Entertainment
ent_stocks = [
    "NFLX",
    "ATVI",
    "DIS",
    "AMC",
    "HAS",
]

# Food and beverage
food_stocks = [
    "KO",
    "BUD",
    "PEP",
    "GIS",
    "MCD",
]

# Essentials
ess_stocks = [
    "CLX",
    "PG",
    "CL",
    "CVS",
    "JNJ",
]

In [19]:
#Create function to call the API and get stocks in a list of tickers
def getStockdf(stocks, client=None):
    """Fetch the full daily history of several tickers into one long DataFrame.

    Parameters
    ----------
    stocks : iterable of str
        Ticker symbols to request, e.g. ``["NFLX", "DIS"]``.
    client : object, optional
        Any object with a ``get_daily(symbol, outputsize=...)`` method that
        returns a ``(data, meta_data)`` pair, where ``data`` maps each date to
        a dict holding the keys ``'1. open'``, ``'2. high'``, ``'3. low'``,
        ``'4. close'`` and ``'5. volume'``. When omitted, the notebook-level
        ``ts`` client is used, so existing ``getStockdf(stocks)`` calls keep
        working unchanged.

    Returns
    -------
    pandas.DataFrame
        One row per (ticker, date) with the columns ``dates``, ``stock``,
        ``open``, ``close``, ``high``, ``low``, ``volume``. Rows follow the
        order of ``stocks`` and, within a ticker, the order in which the
        client returned the dates. Values are stored exactly as the client
        returned them; no type conversion is applied here.

    Raises
    ------
    KeyError
        If a day's entry is missing one of the five expected keys.
    """
    if client is None:
        # Resolved at call time so the default always tracks the current `ts`.
        client = ts

    # Download everything first. Keying by ticker means a symbol listed twice
    # is still requested twice but contributes only one set of rows.
    history_by_stock = {}
    for symbol in stocks:
        daily, _meta_data = client.get_daily(symbol, outputsize="full")
        history_by_stock[symbol] = daily

    # Each per-ticker payload is a dict of date -> price fields. Flatten it
    # into one record per (date, ticker) so the ticker name and date travel
    # with the prices and the row order is retained in the DataFrame.
    columns = ['dates', 'stock', 'open', 'close', 'high', 'low', 'volume']
    records = [
        (
            date,
            symbol,
            info['1. open'],
            info['4. close'],
            info['2. high'],
            info['3. low'],
            info['5. volume'],
        )
        for symbol, daily in history_by_stock.items()
        for date, info in daily.items()
    ]

    # Passing `columns` explicitly keeps the seven headers even when there
    # are no records (e.g. an empty ticker list).
    return pd.DataFrame(records, columns=columns)


In [22]:
# Download the entertainment group once, then carve out one frame per ticker.
raw_ent_df = getStockdf(ent_stocks)

# The unpacking order must match the ticker order in the tuple below.
raw_nflx_df, raw_atvi_df, raw_dis_df, raw_amc_df, raw_has_df = (
    raw_ent_df.loc[raw_ent_df["stock"] == ticker]
    for ticker in ("NFLX", "ATVI", "DIS", "AMC", "HAS")
)

In [6]:
# Download the food group once, then carve out one frame per ticker.
raw_food_df = getStockdf(food_stocks)

# The unpacking order must match the ticker order in the tuple below.
raw_ko_df, raw_bud_df, raw_pep_df, raw_gis_df, raw_mcd_df = (
    raw_food_df.loc[raw_food_df["stock"] == ticker]
    for ticker in ("KO", "BUD", "PEP", "GIS", "MCD")
)

In [13]:
# Download the essentials group once, then carve out one frame per ticker.
raw_ess_df = getStockdf(ess_stocks)

# The unpacking order must match the ticker order in the tuple below.
raw_clx_df, raw_pg_df, raw_cl_df, raw_cvs_df, raw_jnj_df = (
    raw_ess_df.loc[raw_ess_df["stock"] == ticker]
    for ticker in ("CLX", "PG", "CL", "CVS", "JNJ")
)

In [14]:
# Every per-ticker frame, grouped as entertainment, food, essentials.
df_list = [
    raw_nflx_df, raw_atvi_df, raw_dis_df, raw_amc_df, raw_has_df,
    raw_ko_df, raw_bud_df, raw_pep_df, raw_gis_df, raw_mcd_df,
    raw_clx_df, raw_pg_df, raw_cl_df, raw_cvs_df, raw_jnj_df,
]

# Replace each entry with its first 254 rows.
# NOTE(review): 254 is presumably about one year of trading days -- confirm.
for x, frame in enumerate(df_list):
    df_list[x] = frame.iloc[0:254]

# Stack the truncated frames into a single long DataFrame.
onethird_df = pd.concat(df_list)

In [16]:
# Persist onethird_df with IPython's %store magic so it outlives this kernel;
# it can be restored elsewhere with `%store -r onethird_df`.
%store onethird_df

Stored 'onethird_df' (DataFrame)
