In [1]:
import numpy as np
import pandas as pd
import requests
import datetime
import time
import json
import os

import functions as f
from secrets import special_api_key

In [2]:
# Names of the per-ticker JSON payloads to process.
# os.listdir returns entries in arbitrary order and also lists hidden files and
# sub-directories, so keep only regular, non-hidden files and sort them to make
# the processing (and therefore the appended CSV row) order reproducible.
json_list = sorted(
    name
    for name in os.listdir('./json_files')
    if not name.startswith('.')
    and os.path.isfile(os.path.join('./json_files', name))
)

In [3]:
# Build one row per (ticker, fiscal year) and append it to the per-ticker CSV in
# ./stock-sets/ and to ./master-df.csv.
#
# weekly time series data is used to avoid issues with holidays
# keep track of ranges (underlying facts about the data)
# comparing first_date, which (per the original author's note) is 5 to 11 days
# after the statement's `fiscalDateEnding`, to second_date, which is 341 to 347
# days after it.
#
# NOTE(review): both CSV outputs and both error lists are opened in append mode
# and written without a header, so re-running this cell appends duplicate rows.

# Days between the two price observations. Has to be a multiple of 7 so that
# second_date falls on the same weekday as first_date (Ex. 7 * 48 = 336), and
# second_date should be before next year's annual earnings announcement.
PRICE_WINDOW_DAYS = 7 * 48

# counter (corresponds to json_list index)
# NOTE(review): never incremented or read in this cell; kept in case other cells use it.
x = 0

# NOTE(review): not populated in this cell (errors are appended to the CSV files
# under ./error-lists/ instead); kept in case other cells use them.
error_years_back = pd.Series([], dtype=str)
error_date = pd.Series([], dtype=str)

for json_name in json_list:

    # json_file keeps holding the parsed payload (not the file name) so that the
    # loop variable is never rebound to a different kind of object.
    with open(f'json_files/{json_name}') as file:
        json_file = json.load(file)

    ticker, sector, industry = f.extract_stock_details(json_file)
    price_data = f.extract_price_data(json_file)

    for years_back in range(5):

        try:
            inc_data, cas_data, bal_data = f.extract_single_set_of_financial_statements(json_file, years_back)
        except IndexError:
            print(f'Error: {ticker} no data for {years_back} year back')

            with open('./error-lists/years-back.csv', 'a') as file:
                file.write(f'{ticker},{years_back}\n')

            # stop at the first missing year for this ticker
            break

        earnings_date = inc_data['fiscalDateEnding']
        currency = inc_data['reportedCurrency']

        first_dt = datetime.datetime.strptime(earnings_date, '%Y-%m-%d')
        weekday = int(first_dt.strftime('%w'))  # Sunday is 0 and Saturday is 6.

        first_dt = f.changes_date_to_following_friday(weekday, first_dt)

        # place of consideration: what dates should be used? (logical decision)
        second_dt = first_dt + datetime.timedelta(PRICE_WINDOW_DAYS)

        # the weekly price series is looked up by 'YYYY-MM-DD' strings
        first_date = first_dt.strftime('%Y-%m-%d')
        second_date = second_dt.strftime('%Y-%m-%d')

        try:
            weekly_prices = price_data['Weekly Adjusted Time Series']
            first_price = weekly_prices[first_date]['5. adjusted close']
            second_price = weekly_prices[second_date]['5. adjusted close']
        except KeyError:
            # len(price_data) should return 2, should return 0 if a valid api call was not performed, should return 1 if api call limit has been reached
            print(f"Error: {len(price_data)} price_data does not contain {first_date} or {second_date} \t\t ({ticker})")

            with open('./error-lists/price-date.csv', 'a') as file:
                file.write(f'{ticker},{years_back}\n')

            # skip this fiscal year but keep trying the older ones
            continue

        first_price = float(first_price)
        second_price = float(second_price)

        perc_change = (second_price - first_price) / first_price

        df_details = pd.DataFrame({'ticker': ticker,
                                   'sector': sector,
                                   'industry': industry,
                                   'earnings_date': earnings_date,
                                   'currency': currency,
                                   'current_price': first_price,
                                   'following_price': second_price,
                                   'first_price_date': first_date,
                                   'second_price_date': second_date,
                                   'percent_change': perc_change},
                                  index=[0])

        # One-row frames per statement; the date/currency fields already live in
        # df_details, and netIncome is dropped from the cash-flow frame so it is
        # not duplicated alongside the income-statement columns.
        df_inc = pd.DataFrame(inc_data, index=[0]).drop(
            columns=['fiscalDateEnding', 'reportedCurrency'])
        df_bal = pd.DataFrame(bal_data, index=[0]).drop(
            columns=['fiscalDateEnding', 'reportedCurrency'])
        df_cas = pd.DataFrame(cas_data, index=[0]).drop(
            columns=['fiscalDateEnding', 'reportedCurrency', 'netIncome'])

        # Missing values arrive as the string 'None', which can not be cast to
        # float; replace them with NaN and cast the statement columns to float
        # BEFORE joining them to the detail columns. Doing it here (instead of
        # assigning through df_all.iloc[:, 9:]) does not depend on a positional
        # offset and guarantees the columns really end up with a float dtype.
        df_financials = (
            pd.concat([df_inc, df_cas, df_bal], axis=1)
            .replace('None', np.nan)
            .astype(float)
        )

        df_all = pd.concat([df_details, df_financials], axis=1)

        df_all.to_csv(f'./stock-sets/{ticker}.csv', mode='a', header=False, index=False)
        df_all.to_csv('./master-df.csv', mode='a', header=False, index=False)

Error: 2 price_data does not contain 2023-01-06 or 2023-12-08 		 (META)
Error: 2 price_data does not contain 2023-01-06 or 2023-12-08 		 (TSLA)
Error: 2 price_data does not contain 2023-01-06 or 2023-12-08 		 (TSM)
Error: 2 price_data does not contain 2022-10-07 or 2023-09-08 		 (AAPL)
Error: 2 price_data does not contain 2023-01-06 or 2023-12-08 		 (AMZN)
Error: 2 price_data does not contain 2023-01-06 or 2023-12-08 		 (GOOG)
Error: 2 price_data does not contain 2023-01-06 or 2023-12-08 		 (GOOGL)
Error: 2 price_data does not contain 2023-01-06 or 2023-12-08 		 (JNJ)
Error: 2 price_data does not contain 2023-01-06 or 2023-12-08 		 (UNH)
Error: 2 price_data does not contain 2022-07-08 or 2023-06-09 		 (MSFT)


In [39]:
# Save the column names of the assembled frame as a single comma-separated line.
# Relies on `df_all` left behind by the row-building loop (NameError if no row
# was ever built). The names are joined rather than written as 'name,' one by
# one, so the line has no trailing comma and therefore exactly as many fields
# as the rows written with DataFrame.to_csv.
with open('columns.csv', 'w') as file:
    file.write(','.join(str(col) for col in df_all.columns))