**<div style="text-align: right;">Uqaily, Raafay</div>**


**FINM 36000 - Project Lab**
<br>
Autumn 2024

<h2><center> Bank of America: Leveraging LLMs for Financial Data Analysis & Risk Management </center></h2>

<h2> 1. Introduction </h2>
This notebook includes code for data extraction phase of the project from NASDAQ/Quandl.

<h3> Import Statements </h3>

In [1]:
import os
import quandl
import zipfile
import datetime
import warnings
import scipy as sp
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from scipy.stats import zscore
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")
from pandas_market_calendars import get_calendar

<h3> API Key </h3>

In [2]:
api_key = '6SrNeGHs_yp2pmDei8FV'

<h3> Helper Functions </h3>

In [3]:
import shutil

def grab_quandl_table(
    table_path,
    avoid_download=False,
    replace_existing=False,
    date_override=None,
    allow_old_file=False,
    **kwargs,
):
    # change your file path
    root_data_dir = r"E:\\【Chicago】\\[Autumn Semester\\Project Lab\\Database\\2024 data\\NASDAQ data"
    data_symlink = os.path.join(root_data_dir, f"{table_path}_latest.zip")
    if avoid_download and os.path.exists(data_symlink):
        print(f"Skipping any possible download of {table_path}")
        return data_symlink
    
    table_dir = os.path.dirname(data_symlink)
    if not os.path.isdir(table_dir):
        print(f'Creating new data dir {table_dir}')
        os.mkdir(table_dir)

    if date_override is None:
        my_date = datetime.datetime.now().strftime("%Y%m%d")
    else:
        my_date = date_override
    data_file = os.path.join(root_data_dir, f"{table_path}_{my_date}.zip")

    if os.path.exists(data_file):
        file_size = os.stat(data_file).st_size
        if replace_existing or not file_size > 0:
            print(f"Removing old file {data_file} size {file_size}")
        else:
            print(
                f"Data file {data_file} size {file_size} exists already, no need to download"
            )
            return data_file

    dl = quandl.export_table(
        table_path, filename=data_file, api_key=api_key, **kwargs
    )
    file_size = os.stat(data_file).st_size
    if os.path.exists(data_file) and file_size > 0:
        print(f"Download finished: {file_size} bytes")
        if not date_override:
            if os.path.exists(data_symlink):
                print(f"Removing old symlink or file")
                os.remove(data_symlink)
            print(f"Copying file: {data_file} to {data_symlink}")
            shutil.copy2(data_file, data_symlink)
    else:
        print(f"Data file {data_file} failed download")
        return
    return data_symlink if (date_override is None or allow_old_file) else "NoFileAvailable"


def fetch_quandl_table(table_path, avoid_download=True, **kwargs):
    zip_file_path = grab_quandl_table(table_path, avoid_download=avoid_download, **kwargs)
    try:
        with zipfile.ZipFile(zip_file_path) as zip_file:
            print("Contents of the zip file:")
            print(zip_file.namelist())
    except zipfile.BadZipFile:
        print(f"Error: {zip_file_path} is not a valid zip file.")

    return pd.read_csv(zip_file_path)

<h2> 2. Data Extraction and Cleaning </h2>

<span style="color: rgb(0, 0, 128);"> The code below utilizes the pandas_market_calendars python package to determine the number of trading days between 1st January, 2024 to 30th September, 2024. </span>

In [4]:
start_date = '2006-01-01'
end_date = '2024-11-25'

# Use the NYSE calendar for reference
nyse = get_calendar('XNYS')
trading_days = nyse.valid_days(start_date=start_date, end_date=end_date)
num_trading_days = len(trading_days)

print("Number of trading days over the entire period:", num_trading_days, "days.")

Number of trading days over the entire period: 4757 days.


### Zacks FC Data Table

<span style="color: rgb(0, 0, 128);"> The code below uses the fetch_quandl_table function to pull data for the ZACKS/FC data table from QUANDL. It then formats the per_end_date column in datetime and filters the table to only include data between the start and end dates specified above. The rows are sorted by per_end_date column. Any ticker which has missing filling date data was also excluded  and the index is reset. </span>

In [5]:
# change your file path
directory = 'E:\\【Chicago】\\[Autumn Semester\\Project Lab\\Database'

os.makedirs(directory, exist_ok=True)

t_zacks_fc = fetch_quandl_table('ZACKS/FC', avoid_download=False)

pd.to_datetime(t_zacks_fc.per_end_date, format='%Y-%m-%d')

t_zacks_fc = t_zacks_fc[(t_zacks_fc.per_end_date >= start_date) & (t_zacks_fc.per_end_date <= end_date)]
t_zacks_fc = t_zacks_fc.sort_values(by=['per_end_date'])

keep_cols = ['ticker', 'comp_name', 'exchange', 'per_end_date', 'per_type', 'filing_date',
            'filing_type', 'zacks_sector_code', 'eps_diluted_net', 'basic_net_eps', 
            'tot_lterm_debt', 'net_lterm_debt', 'net_curr_debt', 'tot_revnu']
t_zacks_fc = t_zacks_fc[keep_cols]

t_zacks_fc.tail()

Creating new data dir E:\\【Chicago】\\[Autumn Semester\\Project Lab\\Database\\2024 data\\NASDAQ data\ZACKS
Download finished: 198768139 bytes
Copying file: E:\\【Chicago】\\[Autumn Semester\\Project Lab\\Database\\2024 data\\NASDAQ data\ZACKS/FC_20241125.zip to E:\\【Chicago】\\[Autumn Semester\\Project Lab\\Database\\2024 data\\NASDAQ data\ZACKS/FC_latest.zip
Contents of the zip file:
['ZACKS_FC_2_76e4bece47ce87cb8f221f639c7f829b.csv']


Unnamed: 0,ticker,comp_name,exchange,per_end_date,per_type,filing_date,filing_type,zacks_sector_code,eps_diluted_net,basic_net_eps,tot_lterm_debt,net_lterm_debt,net_curr_debt,tot_revnu
100668,CATO,Cato Corporation,NYSE,2024-10-31,Q,,PRELIM,3.0,-0.79,,,,,146.17
495441,SCVL,Shoe Carnival,NASDAQ,2024-10-31,Q,,PRELIM,3.0,0.7,,317.679,,,306.885
27988,AMAT,Applied Materials,NASDAQ,2024-10-31,Q,,PRELIM,10.0,2.09,,5460.0,592.0,1.0,7045.0
145294,CSCO,Cisco Systems,NASDAQ,2024-10-31,Q,,PRELIM,10.0,0.68,,19623.0,911.0,68.0,13841.0
404375,NVDA,NVIDIA,NASDAQ,2024-10-31,Q,2024-11-20,10-Q,10.0,0.78,,8462.0,-1250.0,,35082.0


In [6]:
# Group by ticker and remove rows with missing values in filing_date, exchange
ticker_nan_count = t_zacks_fc.groupby('ticker')['filing_date'].apply(lambda x: x.isna().sum())
shortlisted_tickers = ticker_nan_count[ticker_nan_count == 0].index
t_zacks_fc = t_zacks_fc[t_zacks_fc['ticker'].isin(shortlisted_tickers)]
t_zacks_fc.reset_index(inplace=True)
exchange_nan_count = t_zacks_fc.groupby('ticker')['exchange'].apply(lambda x: x.isna().sum())
exchange_tickers = exchange_nan_count[exchange_nan_count == 0].index
t_zacks_fc = t_zacks_fc[t_zacks_fc['ticker'].isin(exchange_tickers)]

# Create the eps_check column and remove the rows where all the data in the EPS column is missing
t_zacks_fc['eps_check'] = ~t_zacks_fc['eps_diluted_net'].isna() | ~t_zacks_fc['basic_net_eps'].isna()
eps_check_false_count = t_zacks_fc.groupby('ticker')['eps_check'].apply(lambda x: (x == False).sum())
eps_check_tickers = eps_check_false_count[eps_check_false_count == 0].index
t_zacks_fc = t_zacks_fc[t_zacks_fc['ticker'].isin(eps_check_tickers)]

# Filter out tickers where both net_lterm_debt and tot_lterm_debt are entirely missing across all rows
debt_check_all_nan = t_zacks_fc.groupby('ticker').apply(
    lambda x: x['net_lterm_debt'].isna().all() and x['tot_lterm_debt'].isna().all()
)
debt_check_tickers = debt_check_all_nan[~debt_check_all_nan].index
t_zacks_fc = t_zacks_fc[t_zacks_fc['ticker'].isin(debt_check_tickers)]

# Drop unnecessary columns, create consolidated columns by filling missing values, and clean up redundant columns
t_zacks_fc = t_zacks_fc.drop(columns=['eps_check', 'tot_revnu', 'net_curr_debt'])

t_zacks_fc['lterm_debt_net_tot'] = t_zacks_fc['net_lterm_debt'].fillna(t_zacks_fc['tot_lterm_debt'])
t_zacks_fc['eps_diluted_net_basic'] = t_zacks_fc['eps_diluted_net'].fillna(t_zacks_fc['basic_net_eps'])

t_zacks_fc = t_zacks_fc.drop(columns=['index', 'net_lterm_debt', 'tot_lterm_debt', 'eps_diluted_net', 'basic_net_eps'])

t_zacks_fc.tail()


Unnamed: 0,ticker,comp_name,exchange,per_end_date,per_type,filing_date,filing_type,zacks_sector_code,lterm_debt_net_tot,eps_diluted_net_basic
266479,LNW,Light & Wonder,NASDAQ,2024-09-30,Q,2024-11-12,10-Q,2.0,-5.0,0.71
266481,IDA,IDACORP,NYSE,2024-09-30,Q,2024-10-31,10-Q,14.0,299.814,2.12
266483,GWW,W.W. Grainger,NYSE,2024-09-30,Q,2024-10-31,10-Q,7.0,465.0,9.87
266484,INTU,Intuit,NASDAQ,2024-10-31,Q,2024-11-21,10-Q,10.0,5625.0,0.7
266485,NVDA,NVIDIA,NASDAQ,2024-10-31,Q,2024-11-20,10-Q,10.0,-1250.0,0.78


In [7]:
zacks_fc_tickers = t_zacks_fc['ticker'].nunique()
print("Unique number of tickers in Zacks FC Table:", zacks_fc_tickers)

Unique number of tickers in Zacks FC Table: 1803


In [8]:
t_zacks_fc[t_zacks_fc['ticker'] == 'AAPL']

Unnamed: 0,ticker,comp_name,exchange,per_end_date,per_type,filing_date,filing_type,zacks_sector_code,lterm_debt_net_tot,eps_diluted_net_basic
2629,AAPL,Apple,NASDAQ,2006-03-31,Q,2006-05-05,10-Q,10.0,,0.0168
8568,AAPL,Apple,NASDAQ,2006-06-30,Q,2006-12-29,10-Q,10.0,,0.0193
13220,AAPL,Apple,NASDAQ,2006-09-30,Q,2006-12-29,10-Q,10.0,,0.0218
13290,AAPL,Apple,NASDAQ,2006-09-30,A,2006-12-29,10-K,10.0,,0.0811
20761,AAPL,Apple,NASDAQ,2006-12-31,Q,2007-02-02,10-Q,10.0,,0.0407
...,...,...,...,...,...,...,...,...,...,...
257471,AAPL,Apple,NASDAQ,2023-12-31,Q,2024-02-02,10-Q,10.0,95088.0,2.1800
261227,AAPL,Apple,NASDAQ,2024-03-31,Q,2024-05-03,10-Q,10.0,-3150.0,1.5300
264487,AAPL,Apple,NASDAQ,2024-06-30,Q,2024-08-02,10-Q,10.0,-7400.0,1.4000
265116,AAPL,Apple,NASDAQ,2024-09-30,Q,2024-11-01,10-Q,10.0,-9958.0,0.9700


In [9]:
# t_zacks_fc.to_csv('t_zacks_fc_2024.csv', index=False)

In [10]:
# t_zacks_fc.to_parquet('t_zacks_fc.parquet', index=False)

### Zacks FR Data Table

<span style="color: rgb(0, 0, 128);"> The code below uses the fetch_quandl_table function to pull data for the ZACKS/FR data table from QUANDL. It then formats the per_end_date column in datetime and filters the table to only include data between the start and end dates specified above. The rows are sorted by per_end_date column. Then, selected columns are kept while all others are dropped from the dataframe. Then, any tickers which are not in the filtered Zacks/FC table are dropped and the index is reset. </span>

In [11]:
t_zacks_fr = fetch_quandl_table('ZACKS/FR', avoid_download=False)

pd.to_datetime(t_zacks_fr.per_end_date, format='%Y-%m-%d')

t_zacks_fr = t_zacks_fr[(t_zacks_fr.per_end_date >= start_date) & (t_zacks_fr.per_end_date <= end_date)]
t_zacks_fr = t_zacks_fr.sort_values(by=['per_end_date'])

Download finished: 40859841 bytes
Copying file: E:\\【Chicago】\\[Autumn Semester\\Project Lab\\Database\\2024 data\\NASDAQ data\ZACKS/FR_20241125.zip to E:\\【Chicago】\\[Autumn Semester\\Project Lab\\Database\\2024 data\\NASDAQ data\ZACKS/FR_latest.zip
Contents of the zip file:
['ZACKS_FR_2_f40c6a304f87d9f492c1f21839d474e2.csv']


In [12]:
# Select only the necessary columns, filter rows in t_zacks_fr to match tickers in t_zacks_fc, and reset the index
keep_cols = ['ticker', 'per_end_date', 'per_type', 'ret_invst',
            'tot_debt_tot_equity', 'free_cash_flow_per_share']
t_zacks_fr = t_zacks_fr[keep_cols]

unique_tickers = t_zacks_fc['ticker'].unique()
t_zacks_fr = t_zacks_fr.loc[t_zacks_fr['ticker'].isin(unique_tickers)]
t_zacks_fr.reset_index(inplace=True)

# Group by ticker and check if tot_debt_tot_equity is empty in all groups, if true, then delete all
tot_debt_tot_equity_all_nan = t_zacks_fr.groupby('ticker')['tot_debt_tot_equity'].apply(lambda x: x.isna().all())

valid_tickers = tot_debt_tot_equity_all_nan[~tot_debt_tot_equity_all_nan].index
t_zacks_fr = t_zacks_fr[t_zacks_fr['ticker'].isin(valid_tickers)]

# Filter out tickers with any missing values in the 'ret_invst' column and drop unnecessary columns 'index' and 'free_cash_flow_per_share'
ret_invst_nan_count = t_zacks_fr.groupby('ticker')['ret_invst'].apply(lambda x: x.isna().sum())
ret_invst_tickers = ret_invst_nan_count[ret_invst_nan_count == 0].index
t_zacks_fr = t_zacks_fr[t_zacks_fr['ticker'].isin(ret_invst_tickers)]

t_zacks_fr = t_zacks_fr.drop(columns=['index', 'free_cash_flow_per_share'])

t_zacks_fr.tail()

Unnamed: 0,ticker,per_end_date,per_type,ret_invst,tot_debt_tot_equity
158984,ABG,2024-09-30,Q,1.8959,1.448
158985,BHB,2024-09-30,Q,1.7253,0.5366
158986,FE,2024-09-30,Q,1.3203,1.7313
158988,INTU,2024-10-31,Q,0.8291,0.3377
158989,NVDA,2024-10-31,Q,25.9666,0.1284


In [13]:
zacks_fr_tickers = t_zacks_fr['ticker'].nunique()
print("Unique number of tickers in Zacks FR Table:", zacks_fr_tickers)

Unique number of tickers in Zacks FR Table: 1754


In [14]:
t_zacks_fr[t_zacks_fr['ticker'] == 'AAPL']

Unnamed: 0,ticker,per_end_date,per_type,ret_invst,tot_debt_tot_equity
1306,AAPL,2006-03-31,Q,4.7224,
3130,AAPL,2006-06-30,Q,5.0589,
4964,AAPL,2006-09-30,Q,5.4287,
4978,AAPL,2006-09-30,A,19.9219,
7524,AAPL,2006-12-31,Q,8.9419,
...,...,...,...,...,...
153013,AAPL,2023-12-31,Q,20.0463,1.4580
154603,AAPL,2024-03-31,Q,14.2364,1.4097
156235,AAPL,2024-06-30,Q,14.0271,1.5186
157585,AAPL,2024-09-30,Q,10.3266,1.8723


In [15]:
# t_zacks_fr.to_csv('t_zacks_fr_2024.csv', index=False)

In [16]:
# t_zacks_fr.to_parquet('t_zacks_fr.parquet', index=False)

### Zacks MKTV Data Table

<span style="color: rgb(0, 0, 128);"> The code below uses the fetch_quandl_table function to pull data for the ZACKS/MKTV data table from QUANDL. It then formats the per_end_date column in datetime and filters the table to only include data between the start and end dates specified above. The rows are sorted by per_end_date column. Then, selected columns are kept while all others are dropped from the dataframe. Then, any tickers which are not in the filtered Zacks/FC table are dropped and the index is reset. </span>

In [17]:
t_zacks_mktv = fetch_quandl_table('ZACKS/MKTV', avoid_download=False)

pd.to_datetime(t_zacks_mktv.per_end_date, format='%Y-%m-%d')

t_zacks_mktv = t_zacks_mktv[(t_zacks_mktv.per_end_date >= start_date) & (t_zacks_mktv.per_end_date <= end_date)]
t_zacks_mktv = t_zacks_mktv.sort_values(by=['per_end_date'])


t_zacks_mktv.tail()

Download finished: 7340154 bytes
Copying file: E:\\【Chicago】\\[Autumn Semester\\Project Lab\\Database\\2024 data\\NASDAQ data\ZACKS/MKTV_20241125.zip to E:\\【Chicago】\\[Autumn Semester\\Project Lab\\Database\\2024 data\\NASDAQ data\ZACKS/MKTV_latest.zip
Contents of the zip file:
['ZACKS_MKTV_2_ecb7f768974bbdd26964caefe2fd0378.csv']


Unnamed: 0,ticker,m_ticker,comp_name,fye,per_type,per_end_date,active_ticker_flag,mkt_val,ep_val
595049,LWINQ,LWIN,LEAP WIRELS-OLD,12,Q,2024-09-30,N,,
595124,LWLW,EXGP,LONGWEN GROUP,12,Q,2024-09-30,Y,9.3,9.15
595199,LWNTF,ACF&,LANGUAGEWARE,12,Q,2024-09-30,N,,
595349,LX,LX,LEXINFINTEC HLD,12,Q,2024-09-30,Y,446.43,
547874,KLIC,KSI,KULICKE & SOFFA,9,Q,2024-09-30,Y,2460.0,1882.86


In [18]:
keep_cols = ['ticker', 'per_end_date', 'per_type', 'mkt_val']
t_zacks_mktv = t_zacks_mktv[keep_cols]

t_zacks_mktv = t_zacks_mktv.loc[t_zacks_mktv['ticker'].isin(unique_tickers)]
t_zacks_mktv.reset_index(inplace=True)

# Filter out tickers with any missing values in the 'mkt_val' column from t_zacks_mktv, then drop the 'index' column to clean up the data.
mkt_val_nan_count = t_zacks_mktv.groupby('ticker')['mkt_val'].apply(lambda x: x.isna().sum())
mkt_val_tickers = mkt_val_nan_count[mkt_val_nan_count == 0].index
t_zacks_mktv = t_zacks_mktv[t_zacks_mktv['ticker'].isin(mkt_val_tickers)]

t_zacks_mktv = t_zacks_mktv.drop(columns=['index'])

In [19]:
zacks_mktv_tickers = t_zacks_mktv['ticker'].nunique()
print("Unique number of tickers in Zacks MKTV Table:", zacks_mktv_tickers)

Unique number of tickers in Zacks MKTV Table: 1516


In [20]:
t_zacks_mktv[t_zacks_mktv['ticker'] == 'AAPL']

Unnamed: 0,ticker,per_end_date,per_type,mkt_val
1598,AAPL,2006-03-31,Q,53417.28
3329,AAPL,2006-06-30,Q,48708.63
3624,AAPL,2006-09-30,Q,65663.06
6786,AAPL,2006-12-31,Q,72900.84
7168,AAPL,2007-03-31,Q,80076.88
...,...,...,...,...
125194,AAPL,2023-09-30,Q,2662786.75
125513,AAPL,2023-12-31,Q,2973025.25
127156,AAPL,2024-03-31,Q,2629488.25
130197,AAPL,2024-06-30,Q,3202295.25


In [21]:
# t_zacks_mktv.to_csv('t_zacks_mktv_2024.csv', index=False)

In [22]:
# t_zacks_mktv.to_parquet('t_zacks_mktv.parquet', index=False)

### Zacks SHRS Data Table

<span style="color: rgb(0, 0, 128);"> The code below uses the fetch_quandl_table function to pull data for the ZACKS/SHRS data table from QUANDL. It then formats the per_end_date column in datetime and filters the table to only include data between the start and end dates specified above. The rows are sorted by per_end_date column. Then, selected columns are kept while all others are dropped from the dataframe. Then, any tickers which are not in the filtered Zacks/FC table are dropped and the index is reset. </span>

In [23]:
t_zacks_shrs = fetch_quandl_table('ZACKS/SHRS', avoid_download=False)

pd.to_datetime(t_zacks_shrs.per_end_date, format='%Y-%m-%d')

t_zacks_shrs = t_zacks_shrs[(t_zacks_shrs.per_end_date >= start_date) & (t_zacks_shrs.per_end_date <= end_date)]
t_zacks_shrs = t_zacks_shrs.sort_values(by=['per_end_date'])


t_zacks_shrs.tail()

Download finished: 5553773 bytes
Copying file: E:\\【Chicago】\\[Autumn Semester\\Project Lab\\Database\\2024 data\\NASDAQ data\ZACKS/SHRS_20241125.zip to E:\\【Chicago】\\[Autumn Semester\\Project Lab\\Database\\2024 data\\NASDAQ data\ZACKS/SHRS_latest.zip
Contents of the zip file:
['ZACKS_SHRS_2_99db6fa97ac677f3c0d45a9fa9a70196.csv']


Unnamed: 0,ticker,m_ticker,comp_name,fye,per_type,per_end_date,active_ticker_flag,shares_out,avg_d_shares
1029544,VIRC,VIR2,VIRCO MFG,1,Q,2024-10-31,Y,16.29,
368124,FIFS,FIFS,FIRST INVESTORS,4,Q,2024-10-31,N,,
112034,BBY,BBUY,BEST BUY,1,Q,2024-10-31,Y,214.73,
747427,PD,PD2,PAGERDUTY INC,1,Q,2024-10-31,Y,93.06,
754860,PFRMF,PMU,PACIFIC RIM MNG,4,Q,2024-10-31,N,,


In [24]:
# Select relevant columns from t_zacks_shrs, filter rows to include only tickers in unique_tickers, and reset the index.
# Remove tickers where the 'shares_out' column has any missing values, then drop the 'index' column for data cleanup.
keep_cols = ['ticker', 'per_end_date', 'per_type', 'shares_out']
t_zacks_shrs = t_zacks_shrs[keep_cols]

t_zacks_shrs = t_zacks_shrs.loc[t_zacks_shrs['ticker'].isin(unique_tickers)]
t_zacks_shrs.reset_index(inplace=True)

shares_out_nan_count = t_zacks_shrs.groupby('ticker')['shares_out'].apply(lambda x: x.isna().sum())
shares_out_tickers = shares_out_nan_count[shares_out_nan_count == 0].index
t_zacks_shrs = t_zacks_shrs[t_zacks_shrs['ticker'].isin(shares_out_tickers)]

t_zacks_shrs = t_zacks_shrs.drop(columns=['index'])

In [25]:
zacks_shrs_tickers = t_zacks_shrs['ticker'].nunique()
print("Unique number of tickers in Zacks SHRS Table:", zacks_shrs_tickers)

Unique number of tickers in Zacks SHRS Table: 1528


In [26]:
t_zacks_shrs[t_zacks_shrs['ticker'] == 'AAPL']

Unnamed: 0,ticker,per_end_date,per_type,shares_out
1512,AAPL,2006-03-31,Q,23847.00
3478,AAPL,2006-06-30,Q,23814.24
3884,AAPL,2006-09-30,Q,23883.68
5748,AAPL,2006-12-31,Q,24059.68
8430,AAPL,2007-03-31,Q,24132.52
...,...,...,...,...
124047,AAPL,2023-09-30,Q,15552.75
126138,AAPL,2023-12-31,Q,15441.88
127418,AAPL,2024-03-31,Q,15334.08
130013,AAPL,2024-06-30,Q,15204.14


In [27]:
# t_zacks_shrs.to_csv('t_zacks_shrs_2024.csv', index=False)

In [None]:
# t_zacks_shrs.to_parquet('t_zacks_shrs.parquet', index=False)

## ~ Still Needs Some Work Done/ IGNORE BELOW ~

### Zacks Data Table Combined

<span style="color: rgb(0, 0, 128);"> The code below first merges all 4 Zacks data tables into 1 by matching ticker, per_end_date, and per_type columns. Then, I start my filtering process. Firstly, any ticker that doesn't have an exchange listed is dropped. Then, any ticker with missing debt to equity, shares outstanding, return on investment, and market value data is dropped. Then, I make sure for each period end date, each ticker has either diluted or basic net EPS data available. If neither is available, the ticker is dropped. The same is also done to make sure that for each period end date, each ticker has either net long term debt or total long term debt data available. If neither is availble, the ticker is dropped. I also make sure we have complete data for the entire time period for each ticker. Finally, I created a new column for long term debt which is equal to lterm_debt_net and if there is no value, then it takes the corresponding value for lterm_debt_tot. Similarly, I created a new column for net eps which is equal to eps_diluted_net and if there is no value, then it takes the corresponding value for basic_eps_net. </span>

In [67]:
#t_zacks = t_zacks_fc.merge(t_zacks_fr, how = 'inner', left_on = ['ticker', 'per_end_date', 'per_type'], 
#                           right_on = ['ticker', 'per_end_date', 'per_type'], suffixes=('_fc', '_fr'))
#t_zacks = t_zacks.merge(t_zacks_mktv, how = 'inner', left_on = ['ticker', 'per_end_date', 'per_type'],
#                        right_on = ['ticker', 'per_end_date', 'per_type'], suffixes=('_z1', '_mktv'))
#t_zacks = t_zacks.merge(t_zacks_shrs, how = 'inner', left_on = ['ticker', 'per_end_date', 'per_type'], 
#                        right_on = ['ticker', 'per_end_date', 'per_type'], suffixes=('_z2', '_shrs'))

#t_zacks = t_zacks.drop(columns=['index_fc', 'index_fr', 'index_z2', 'index_shrs'])

#exchange_nan_count = t_zacks.groupby('ticker')['exchange'].apply(lambda x: x.isna().sum())
#exchange_tickers = exchange_nan_count[exchange_nan_count == 0].index
#t_zacks = t_zacks[t_zacks['ticker'].isin(exchange_tickers)]

#tot_debt_tot_equity_nan_count = t_zacks.groupby('ticker')['tot_debt_tot_equity'].apply(lambda x: x.isna().sum())
#tot_debt_tot_equity_tickers = tot_debt_tot_equity_nan_count[tot_debt_tot_equity_nan_count == 0].index
#t_zacks = t_zacks[t_zacks['ticker'].isin(tot_debt_tot_equity_tickers)]

#shares_out_nan_count = t_zacks.groupby('ticker')['shares_out'].apply(lambda x: x.isna().sum())
#shares_out_tickers = shares_out_nan_count[shares_out_nan_count == 0].index
#t_zacks = t_zacks[t_zacks['ticker'].isin(shares_out_tickers)]

#mkt_val_nan_count = t_zacks.groupby('ticker')['mkt_val'].apply(lambda x: x.isna().sum())
#mkt_val_tickers = mkt_val_nan_count[mkt_val_nan_count == 0].index
#t_zacks = t_zacks[t_zacks['ticker'].isin(mkt_val_tickers)]

#ret_invst_nan_count = t_zacks.groupby('ticker')['ret_invst'].apply(lambda x: x.isna().sum())
#ret_invst_tickers = ret_invst_nan_count[ret_invst_nan_count == 0].index
#t_zacks = t_zacks[t_zacks['ticker'].isin(ret_invst_tickers)]

#t_zacks['eps_check'] = ~t_zacks['eps_diluted_net'].isna() | ~t_zacks['basic_net_eps'].isna()
#eps_check_false_count = t_zacks.groupby('ticker')['eps_check'].apply(lambda x: (x == False).sum())
#eps_check_tickers = eps_check_false_count[eps_check_false_count == 0].index
#t_zacks = t_zacks[t_zacks['ticker'].isin(eps_check_tickers)]

#t_zacks['debt_check'] = ~t_zacks['net_lterm_debt'].isna() | ~t_zacks['tot_lterm_debt'].isna()
#debt_check_false_count = t_zacks.groupby('ticker')['debt_check'].apply(lambda x: (x == False).sum())
#debt_check_tickers = debt_check_false_count[debt_check_false_count == 0].index
#t_zacks = t_zacks[t_zacks['ticker'].isin(debt_check_tickers)]

#t_zacks = t_zacks.drop(columns=['eps_check', 'debt_check', 'tot_revnu', 'net_curr_debt', 'free_cash_flow_per_share'])

#t_zacks['lterm_debt_net_tot'] = t_zacks['net_lterm_debt'].fillna(t_zacks['tot_lterm_debt'])
#t_zacks['net_eps_diluted_basic'] = t_zacks['eps_diluted_net'].fillna(t_zacks['basic_net_eps'])

#t_zacks = t_zacks.drop(columns=['net_lterm_debt', 'tot_lterm_debt', 'eps_diluted_net', 'basic_net_eps'])

#t_zacks

Unnamed: 0,ticker,comp_name,exchange,per_end_date,per_type,filing_date,filing_type,zacks_sector_code,ret_invst,tot_debt_tot_equity,mkt_val,shares_out,lterm_debt_net_tot,net_eps_diluted_basic
4,HOG,HarleyDavidson,NYSE,2022-03-31,Q,2022-05-05,10-Q,5.0,3.1881,2.6358,5978.23,151.73,-325.7140,1.45
7,NEU,NewMarket,NYSE,2022-03-31,Q,2022-04-28,10-Q,6.0,3.7047,1.1065,3326.42,10.26,-306.0990,5.75
9,PTVE,Pactiv Evergreen,NASDAQ,2022-03-31,Q,2022-05-05,10-Q,2.0,0.7646,3.6459,1787.34,177.67,-6.0000,0.24
15,EVBN,Evans Bancorp,NYSE MKT (ex. AMEX),2022-03-31,Q,2022-04-29,10-Q,13.0,2.3716,0.3801,209.89,5.52,-5.8880,0.86
19,GCTS,"GCT Semiconductor Holding, Inc.",NYSE,2022-03-31,Q,2022-05-09,10-Q,10.0,-38.8615,-0.2029,428.66,43.13,5.3570,0.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41956,RTX,RTX Corporation,NYSE,2024-09-30,Q,2024-10-22,10-Q,11.0,1.5101,0.6710,161171.81,1330.24,-1700.0000,1.09
41957,KREF,KKR Real Estate Finance Trust,NYSE,2024-09-30,Q,2024-10-21,10-Q,13.0,-0.1093,3.7501,856.27,69.33,-779.8401,-0.19
41958,WINA,Winmark,NASDAQ,2024-09-30,Q,2024-10-16,10-Q,3.0,38.7652,-1.9530,1348.04,3.52,-3.1875,3.03
41959,PHM,PulteGroup,NYSE,2024-09-30,Q,2024-10-22,10-Q,8.0,5.2921,0.1857,29785.92,207.52,-325.9880,3.35


In [68]:
zacks_tickers = t_zacks['ticker'].nunique()
print("Unique number of tickers in Zacks Table:", zacks_tickers)

Unique number of tickers in Zacks Table: 2453


<span style="color: rgb(255,69,0);"> As shown above, starting from 55400 unique tickers in the t_zacks_fc dataframe, I was able to reduce that number down to 2453 unique tickers within our available universe. </span>

### Prices Data Table

<span style="color: rgb(0, 0, 128);"> The code below uses the fetch_quandl_table function to pull data for the QUOTEMEDIA/PRICES data table from QUANDL. It then formats the date column in datetime and filters the table to only include data between the start and end dates specified above. The rows are sorted by the date column. Then, selected columns are kept while all others are dropped from the dataframe. Then, I check to make sure each ticker has data for all 689 trading days within our timeframe of interest, and if not, that ticker is removed from the dataframe. </span>

In [69]:
zacks_tickers = t_zacks_fc['ticker'].unique()

t_price = fetch_quandl_table('QUOTEMEDIA/PRICES', ticker = list(zacks_tickers), avoid_download=False)

pd.to_datetime(t_price.date, format='%Y-%m-%d')

t_price = t_price[(t_price.date >= start_date) & (t_price.date <= end_date)]
t_price = t_price.sort_values(by=['date'])
t_price.reset_index(inplace=True)

keep_cols = ['ticker', 'date', 'adj_close']
t_price = t_price[keep_cols]

ticker_dates_count = t_price.groupby('ticker')['date'].nunique()
selected_tickers = ticker_dates_count[ticker_dates_count == 689].index
t_price = t_price[t_price['ticker'].isin(selected_tickers)]

t_price

Download finished: 592856417 bytes
Creating symlink: /Users/raafayuqaily/quandl_data_table_downloads/QUOTEMEDIA/PRICES_20241024.zip -> /Users/raafayuqaily/quandl_data_table_downloads/QUOTEMEDIA/PRICES_latest.zip
Contents of the zip file:
['QUOTEMEDIA_PRICES_51e9aa01f866906b898b4803faa4e3de.csv']


Unnamed: 0,ticker,date,adj_close
0,A,2022-01-03,153.564781
1,RDUS,2022-01-03,48.078330
2,HNNA,2022-01-03,9.135281
3,HNRG,2022-01-03,2.650000
4,RDNT,2022-01-03,29.820000
...,...,...,...
3058074,SONO,2024-09-30,12.290000
3058075,DUK,2024-09-30,115.300000
3058076,DTST,2024-09-30,3.770000
3058077,SOPA,2024-09-30,0.920000


In [70]:
price_tickers = t_price['ticker'].nunique()
print("Unique number of tickers in Prices Table:", price_tickers)

Unique number of tickers in Prices Table: 3724


### Merged Data Table

In [25]:
t_price['date'] = pd.to_datetime(t_price['date'])
t_zacks['filing_date'] = pd.to_datetime(t_zacks['filing_date'])

unique_tickers = t_price['ticker'].unique()

add_dates = ['2016-12-31', '2017-09-30', '2017-12-31', '2018-03-31', '2018-06-30', '2018-09-30', '2019-03-31', '2019-06-30',
             '2022-12-31', '2023-09-30']

weekend_dates = pd.DataFrame({'date': pd.to_datetime(add_dates)})

t_merged = pd.DataFrame()

for ticker in unique_tickers:
    # Filter DataFrame for the current ticker
    ticker_df = t_price[t_price['ticker'] == ticker]

    # Get existing dates for the current ticker
    existing_dates = ticker_df['date'].tolist()

    # Convert existing_dates to DataFrame
    existing_dates_df = pd.DataFrame({'date': existing_dates})

    # Concatenate existing_dates_df with weekend_dates
    all_dates = pd.concat([existing_dates_df, weekend_dates])

    # Sort the dates
    all_dates.sort_values('date', inplace=True)

    # Merge with the original DataFrame to forward fill adj_close
    merged_df = pd.merge(all_dates, ticker_df, on='date', how='left').ffill()

    # Concatenate the results to the final DataFrame
    t_merged = pd.concat([t_merged, merged_df], ignore_index=True)

t_merged = t_merged.merge(t_zacks, how='left', left_on=['ticker', 'date'], right_on=['ticker', 'filing_date']).fillna(method='ffill')

t_merged = t_merged.sort_values(by=['ticker', 'date'])
t_merged = t_merged.drop_duplicates(subset=['ticker', 'date'], keep='first')
ticker_dates_count = t_merged.groupby('ticker')['date'].nunique()
selected_tickers = ticker_dates_count[ticker_dates_count == 2023].index
t_merged = t_merged[t_merged['ticker'].isin(selected_tickers)]

per_end_date_nan_count = t_merged.groupby('ticker')['per_end_date'].apply(lambda x: x.isna().sum())
per_end_date_tickers = per_end_date_nan_count[per_end_date_nan_count == 0].index
t_merged = t_merged[t_merged['ticker'].isin(per_end_date_tickers)]

t_merged

Unnamed: 0,date,ticker,adj_close,comp_name,exchange,per_end_date,per_type,filing_date,filing_type,zacks_sector_code,ret_invst,tot_debt_tot_equity,mkt_val,shares_out,lterm_debt_net_tot,net_eps_diluted_basic
1246179,2015-10-01,AAOI,18.040000,Matthews International,NASDAQ,2023-06-30,Q,2023-07-28,10-Q,1.0,0.6702,1.4852,1298.53,30.47,-31.442,0.28
1246180,2015-10-02,AAOI,18.170000,Matthews International,NASDAQ,2023-06-30,Q,2023-07-28,10-Q,1.0,0.6702,1.4852,1298.53,30.47,-31.442,0.28
1246181,2015-10-05,AAOI,19.440000,Matthews International,NASDAQ,2023-06-30,Q,2023-07-28,10-Q,1.0,0.6702,1.4852,1298.53,30.47,-31.442,0.28
1246182,2015-10-06,AAOI,19.080000,Matthews International,NASDAQ,2023-06-30,Q,2023-07-28,10-Q,1.0,0.6702,1.4852,1298.53,30.47,-31.442,0.28
1246183,2015-10-07,AAOI,19.810000,Matthews International,NASDAQ,2023-06-30,Q,2023-07-28,10-Q,1.0,0.6702,1.4852,1298.53,30.47,-31.442,0.28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
368184,2023-09-26,ZTS,176.049317,Zoetis,NYSE,2023-06-30,Q,2023-08-08,10-Q,4.0,5.9950,1.4190,79580.33,462.11,-1350.000,1.45
368185,2023-09-27,ZTS,173.869478,Zoetis,NYSE,2023-06-30,Q,2023-08-08,10-Q,4.0,5.9950,1.4190,79580.33,462.11,-1350.000,1.45
368186,2023-09-28,ZTS,173.202587,Zoetis,NYSE,2023-06-30,Q,2023-08-08,10-Q,4.0,5.9950,1.4190,79580.33,462.11,-1350.000,1.45
368187,2023-09-29,ZTS,173.172726,Zoetis,NYSE,2023-06-30,Q,2023-08-08,10-Q,4.0,5.9950,1.4190,79580.33,462.11,-1350.000,1.45


In [26]:
merged_tickers = t_merged['ticker'].nunique()
print("Unique number of tickers in Merged Table:", merged_tickers)

Unique number of tickers in Merged Table: 853


<span style="color: rgb(255,69,0);"> As shown above, the last row of the stock ZTS for 2023-09-30 is a period end date that falls on a weekend. However, using the ffill method, I was able to forward fill the adj close price for that equity, and all equities, from the trading day prior. </span>

<span style="color: rgb(0, 0, 128);"> Till now, I have successfully retrieved quarterly data and forward filled it for the entire timeframe. Here, a new column called 'per_end_date_adj_close' is created which lists the close price on the period end date for each row. </span>

In [27]:
t_merged1 = t_merged.copy()
t_merged1['date'] = pd.to_datetime(t_merged1['date'], format='%Y-%m-%d')
t_merged1['per_end_date'] = pd.to_datetime(t_merged1['per_end_date'], format='%Y-%m-%d')
t_merged1['filing_date'] = pd.to_datetime(t_merged1['filing_date'], format='%Y-%m-%d')

t_merged1.sort_values(by=['ticker', 'date'], inplace=True)

t_merged1 = t_merged1.merge(t_merged1[['date','ticker','adj_close']],how='left',left_on=['per_end_date','ticker'],
                            right_on=['date','ticker'])
t_merged1 = t_merged1.rename(columns={'date_x':'date','adj_close_x':'adj_close','adj_close_y':'per_end_date_adj_close'})
t_merged1 = t_merged1.drop(columns=['date_y'])
t_merged1

Unnamed: 0,date,ticker,adj_close,comp_name,exchange,per_end_date,per_type,filing_date,filing_type,zacks_sector_code,ret_invst,tot_debt_tot_equity,mkt_val,shares_out,lterm_debt_net_tot,net_eps_diluted_basic,per_end_date_adj_close
0,2015-10-01,AAOI,18.040000,Matthews International,NASDAQ,2023-06-30,Q,2023-07-28,10-Q,1.0,0.6702,1.4852,1298.53,30.47,-31.442,0.28,5.960000
1,2015-10-02,AAOI,18.170000,Matthews International,NASDAQ,2023-06-30,Q,2023-07-28,10-Q,1.0,0.6702,1.4852,1298.53,30.47,-31.442,0.28,5.960000
2,2015-10-05,AAOI,19.440000,Matthews International,NASDAQ,2023-06-30,Q,2023-07-28,10-Q,1.0,0.6702,1.4852,1298.53,30.47,-31.442,0.28,5.960000
3,2015-10-06,AAOI,19.080000,Matthews International,NASDAQ,2023-06-30,Q,2023-07-28,10-Q,1.0,0.6702,1.4852,1298.53,30.47,-31.442,0.28,5.960000
4,2015-10-07,AAOI,19.810000,Matthews International,NASDAQ,2023-06-30,Q,2023-07-28,10-Q,1.0,0.6702,1.4852,1298.53,30.47,-31.442,0.28,5.960000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1725614,2023-09-26,ZTS,176.049317,Zoetis,NYSE,2023-06-30,Q,2023-08-08,10-Q,4.0,5.9950,1.4190,79580.33,462.11,-1350.000,1.45,171.037191
1725615,2023-09-27,ZTS,173.869478,Zoetis,NYSE,2023-06-30,Q,2023-08-08,10-Q,4.0,5.9950,1.4190,79580.33,462.11,-1350.000,1.45,171.037191
1725616,2023-09-28,ZTS,173.202587,Zoetis,NYSE,2023-06-30,Q,2023-08-08,10-Q,4.0,5.9950,1.4190,79580.33,462.11,-1350.000,1.45,171.037191
1725617,2023-09-29,ZTS,173.172726,Zoetis,NYSE,2023-06-30,Q,2023-08-08,10-Q,4.0,5.9950,1.4190,79580.33,462.11,-1350.000,1.45,171.037191


<span style="color: rgb(0, 0, 128);"> Moreover, it is very important to note that period data, while released on a filing date, is typically released towards the end of the day. Hence, on any given filing date, we should use data from the period prior. To address this issue, I loop through the dataframe and for all filing dates, I replace data with that from the row above (which includes data from the previous reporting period as my data is sorted by date). This process has been vectorized to expedite processing time. Additionally, the start and end dates of the dataframe have been adjusted to reflect the actual timeframe of interest). </span>

In [28]:
columns_to_replace = ['per_end_date', 'ret_invst', 'tot_debt_tot_equity', 'mkt_val', 'shares_out',
                      'lterm_debt_net_tot', 'net_eps_diluted_basic', 'per_end_date_adj_close']

date_equal_filing_mask = t_merged1['date'] == t_merged1['filing_date']
ticker_equal_previous_mask = t_merged1['ticker'] == t_merged1['ticker'].shift()

for col in columns_to_replace:
    t_merged1[col] = t_merged1[col].mask(date_equal_filing_mask & ticker_equal_previous_mask,
                                        t_merged1[col].shift())

t_merged1 = t_merged1.drop_duplicates(subset=['date', 'ticker'], keep='first')
t_merged1 = t_merged1[(t_merged1['date'] >= mod_start_date) & (t_merged1['date'] <= mod_end_date)]
t_merged1

Unnamed: 0,date,ticker,adj_close,comp_name,exchange,per_end_date,per_type,filing_date,filing_type,zacks_sector_code,ret_invst,tot_debt_tot_equity,mkt_val,shares_out,lterm_debt_net_tot,net_eps_diluted_basic,per_end_date_adj_close
64,2016-01-04,AAOI,16.290000,Matthews International,NASDAQ,2023-06-30,Q,2023-07-28,10-Q,1.0,0.6702,1.4852,1298.53,30.47,-31.442,0.28,5.960000
65,2016-01-05,AAOI,16.080000,Matthews International,NASDAQ,2023-06-30,Q,2023-07-28,10-Q,1.0,0.6702,1.4852,1298.53,30.47,-31.442,0.28,5.960000
66,2016-01-06,AAOI,16.050000,Matthews International,NASDAQ,2023-06-30,Q,2023-07-28,10-Q,1.0,0.6702,1.4852,1298.53,30.47,-31.442,0.28,5.960000
67,2016-01-07,AAOI,15.290000,Matthews International,NASDAQ,2023-06-30,Q,2023-07-28,10-Q,1.0,0.6702,1.4852,1298.53,30.47,-31.442,0.28,5.960000
68,2016-01-08,AAOI,14.790000,Matthews International,NASDAQ,2023-06-30,Q,2023-07-28,10-Q,1.0,0.6702,1.4852,1298.53,30.47,-31.442,0.28,5.960000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1725550,2023-06-26,ZTS,166.398995,Zoetis,NYSE,2023-03-31,Q,2023-05-04,10-Q,4.0,4.9864,1.4611,76913.95,462.11,-1350.000,1.19,164.951802
1725551,2023-06-27,ZTS,169.507679,Zoetis,NYSE,2023-03-31,Q,2023-05-04,10-Q,4.0,4.9864,1.4611,76913.95,462.11,-1350.000,1.19,164.951802
1725552,2023-06-28,ZTS,167.948371,Zoetis,NYSE,2023-03-31,Q,2023-05-04,10-Q,4.0,4.9864,1.4611,76913.95,462.11,-1350.000,1.19,164.951802
1725553,2023-06-29,ZTS,170.699506,Zoetis,NYSE,2023-03-31,Q,2023-05-04,10-Q,4.0,4.9864,1.4611,76913.95,462.11,-1350.000,1.19,164.951802


In [29]:
merged_tickers = t_merged1['ticker'].nunique()
print("Unique number of tickers in Merged Table:", merged_tickers)

Unique number of tickers in Merged Table: 853


### Daily Ratios Table

<span style="color: rgb(0, 0, 128);"> The code below calculates the following daily financial accounting ratios for all tickers within our current universe. 
1. **Debt to Market Cap Ratio**: $\frac{\text{tot\_debt\_tot\_equity} \times \text{per\_end\_date\_adj\_close}}{\text{adj\_close}}$

2. **Return on Investment**: $\frac{\text{ret\_invst} \times (\text{lterm\_debt\_net\_tot} + \text{mkt\_val})}
{(\text{lterm\_debt\_net\_tot} + \text{mkt\_val} \times \text{per\_end\_date\_adj\_close / adj\_close})}$


3. **Price to Earnings Ratio**: $\frac{\text{adj\_close}}{\text{net\_eps\_diluted\_basic}}$


In [30]:
t_ratios = t_merged1.copy()

t_ratios['ratio'] = t_ratios['adj_close'] / t_ratios['per_end_date_adj_close']
t_ratios['debt_to_market_cap_ratio'] = t_ratios['tot_debt_tot_equity'] * 1/t_ratios['ratio']
t_ratios['return_on_investment_ratio'] = (t_ratios['ret_invst'] * (t_ratios['lterm_debt_net_tot'] + t_ratios['mkt_val']) /
    (t_ratios['lterm_debt_net_tot'] + t_ratios['mkt_val'] * t_ratios['ratio']))
t_ratios['price_to_earnings_ratio'] = t_ratios['adj_close'] / t_ratios['net_eps_diluted_basic']

keep = ['date', 'ticker', 'adj_close', 'per_end_date', 'debt_to_market_cap_ratio', 'return_on_investment_ratio', 'price_to_earnings_ratio'
        , 'zacks_sector_code', 'mkt_val']
t_ratios = t_ratios[keep]
t_ratios

Unnamed: 0,date,ticker,adj_close,per_end_date,debt_to_market_cap_ratio,return_on_investment_ratio,price_to_earnings_ratio,zacks_sector_code,mkt_val
64,2016-01-04,AAOI,16.290000,2023-06-30,0.543388,0.241406,58.178571,1.0,1298.53
65,2016-01-05,AAOI,16.080000,2023-06-30,0.550485,0.244588,57.428571,1.0,1298.53
66,2016-01-06,AAOI,16.050000,2023-06-30,0.551514,0.245049,57.321429,1.0,1298.53
67,2016-01-07,AAOI,15.290000,2023-06-30,0.578927,0.257345,54.607143,1.0,1298.53
68,2016-01-08,AAOI,14.790000,2023-06-30,0.598498,0.266131,52.821429,1.0,1298.53
...,...,...,...,...,...,...,...,...,...
1725550,2023-06-26,ZTS,166.398995,2023-03-31,1.448393,4.942265,139.831088,4.0,76913.95
1725551,2023-06-27,ZTS,169.507679,2023-03-31,1.421830,4.850051,142.443427,4.0,76913.95
1725552,2023-06-28,ZTS,167.948371,2023-03-31,1.435031,4.895871,141.133085,4.0,76913.95
1725553,2023-06-29,ZTS,170.699506,2023-03-31,1.411903,4.815604,143.444963,4.0,76913.95
