# Testing Read Times
* This notebook measures how long the method `get_asset_data` takes when passed varying numbers of assets and time ranges.

In [1]:
import datetime
import os
import time

import pandas as pd

from api.mongo_connection import MongoAPI
from db_access.asset_query import AssetQuery
from db_access.db_query import ReadDB

In [2]:
# Open the API connection. 'DS4300' is the argument MongoAPI is constructed with
# (presumably the database name — TODO confirm against api.mongo_connection).
api = MongoAPI('DS4300')

# Query object built on top of the connection; its get_asset_data method is
# the call whose read time this notebook measures.
reader = ReadDB(api)

In [3]:
# CSV with the S&P 500 components since 2010. The default is the original
# author's local path; set the SP500_CSV environment variable to point at your
# own copy instead of editing this cell.
SP500_CSV = os.environ.get(
    'SP500_CSV',
    '/Users/alex/Documents/Notebook/improved_momentum/data/index/sp500_2010.csv',
)

# Only the ticker column is read; it supplies the asset list for the timing runs.
sp500 = pd.read_csv(SP500_CSV, usecols=['symbol'])

sp500

Unnamed: 0,symbol
0,EIX
1,1284849D
2,FE
3,AA
4,AXP
...,...
716,IR
717,POOL
718,VNT
719,TSLA


In [4]:
# Date window passed to every timing run: 1 Jan 2010 through 1 Jan 2021.
start = pd.Timestamp(2010, 1, 1)
end = pd.Timestamp(2021, 1, 1)

In [5]:
def test_time(assets, id, start_date, end_date):
    start = datetime.datetime.now()
    query = reader.get_asset_data(assets=assets,
                ts_fields=["prccd", "prchd", "prcld", "prcod"], # timeseries fields we want
                static_fields=['tic', 'conm', 'weburl'], # static fields
                start = start_date, # start date
                end = end_date, # end date
                search_by=id # in this query we are using ticker as our asset id
                )
    results = query.df
    total_time = datetime.datetime.now() - start
    print(f'Took {total_time.total_seconds()} seconds to get fetch results and format DataFrame')
    print('\nInfo on returned DataFrame:')
    print(results.info())

### Testing the speed for fetching ALL timeseries data for the full S&P 500 ticker list (720 tickers, per the table above)
* The query took under 35 seconds.

In [6]:
# Time the query for every ticker in the S&P 500 list, looked up by ticker symbol.
test_time(
    assets=sp500['symbol'].tolist(),
    id='tic',
    start_date=start,
    end_date=end,
)

Took 20.944078 seconds to get fetch results and format DataFrame

Info on returned DataFrame:
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 1682882 entries, (Timestamp('2013-12-09 00:00:00'), 'AAL') to (Timestamp('2020-12-31 00:00:00'), 'ALLE')
Data columns (total 6 columns):
 #   Column  Non-Null Count    Dtype  
---  ------  --------------    -----  
 0   conm    1682882 non-null  object 
 1   weburl  1682882 non-null  object 
 2   prccd   1682733 non-null  float64
 3   prchd   1682733 non-null  float64
 4   prcld   1682733 non-null  float64
 5   prcod   1682517 non-null  float64
dtypes: float64(4), object(2)
memory usage: 83.6+ MB
None


### Fetching ALL data for two assets (AAPL and JNJ)

* Took under 5 seconds

In [7]:
# Time the same query for just two tickers over the same date window.
test_time(
    assets=['AAPL', 'JNJ'],
    id='tic',
    start_date=start,
    end_date=end,
)

Took 3.067581 seconds to get fetch results and format DataFrame

Info on returned DataFrame:
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 5538 entries, (Timestamp('2010-01-04 00:00:00'), 'AAPL') to (Timestamp('2020-12-31 00:00:00'), 'JNJ')
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   conm    5538 non-null   object 
 1   weburl  5538 non-null   object 
 2   prccd   5538 non-null   float64
 3   prchd   5538 non-null   float64
 4   prcld   5538 non-null   float64
 5   prcod   5538 non-null   float64
dtypes: float64(4), object(2)
memory usage: 362.2+ KB
None
