In [1]:
from soam.workflow.time_series_extractor import TimeSeriesExtractor
from muttlib.dbconn import get_client

In [2]:
# Connection settings for the SQLite database used throughout this quickstart.
sqlite_cfg = dict(
    db_type="sqlite",
    database="soam_quickstart.db",
)

# get_client returns an indexable result; the database client is the item at index 1.
sqlite_client = get_client(sqlite_cfg)[1]

In [3]:
# Extractor bound to the `stock` table; every query below is run through it.
extractor = TimeSeriesExtractor(
    db=sqlite_client,
    table_name="stock",
)

### Query 1

Simple query, just retrieving all the data from the database.

Query shape: `build_query_kwargs` is a dict of {str: obj}
            holding the configuration of the query used for the extraction.

In [4]:
# Query 1: select every column, with no filtering, grouping or ordering.
query = dict(columns="*")

In [5]:
# Run the extraction using the query configuration defined in the previous cell.
df = extractor.run(build_query_kwargs=query)

# Preview the first rows of the result.
df.head()

Unnamed: 0,id,date,symbol,avg_num_trades,avg_price
0,1,2021-03-01,AAPL,80000.0,125.0
1,2,2021-03-02,AAPL,70000.0,126.0
2,3,2021-03-03,AAPL,80000.0,123.0
3,4,2021-03-04,AAPL,70000.0,121.0
4,5,2021-03-05,AAPL,80000.0,119.0


### Query 2
Adding some extra conditionals:
- Filtering data by just retrieving Apple's stock valuations.
- Querying only a subset of the columns.
- Renaming some columns with aliases.

In [6]:
# Query 2: a subset of columns (one aliased), restricted to Apple's rows.
query = {
    # `avg_price` is exposed under the alias `Valuation`.
    "columns": ["date", "symbol", "avg_price AS Valuation"],
    # Extra condition applied as a filter on the rows.
    "extra_where_conditions": ["symbol = 'AAPL'"],
}

In [7]:
# Run the filtered, aliased query and preview the first rows.
df = extractor.run(build_query_kwargs=query)
df.head()

Unnamed: 0,date,symbol,Valuation
0,2021-03-01,AAPL,125.0
1,2021-03-02,AAPL,126.0
2,2021-03-03,AAPL,123.0
3,2021-03-04,AAPL,121.0
4,2021-03-05,AAPL,119.0


### Query 3
Adding some extra conditionals:
- Filtering data by certain days.
- Ordering results based on their dates.

In [8]:
# Query 3: Apple's valuations restricted to a date window, ordered by date.
query = {
    "columns": ["date", "symbol", "avg_price AS Valuation"],
    # Column the date window below is applied to.
    "timestamp_col": "date",
    "start_date": "2021-03-01",
    "end_date": "2021-03-20",
    "extra_where_conditions": ["symbol = 'AAPL'"],
    # Oldest rows first.
    "order_by": ["date ASC"],
}

In [9]:
# Run the date-bounded, ordered query and preview the first rows.
df = extractor.run(build_query_kwargs=query)
df.head()

Unnamed: 0,date,symbol,Valuation
0,2021-03-01,AAPL,125.0
1,2021-03-02,AAPL,126.0
2,2021-03-03,AAPL,123.0
3,2021-03-04,AAPL,121.0
4,2021-03-05,AAPL,119.0


### Query 4

Adding some aggregated data.
- Multiply the average valuation by the number of trades to obtain the transactional volume of the day.
- Group by date and symbol. This logic is implicit in the class, so you don't need to specify it.
- Filter by a certain level of volume using a HAVING condition (`extra_having_conditions`).

In [40]:
# Query 4: daily transactional volume per symbol, keeping only high-volume rows.
query = {
    # Volume = number of trades * average price, exposed under the alias `Volume`.
    "columns": ["date", "symbol", "avg_num_trades * avg_price AS Volume"],
    # Columns the results are grouped by.
    "dimensions": ["date", "symbol"],
    "timestamp_col": "date",
    "start_date": "2021-03-01",
    "end_date": "2021-03-20",
    "order_by": ["date ASC"],
    # Condition on the computed `Volume` alias.
    "extra_having_conditions": ["Volume > 1000000"],
}

In [41]:
# Run the aggregated volume query and preview the first rows.
df = extractor.run(build_query_kwargs=query)
df.head()

Unnamed: 0,date,symbol,Volume
0,2021-03-01,AAPL,10000000.0
1,2021-03-01,TSLA,6300000.0
2,2021-03-02,AAPL,8820000.0
3,2021-03-02,TSLA,6448000.0
4,2021-03-03,AAPL,9840000.0


### Query 5

Adding some aggregated data.
- Retrieve the day with the biggest transactional volume for each company.

In [47]:
# Query 5: for each symbol, the day with the largest transactional volume.
query = {
    # NOTE(review): `date` is selected but is not listed in `dimensions`.
    # Presumably this relies on SQLite returning bare columns from the row
    # where max() is attained - confirm before running on another backend.
    "columns": ["date", "symbol", "max(avg_num_trades * avg_price) AS Max_Volume"],
    # One result row per symbol.
    "dimensions": ["symbol"],
}

In [48]:
# Run the per-symbol maximum-volume query and preview the result.
df = extractor.run(build_query_kwargs=query)
df.head()

Unnamed: 0,date,symbol,Max_Volume
0,2021-03-22,AAPL,21300000.0
1,2021-03-08,TSLA,10324000.0
