# DataFrameClient 

In [None]:
from influxdb import DataFrameClient
import pandas as pd

In [None]:
# Connect to the InfluxDB server on localhost:8086 through the
# DataFrame-oriented client, then display the databases it reports.
df_client = DataFrameClient(host='localhost', port=8086)
df_client.get_list_database()

In [None]:
# Drop the database if it already exists, so the walkthrough starts clean.
db = 'testdb'
# any() stops at the first matching entry instead of building a filtered list.
if any(entry['name'] == db for entry in df_client.get_list_database()):
    df_client.drop_database(db)
# Display the databases that remain after the (possible) drop.
df_client.get_list_database()

In [None]:
# Create db: create the database named by `db`, then list the databases again.
df_client.create_database(db)
df_client.get_list_database()

In [None]:
# Make `db` the client's active database and list the measurements in it.
df_client.switch_database(db)
df_client.get_list_measurements()

In [None]:
# Sample Data: load the tab-separated sample file, indexed by its 'time' column.
df = pd.read_csv('resources/data/influxdb_sample.csv', delimiter='\t',
                 index_col=['time'])
# Parse the 'YYYY-MM-DDTHH:MM:SSZ' timestamps after loading instead of via
# read_csv's `date_parser` argument, which pandas deprecated in 2.0. An
# explicit to_datetime call with the same format works on old and new pandas.
df.index = pd.to_datetime(df.index, format='%Y-%m-%dT%H:%M:%SZ')
df.head()

In [None]:
# Summarise the loaded frame (index type, column dtypes, non-null counts).
df.info()

In [None]:
# Insert data: write the DataFrame into the measurement, storing the
# 'location' and 'scientist' columns as tags.
# NOTE(review): presumably the remaining columns are written as fields -
# confirm against the client documentation.
measurement = 'test_tbl'
df_client.write_points(df, measurement, tag_columns=['location', 'scientist'])

In [None]:
# Select data: fetch every point stored in the measurement.
query = f'SELECT * FROM "{measurement}"'

results = df_client.query(query)
results

In [None]:
# Preview the first rows of the result stored under the measurement name.
results[measurement].head()

In [None]:
# Show tags: run InfluxQL `SHOW TAG KEYS` through the DataFrame client.
df_client.query('SHOW TAG KEYS')

In [None]:
# Show fields: run InfluxQL `SHOW FIELD KEYS` through the DataFrame client.
df_client.query('SHOW FIELD KEYS')

# InfluxDBClient

In [None]:
from influxdb import InfluxDBClient

In [None]:
# Connect to the InfluxDB server on localhost:8086 with the plain
# (non-DataFrame) client, then display the databases it reports.
client = InfluxDBClient(host='localhost', port=8086)
client.get_list_database()

In [None]:
# Drop the database if it already exists, so the walkthrough starts clean.
db = 'testdb'
# any() stops at the first matching entry instead of building a filtered list.
if any(entry['name'] == db for entry in client.get_list_database()):
    client.drop_database(db)
# Display the databases that remain after the (possible) drop.
client.get_list_database()

In [None]:
# Create db: create the database named by `db`, then list the databases again.
client.create_database(db)
client.get_list_database()

In [None]:
# Make `db` the client's active database and list the measurements in it.
client.switch_database(db)
client.get_list_measurements()

In [None]:
# Populate sample data
measurement = 'test_tbl'

# One row per point:
# (timestamp, location tag, scientist tag, butterflies field, honeybees field)
_observations = [
    ('2015-08-18T00:00:00Z', 1, 'langstroth', 12, 23),
    ('2015-08-18T00:00:00Z', 1, 'perpetua', 1, 30),
    ('2015-08-18T00:06:00Z', 1, 'langstroth', 11, 28),
    ('2015-08-18T00:06:00Z', 1, 'perpetua', 3, 28),
    ('2015-08-18T05:54:00Z', 2, 'langstroth', 2, 11),
    ('2015-08-18T06:00:00Z', 2, 'langstroth', 1, 10),
    ('2015-08-18T06:06:00Z', 2, 'perpetua', 8, 23),
    ('2015-08-18T06:12:00Z', 2, 'perpetua', 7, 22),
]

# Expand each row into the point dictionary layout passed to write_points.
json_body = [
    {
        'measurement': measurement,
        'time': timestamp,
        'tags': {'location': location, 'scientist': scientist},
        'fields': {'butterflies': butterflies, 'honeybees': honeybees},
    }
    for timestamp, location, scientist, butterflies, honeybees in _observations
]
json_body

In [None]:
# Write the list of point dictionaries to the client's active database.
client.write_points(json_body)

In [None]:
# Select data: fetch every point stored in the measurement.
query = f'SELECT * FROM "{measurement}"'

results = client.query(query)
results

In [None]:
# Inspect the raw response payload held by the result object.
results.raw

In [None]:
# Column names of the first series in the raw response.
results.raw['series'][0]['columns']

In [None]:
# Row values of the first series in the raw response.
results.raw['series'][0]['values']

In [None]:
# Iterate over the points whose 'scientist' tag is 'langstroth' and print
# each one.
# NOTE(review): this relies on the result set carrying tag information per
# series - confirm the filter matches for an ungrouped SELECT *.
points = results.get_points(tags={'scientist': 'langstroth'})
for point in points:
    print(point)

In [None]:
# Show tags: run InfluxQL `SHOW TAG KEYS` through the plain client.
client.query('SHOW TAG KEYS')

In [None]:
# Show fields: run InfluxQL `SHOW FIELD KEYS` through the plain client.
client.query('SHOW FIELD KEYS')

# TEST: Import Tick Data

In [None]:
from influxdb import DataFrameClient
import pandas as pd

In [None]:
# Connect to the InfluxDB server on localhost:8086 through the
# DataFrame-oriented client, then display the databases it reports.
df_client = DataFrameClient(host='localhost', port=8086)
df_client.get_list_database()

In [None]:
# Drop the database if it already exists, so the import starts from empty.
db = 'tickdb'
# any() stops at the first matching entry instead of building a filtered list.
if any(entry['name'] == db for entry in df_client.get_list_database()):
    df_client.drop_database(db)
# Display the databases that remain after the (possible) drop.
df_client.get_list_database()

In [None]:
# Create db: create the database named by `db`, then list the databases again.
df_client.create_database(db)
df_client.get_list_database()

In [None]:
# Make `db` the client's active database and list the measurements in it.
df_client.switch_database(db)
df_client.get_list_measurements()

In [None]:
# Import tick data for periods 201901..201903, one CSV file per period.
# Each file is read in chunks of `chunk_size` rows so a whole file never has
# to be held in memory at once.
periods = [f'2019{month:02}' for month in range(1, 4)]
chunk_size = 10000

# Loop invariants: every file is for the same currency pair and every chunk
# is written to the same measurement.
currency_pair = 'AUDUSD'
measurement = 'tick_tbl'

for period in periods:
    file = f'resources/data/DAT_ASCII_{currency_pair}_T_{period}.csv'
    print(f'Reading: {file}')

    # The files have no header row; the 'vol' column is named but not loaded.
    df_chunks = pd.read_csv(file, sep=',',
                            header=None, names=['datetime', 'bid', 'ask', 'vol'],
                            usecols=['datetime', 'bid', 'ask'],
                            index_col=['datetime'],
                            chunksize=chunk_size)

    for df in df_chunks:
        # Parse the timestamps per chunk with an explicit format rather than
        # through read_csv's `date_parser`, which pandas deprecated in 2.0.
        df.index = pd.to_datetime(df.index, format="%Y%m%d %H%M%S%f")
        df['currency_pair'] = currency_pair

        # Insert data: 'currency_pair' is stored as a tag.
        df_client.write_points(df, measurement, tag_columns=['currency_pair'], batch_size=chunk_size)

In [None]:
# Select data: count the bid and ask values in each one-minute bucket.
query = (
    '\n'
    'SELECT COUNT(bid) AS bid_count, COUNT(ask) AS ask_count\n'
    f'FROM "{measurement}"\n'
    'GROUP BY time(1m)\n'
)

# Alternative query, kept for reference: raw bid/ask values in a time window.
# query = f'''
# SELECT bid, ask
# FROM "{measurement}"
# WHERE time >= '2019-01-01 00:00:00' AND time <= '2019-08-31 23:59:59'
# '''

results = df_client.query(query)

In [None]:
# Take the result stored under the measurement name and preview its first rows.
res_df = results[measurement]
res_df.head()

In [None]:
# First rows among those whose ask_count is zero.
res_df[res_df['ask_count'] == 0].head()

In [None]:
# Last rows among those whose ask_count is zero.
res_df[res_df['ask_count'] == 0].tail()