# InfluxDB 2.0 Primer

This notebook shows how to write a pandas DataFrame into an InfluxDB instance. Keep all your secret variables (`INFLUX_HOST`, `INFLUX_TOKEN`, `INFLUX_ORG`, `INFLUX_BUCKET`) in a locally stored `.env` file.

In [1]:
import pandas as pd
import os
from datetime import datetime
from influxdb_client import InfluxDBClient, Point, WritePrecision
from influxdb_client.client.write_api import SYNCHRONOUS
from dotenv import find_dotenv, load_dotenv
# Load the secrets kept in the local `.env` file so they can be read from os.environ.
# (An API token can be generated from the "Tokens Tab" in the InfluxDB UI.)
load_dotenv(dotenv_path=find_dotenv())

True

The code below populates an InfluxDB bucket with air quality data in CSV format, pulled from https://aqicn.org/data-platform/covid19/

For testing purposes, you can restrict the upload to a single city or a few cities from the list.

# Set up InfluxDB bindings 

Documentation on Python bindings with examples can be found here:
https://github.com/influxdata/influxdb-client-python

In [2]:
# Client for the InfluxDB instance; host URL and API token are read from the environment
influx = InfluxDBClient(
    url=os.environ['INFLUX_HOST'],
    token=os.environ['INFLUX_TOKEN'],
)

# Writer obtained from the client, configured with the SYNCHRONOUS write options
write_api = influx.write_api(write_options=SYNCHRONOUS)

In [3]:
# Download the filtered air-quality CSV; `Date` is parsed as datetimes and becomes the index
df = pd.read_csv(
    'https://raw.githubusercontent.com/AntonBiryukovUofC/air_quality_check/luis-testing/src/data/waqi-covid19-airqualitydata-filtered.csv',
    parse_dates=['Date'],
).set_index('Date')

In [4]:
# List every distinct city name present in the dataset
print(df.loc[:, 'City'].unique())

['Jieyang' 'Beijing' 'Kunming' 'Hangzhou' 'Chongqing' 'Qingdao' 'Haikou'
 'Qiqihar' 'Guiyang' 'Ürümqi' 'Shenzhen' 'Yunfu' 'Xuchang' 'Yinchuan'
 'Shenyang' 'Lhasa' 'Shanghai' 'Changchun' 'Foshan' 'Nanning' 'Fushun'
 'Hefei' 'Chengdu' 'Hohhot' 'Qinhuangdao' 'Shijiazhuang' 'Shantou'
 'Zhengzhou' 'Nanjing' 'Xining' 'Xi’an' 'Zhuzhou' 'Wuhan' 'Tianjin'
 'Changzhou' 'Nanchang' 'Shiyan' 'Harbin' 'Xinxiang' 'Suzhou' 'Lanzhou'
 'Jinan' 'Changsha' 'Hegang' 'Anyang' 'Taiyuan' 'Guangzhou' 'Fuzhou'
 'Wuxi' 'Ningbo' 'Xiamen' 'Dongguan' 'Hamilton' 'Calgary' 'Winnipeg'
 'Halifax' 'Kitchener' 'Edmonton' 'Mississauga' 'Surrey' 'Québec'
 'Vancouver' 'Victoria' 'Montréal' 'Toronto' 'Ottawa' 'London']


In [5]:
# Choose which columns and which rows end up in the frame that gets written

cols = ['no2', 'o3', 'co', 'so2', 'temperature', 'City', 'Country']
# Default selection: every row whose country code is 'CA'
df_to_write = df.loc[df['Country'].eq('CA'), cols]
# Alternative: only a few cities (names must match the data exactly, accents included)
#df_to_write = df.loc[df['City'].isin(['Ottawa', 'Toronto', 'Montréal']), cols]
# Preview ten randomly drawn rows of the selection
df_to_write.sample(n=10)

Unnamed: 0_level_0,no2,o3,co,so2,temperature,City,Country
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-04-20,2.4,14.8,0.1,0.1,,Victoria,CA
2020-08-24,4.7,0.1,1.2,0.8,13.8,Edmonton,CA
2020-04-23,7.4,25.0,0.2,5.1,2.7,Montréal,CA
2016-02-24,11.1,2.8,0.1,0.4,,Vancouver,CA
2015-03-24,12.1,0.1,6.3,2.9,,Calgary,CA
2020-04-11,11.9,24.0,0.2,,3.0,Québec,CA
2020-05-26,1.8,10.4,,0.3,16.4,Halifax,CA
2019-05-18,3.3,21.6,1.7,0.2,12.2,Toronto,CA
2018-06-15,1.9,8.1,0.1,0.4,15.1,Victoria,CA
2019-05-14,4.1,8.0,0.1,0.2,11.6,Surrey,CA


In [6]:
# (rows, columns) of the selection that is about to be written
df_to_write.shape

(164652, 7)

In [7]:
# Send the selected rows to the InfluxDB bucket named by INFLUX_BUCKET.
# `City` and `Country` are passed as tag columns; the measurement is "luis-airquality".

write_api.write(
    bucket=os.environ['INFLUX_BUCKET'],
    org=os.environ['INFLUX_ORG'],
    record=df_to_write,
    data_frame_measurement_name='luis-airquality',
    data_frame_tag_columns=['City', 'Country'],
)

In [None]:
# Check that the database write succeeded by reading the data back with a query.
# The query API handle is obtained from the same client that was used for writing.

query_api = influx.query_api()

In [None]:
# Flux query used to verify the write: it lists the distinct `City` tag values stored
# under the "luis-airquality" measurement within the time range below.
#
# The bucket is taken from INFLUX_BUCKET, the same variable the write cell uses, so the
# check reads from the bucket that was written to instead of a hard-coded one.
# If the variable is unset, the previously hard-coded name "ts_spe" is used.
# NOTE: `query` is an f-string; literal braces added to the Flux text must be doubled.
influx_bucket = os.environ.get('INFLUX_BUCKET', 'ts_spe')

query = f'''
from(bucket:"{influx_bucket}")
        |> range(start: 2019-01-01T23:30:00Z, stop: 2020-12-31T00:00:00Z)
        |> filter(fn: (r) => r["_measurement"] == "luis-airquality")
        |> group(columns:["City"])
        |> distinct(column:"City")
        |> keep(columns: ["_value"])
        '''

# Alternative: pivot the fields into columns, one row per timestamp
# query = f'''
# from(bucket:"{influx_bucket}")
#     |> range(start: 2019-01-01T23:30:00Z, stop: 2020-12-31T00:00:00Z)
#     |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")
#     '''

# Alternative: fetch the unprocessed records in the time range
# query = f'''
# from(bucket:"{influx_bucket}")
#     |> range(start: 2019-01-01T23:30:00Z, stop: 2020-12-31T00:00:00Z)
#     '''

In [None]:
# Run the Flux query for the organisation named by INFLUX_ORG and show what comes back
test = query_api.query_data_frame(
    query=query,
    org=os.environ['INFLUX_ORG'],
)
# display(test.head())
display(test)