# InfluxDB2.0 Primer

This notebook shows you how to write a dataframe into an Influx instance. Keep all your secret vars in a locally stored `.env` file

In [1]:
import pandas as pd
import os
from datetime import datetime
from influxdb_client import InfluxDBClient, Point, WritePrecision
from influxdb_client.client.write_api import SYNCHRONOUS
from dotenv import find_dotenv, load_dotenv
# You can generate a Token from the "Tokens Tab" in the UI
load_dotenv(find_dotenv())

True

Code below populates InfluxDB bucket with air quality data in csv format pulled from https://aqicn.org/data-platform/covid19/

For testing purposes you can choose only some city (cities) from a list.

# Set up InfluxDB bindings 

Documentation on Python bindings with examples can be found here:
https://github.com/influxdata/influxdb-client-python

In [2]:
influx = InfluxDBClient(url=os.environ['INFLUX_HOST'], token=os.environ['INFLUX_TOKEN'])
write_api = influx.write_api(write_options=SYNCHRONOUS)

In [3]:
df = pd.read_csv('https://raw.githubusercontent.com/AntonBiryukovUofC/air_quality_check/luis-testing/src/data/waqi-covid19-airqualitydata-filtered.csv',
                 parse_dates = ['Date']).set_index('Date')
df['year'] = pd.DatetimeIndex(df.index).year
df['month'] = pd.DatetimeIndex(df.index).month
df['DOY'] = pd.DatetimeIndex(df.index).dayofyear

In [4]:
print(df['City'].unique())

['Jieyang' 'Beijing' 'Kunming' 'Hangzhou' 'Chongqing' 'Qingdao' 'Haikou'
 'Qiqihar' 'Guiyang' 'Ürümqi' 'Shenzhen' 'Yunfu' 'Xuchang' 'Yinchuan'
 'Shenyang' 'Lhasa' 'Shanghai' 'Changchun' 'Foshan' 'Nanning' 'Fushun'
 'Hefei' 'Chengdu' 'Hohhot' 'Qinhuangdao' 'Shijiazhuang' 'Shantou'
 'Zhengzhou' 'Nanjing' 'Xining' 'Xi’an' 'Zhuzhou' 'Wuhan' 'Tianjin'
 'Changzhou' 'Nanchang' 'Shiyan' 'Harbin' 'Xinxiang' 'Suzhou' 'Lanzhou'
 'Jinan' 'Changsha' 'Hegang' 'Anyang' 'Taiyuan' 'Guangzhou' 'Fuzhou'
 'Wuxi' 'Ningbo' 'Xiamen' 'Dongguan' 'Hamilton' 'Calgary' 'Winnipeg'
 'Halifax' 'Kitchener' 'Edmonton' 'Mississauga' 'Surrey' 'Québec'
 'Vancouver' 'Victoria' 'Montréal' 'Toronto' 'Ottawa' 'London']


In [5]:
bins = [2014, 2019, 2020]
labels = ['baseline', '2020']
date_bins = pd.cut(df['year'], bins=bins, labels=labels, include_lowest=True)

annual_stats = df.fillna(0).groupby(['City', date_bins]).agg(
                            {'Country': ['first'],
                             'aqi': ['mean'],
                             'co': ['mean'],
                             'dew': ['mean'],
                             'humidity': ['mean'],
                             'mepaqi': ['mean'],
                             'no2': ['mean'],
                             'o3': ['mean'],
                             'pm10': ['mean'],
                             'pm25': ['mean'],
                             'precipitation': ['mean'],
                             'pressure': ['mean'],
                             'so2': ['mean'],
                             'temperature': ['mean'],
                            }
    ).reset_index()
annual_stats.columns = ['City', 'date_bins','Country', 'aqi', 'co', 'dew', 'humidity', 'mepaqi', 'no2', 'o3', 'pm10', 'pm25', 'preciptation', 'pressure', 'so2', 'temperature']
annual_stats.head(65)

Unnamed: 0,City,date_bins,Country,aqi,co,dew,humidity,mepaqi,no2,o3,pm10,pm25,preciptation,pressure,so2,temperature
0,Anyang,baseline,CN,56.528486,11.246248,3.149660,28.242104,0.0,18.596171,28.029085,84.630547,138.968620,6.039423,265.868681,13.982685,8.308470
1,Anyang,2020,CN,31.686490,7.835592,7.025337,61.605633,0.0,15.457720,26.137455,72.433124,125.752020,10.812118,962.131777,6.195736,14.648205
2,Beijing,baseline,CN,14.507823,7.195159,2.308642,27.241867,0.0,20.198078,25.095065,64.946445,114.225139,4.496304,555.139872,5.044366,8.338895
3,Beijing,2020,CN,9.409601,5.914244,2.602176,50.893786,0.0,14.349394,23.927254,48.871467,92.610139,0.269000,988.106550,1.939726,12.899282
4,Calgary,baseline,CA,0.000000,2.672445,0.162390,34.547695,0.0,8.779666,0.099299,0.163360,23.587349,0.000000,563.378442,0.876831,3.257258
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,Mississauga,baseline,CA,0.000000,1.201557,0.073353,45.274646,0.0,6.269740,22.796745,0.000000,28.290297,0.000000,636.028376,1.038421,5.608628
61,Mississauga,2020,CA,0.000000,2.254334,0.000000,66.879281,0.0,4.713848,21.758668,0.000000,25.583774,0.000000,994.609012,0.348362,9.946987
62,Montréal,baseline,CA,0.000000,0.535203,1.582269,40.789003,0.0,10.102348,12.585197,0.000000,30.533671,0.000000,589.032720,5.213641,5.266317
63,Montréal,2020,CA,0.000000,0.422191,2.835830,68.962450,0.0,8.993051,15.921555,0.000000,27.746996,0.000000,1000.970012,5.146337,9.364664


In [6]:
annual_stats = annual_stats.pivot(index=['City','Country'], columns='date_bins').swaplevel(0, 1, axis=1).sort_index(axis=1)
annual_stats.head(10)

Unnamed: 0_level_0,date_bins,baseline,baseline,baseline,baseline,baseline,baseline,baseline,baseline,baseline,baseline,...,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020
Unnamed: 0_level_1,Unnamed: 1_level_1,aqi,co,dew,humidity,mepaqi,no2,o3,pm10,pm25,preciptation,...,humidity,mepaqi,no2,o3,pm10,pm25,preciptation,pressure,so2,temperature
City,Country,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Anyang,CN,56.528486,11.246248,3.14966,28.242104,0.0,18.596171,28.029085,84.630547,138.96862,6.039423,...,61.605633,0.0,15.45772,26.137455,72.433124,125.75202,10.812118,962.131777,6.195736,14.648205
Beijing,CN,14.507823,7.195159,2.308642,27.241867,0.0,20.198078,25.095065,64.946445,114.225139,4.496304,...,50.893786,0.0,14.349394,23.927254,48.871467,92.610139,0.269,988.10655,1.939726,12.899282
Calgary,CA,0.0,2.672445,0.16239,34.547695,0.0,8.779666,0.099299,0.16336,23.587349,0.0,...,58.962483,0.0,7.800611,0.1,0.0,20.483755,0.0,988.316523,0.647459,5.5188
Changchun,CN,0.0,6.572737,0.437119,26.152141,0.0,15.803278,43.360629,54.519647,98.204084,7.970232,...,58.834202,0.0,13.647429,36.508489,45.007186,91.251291,14.582798,987.608354,4.444487,7.306782
Changsha,CN,0.0,7.195024,6.531198,36.718406,0.0,14.600539,23.787439,55.982827,117.202114,13.464894,...,80.332246,0.0,11.590308,20.727899,43.04846,99.387908,15.805118,988.075634,3.397803,18.396558
Changzhou,CN,0.0,7.662798,3.854145,35.803154,0.0,17.892557,19.429074,61.101608,119.01147,3.65048,...,77.494527,0.0,15.262882,10.110158,47.732792,96.72347,11.921821,991.683556,4.516061,16.761568
Chengdu,CN,0.0,7.17973,6.341976,36.438618,0.0,19.077181,19.216868,61.537797,118.577646,21.141102,...,71.023518,0.0,15.050221,18.914966,46.340925,95.64211,10.116477,987.412271,3.095468,17.988845
Chongqing,CN,0.0,7.367623,6.725472,36.701809,0.0,17.71636,15.337077,52.52267,107.984446,9.952289,...,75.634292,0.0,15.231164,14.514132,41.64726,86.409589,12.016644,987.715753,3.803995,18.190046
Dongguan,CN,0.0,6.502916,6.942299,37.707883,0.0,15.372442,19.075473,42.753247,92.501595,0.600091,...,71.738725,0.0,11.660046,17.899073,33.202086,71.364079,0.0,986.057798,3.99409,23.719119
Edmonton,CA,0.0,3.355309,0.597037,36.920757,0.0,9.626456,0.099319,2.025851,27.840353,0.0,...,68.399215,0.0,8.477128,0.1,0.0,22.119591,0.0,990.652401,1.77884,4.372706


In [7]:
annual_stats['2020'].head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,aqi,co,dew,humidity,mepaqi,no2,o3,pm10,pm25,preciptation,pressure,so2,temperature
City,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Anyang,CN,31.68649,7.835592,7.025337,61.605633,0.0,15.45772,26.137455,72.433124,125.75202,10.812118,962.131777,6.195736,14.648205
Beijing,CN,9.409601,5.914244,2.602176,50.893786,0.0,14.349394,23.927254,48.871467,92.610139,0.269,988.10655,1.939726,12.899282
Calgary,CA,0.0,3.874507,0.0,58.962483,0.0,7.800611,0.1,0.0,20.483755,0.0,988.316523,0.647459,5.5188
Changchun,CN,0.0,6.313968,-0.424972,58.834202,0.0,13.647429,36.508489,45.007186,91.251291,14.582798,987.608354,4.444487,7.306782
Changsha,CN,0.0,7.042188,15.052219,80.332246,0.0,11.590308,20.727899,43.04846,99.387908,15.805118,988.075634,3.397803,18.396558
Changzhou,CN,0.0,7.359799,12.792065,77.494527,0.0,15.262882,10.110158,47.732792,96.72347,11.921821,991.683556,4.516061,16.761568
Chengdu,CN,0.0,6.432582,12.72238,71.023518,0.0,15.050221,18.914966,46.340925,95.64211,10.116477,987.412271,3.095468,17.988845
Chongqing,CN,0.0,7.47137,14.022055,75.634292,0.0,15.231164,14.514132,41.64726,86.409589,12.016644,987.715753,3.803995,18.190046
Dongguan,CN,0.0,6.054253,18.322943,71.738725,0.0,11.660046,17.899073,33.202086,71.364079,0.0,986.057798,3.99409,23.719119
Edmonton,CA,0.0,2.961436,-1.914337,68.399215,0.0,8.477128,0.1,0.0,22.119591,0.0,990.652401,1.77884,4.372706


In [8]:
annual_stats_change = (annual_stats['2020'].divide(annual_stats['baseline']) - 1) * 100
annual_stats_change = annual_stats_change.round(1)
annual_stats_change.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,aqi,co,dew,humidity,mepaqi,no2,o3,pm10,pm25,preciptation,pressure,so2,temperature
City,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Anyang,CN,-43.9,-30.3,123.1,118.1,,-16.9,-6.7,-14.4,-9.5,79.0,261.9,-55.7,76.3
Beijing,CN,-35.1,-17.8,12.7,86.8,,-29.0,-4.7,-24.8,-18.9,-94.0,78.0,-61.5,54.7
Calgary,CA,,45.0,-100.0,70.7,,-11.2,0.7,-100.0,-13.2,,75.4,-26.2,69.4
Changchun,CN,,-3.9,-197.2,125.0,,-13.6,-15.8,-17.4,-7.1,83.0,103.2,-51.2,88.9
Changsha,CN,,-2.1,130.5,118.8,,-20.6,-12.9,-23.1,-15.2,17.4,92.8,-46.0,111.0
Changzhou,CN,,-4.0,231.9,116.4,,-14.7,-48.0,-21.9,-18.7,226.6,152.8,-44.9,89.8
Chengdu,CN,,-10.4,100.6,94.9,,-21.1,-1.6,-24.7,-19.3,-52.1,94.7,-39.8,111.7
Chongqing,CN,,1.4,108.5,106.1,,-14.0,-5.4,-20.7,-20.0,20.7,103.5,-29.7,109.6
Dongguan,CN,,-6.9,163.9,90.2,,-24.1,-6.2,-22.3,-22.9,-100.0,85.4,-23.6,82.9
Edmonton,CA,,-11.7,-420.6,85.3,,-11.9,0.7,-100.0,-20.5,,84.2,-4.6,45.0


In [9]:
# source: https://simplemaps.com/data/world-cities
cities = pd.read_csv('..\src\data\worldcities.csv', usecols = ['city','iso2', 'lat','lng'])

# There are some cities within China with same name in different provinces (e.g. Changsha)
# AQ data does not have province info to resolve
# for now decided to take coordinates of first city in database, which has largest population
cities = cities.drop_duplicates(subset=['city', 'iso2'])

cities= cities.rename(columns={"city": "City", "iso2": "Country"}).set_index(['City','Country'])
cities.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lng
City,Country,Unnamed: 2_level_1,Unnamed: 3_level_1
Tokyo,JP,35.6897,139.6922
Jakarta,ID,-6.2146,106.8451
Delhi,IN,28.66,77.23
Mumbai,IN,18.9667,72.8333
Manila,PH,14.5958,120.9772
Shanghai,CN,31.1667,121.4667
São Paulo,BR,-23.5504,-46.6339
Seoul,KR,37.5833,127.0
Mexico City,MX,19.4333,-99.1333
Guangzhou,CN,23.1288,113.259


In [14]:
cities.loc['Calgary']

Unnamed: 0_level_0,lat,lng
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
CA,51.05,-114.0667


In [11]:
result = pd.merge(annual_stats_change, cities, how="left", left_index=True, right_index=True)
result.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,aqi,co,dew,humidity,mepaqi,no2,o3,pm10,pm25,preciptation,pressure,so2,temperature,lat,lng
City,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Anyang,CN,-43.9,-30.3,123.1,118.1,,-16.9,-6.7,-14.4,-9.5,79.0,261.9,-55.7,76.3,,
Beijing,CN,-35.1,-17.8,12.7,86.8,,-29.0,-4.7,-24.8,-18.9,-94.0,78.0,-61.5,54.7,39.905,116.3914
Calgary,CA,,45.0,-100.0,70.7,,-11.2,0.7,-100.0,-13.2,,75.4,-26.2,69.4,51.05,-114.0667
Changchun,CN,,-3.9,-197.2,125.0,,-13.6,-15.8,-17.4,-7.1,83.0,103.2,-51.2,88.9,43.9,125.2
Changsha,CN,,-2.1,130.5,118.8,,-20.6,-12.9,-23.1,-15.2,17.4,92.8,-46.0,111.0,28.1987,112.9709
Changzhou,CN,,-4.0,231.9,116.4,,-14.7,-48.0,-21.9,-18.7,226.6,152.8,-44.9,89.8,31.8122,119.9692
Chengdu,CN,,-10.4,100.6,94.9,,-21.1,-1.6,-24.7,-19.3,-52.1,94.7,-39.8,111.7,30.6636,104.0667
Chongqing,CN,,1.4,108.5,106.1,,-14.0,-5.4,-20.7,-20.0,20.7,103.5,-29.7,109.6,29.55,106.5069
Dongguan,CN,,-6.9,163.9,90.2,,-24.1,-6.2,-22.3,-22.9,-100.0,85.4,-23.6,82.9,23.0475,113.7493
Edmonton,CA,,-11.7,-420.6,85.3,,-11.9,0.7,-100.0,-20.5,,84.2,-4.6,45.0,53.5344,-113.4903


In [12]:
# some cities do not have coordinates in current cities file used
# for now those are dropped
result.dropna(subset=['lat', 'lng'],inplace=True)
result.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,aqi,co,dew,humidity,mepaqi,no2,o3,pm10,pm25,preciptation,pressure,so2,temperature,lat,lng
City,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Beijing,CN,-35.1,-17.8,12.7,86.8,,-29.0,-4.7,-24.8,-18.9,-94.0,78.0,-61.5,54.7,39.905,116.3914
Calgary,CA,,45.0,-100.0,70.7,,-11.2,0.7,-100.0,-13.2,,75.4,-26.2,69.4,51.05,-114.0667
Changchun,CN,,-3.9,-197.2,125.0,,-13.6,-15.8,-17.4,-7.1,83.0,103.2,-51.2,88.9,43.9,125.2
Changsha,CN,,-2.1,130.5,118.8,,-20.6,-12.9,-23.1,-15.2,17.4,92.8,-46.0,111.0,28.1987,112.9709
Changzhou,CN,,-4.0,231.9,116.4,,-14.7,-48.0,-21.9,-18.7,226.6,152.8,-44.9,89.8,31.8122,119.9692
Chengdu,CN,,-10.4,100.6,94.9,,-21.1,-1.6,-24.7,-19.3,-52.1,94.7,-39.8,111.7,30.6636,104.0667
Chongqing,CN,,1.4,108.5,106.1,,-14.0,-5.4,-20.7,-20.0,20.7,103.5,-29.7,109.6,29.55,106.5069
Dongguan,CN,,-6.9,163.9,90.2,,-24.1,-6.2,-22.3,-22.9,-100.0,85.4,-23.6,82.9,23.0475,113.7493
Edmonton,CA,,-11.7,-420.6,85.3,,-11.9,0.7,-100.0,-20.5,,84.2,-4.6,45.0,53.5344,-113.4903
Foshan,CN,,-2.7,139.2,95.8,,-22.1,2.1,-18.2,-23.0,-100.0,87.5,-28.8,81.6,23.0292,113.1056


In [13]:
result.to_csv('..\src\data\city_stats.csv')

In [None]:
# InfluxDB free version only allows timestamps younger than 30 days
# Data is shifted to a future date in order to be retained in DB
timeshift = float(2000)
df = df.shift(periods=timeshift, freq="D")
df.head(10)

In [None]:
# select columns and cities to pull into dataframe

cols = ['no2','o3','co','so2','temperature','City','Country']
df_to_write = df.loc[df['Country'] == 'CA',cols]

# add time shift factor as a column in database
df_to_write['timeshift']=timeshift

#df_to_write = df.loc[df['City'].isin(['Ottawa', 'Toronto', 'Montreal']),cols]
df_to_write.sample(10)

In [None]:
df_to_write.shape

In [None]:
# Write dataframe into InfluxDB bucket
write_api.write(os.environ['INFLUX_BUCKET'], os.environ['INFLUX_ORG'], record=df_to_write, data_frame_measurement_name='luis-airquality',data_frame_tag_columns = ['City','Country'])

In [None]:
# Check database write operation was ok, with a query

query_api = influx.query_api()

In [None]:
#query= '''
#from(bucket:"spe-project")
#    |> range(start: 2019-01-01T23:30:00Z, stop: 2020-12-31T00:00:00Z)
#    |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")
#    '''

query= '''
from(bucket:"ts_spe")
        |> range(start: 2019-01-01T23:30:00Z, stop: 2020-12-31T00:00:00Z)
        |> filter(fn: (r) => r["_measurement"] == "luis-airquality")
        |> group(columns:["City"])
        |> distinct(column:"City")
        |> keep(columns: ["_value"])
        '''

# query= '''
# from(bucket:"ts_spe")
#     |> range(start: 2019-01-01T23:30:00Z, stop: 2020-12-31T00:00:00Z)
#     '''

In [None]:
test = query_api.query_data_frame(org=os.environ['INFLUX_ORG'], query=query)
# display(test.head())
display(test)

In [None]:
# Code to delete measurement from db
start = "2020-01-01T00:00:00Z"
stop = "2041-01-01T00:00:00Z"
delete_api = influx.delete_api()
delete_api.delete(
    start, 
    stop, 
    '_measurement="luis-airquality"',
    bucket=os.environ['INFLUX_BUCKET'],
    org=os.environ['INFLUX_ORG'],
)