In [66]:
"""
Download noise sensor data (CESVA TA-120) from the server and load it into a pandas DataFrame.

The data was collected during the mySMARTLife project (2018-2020).

mySMARTLife project has received funding from the European Union’s Horizon 2020 research and innovation programme under grant agreement No 731297.
"""

# Import libraries
import os
import gzip
import requests
import pandas as pd

# Location of the noise sample data on the server; index.json lives next to the data files
baseurl = 'https://broker.fvh.io/static/noise/'
index_json = baseurl + 'index.json'

# The index lists the available data files and their sizes.
# The snippet inside the string literal below is disabled code: when enabled it
# prints that listing and picks one file name from it.
"""
res = requests.get(index_json)
files = res.json()
print('Files available:\n============================')
for fobj in files:
    print('{} ({:.2f} MB)'.format(fobj['name'], fobj['size'] / 2**20))

datafile = files[6]['name']  # Pick just one of all files
"""

# File name used by the rest of the notebook; alternatives are left commented out
datafile = 'LAeq-2018-head-200k.csv.gz'
#datafile = 'LAeq1s-2018-05.csv.gz'
#datafile = 'LAeq-2018-all.csv.gz'

# Cache file locally: download `datafile` from `baseurl` only when it is not
# already present in the working directory.
print('\nFile status:')
if os.path.isfile(datafile):
    print(f'{datafile} is already downloaded')
else:
    dataurl = f'{baseurl}{datafile}'
    # requests has no default timeout; without one a stalled server blocks the cell forever
    res = requests.get(dataurl, timeout=60)
    # Raise on 4xx/5xx so an error page is never stored under the data file's name
    # (it would otherwise be treated as a valid cache on the next run)
    res.raise_for_status()
    # Write under a temporary name and rename afterwards, so an interrupted write
    # cannot leave a truncated file that passes the isfile() check above
    tmpfile = f'{datafile}.part'
    with open(tmpfile, 'wb') as f:
        f.write(res.content)
    os.replace(tmpfile, datafile)
    print(f'Saved {datafile} locally')



File status:
LAeq-2018-head-200k.csv.gz is already downloaded


In [85]:
# Build the measurement DataFrame from the locally cached gzip-compressed CSV.
# Only the timestamp, sound level and device id columns are read.
usecols = ['readable_time', 'dBA', 'dev-id']
dtypes = {'dBA': 'float', 'dev-id': 'str'}

df = (
    pd.read_csv(
        datafile,
        compression='gzip',
        header=0,
        usecols=usecols,
        dtype=dtypes,
        parse_dates=['readable_time'],
        sep=',',
    )
    .rename(columns={"readable_time": "time"})
    # Calendar date of every reading, kept as a separate column next to the timestamp
    .assign(date=lambda readings: pd.to_datetime(readings['time']).dt.date)
    .reset_index(drop=True)
    # The timestamp becomes the index; 'date' stays available as a regular column
    .set_index('time')
)
df


Unnamed: 0_level_0,dBA,dev-id,date
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-02 14:40:02+00:00,47.5,TA120-T246177,2018-01-02
2018-01-02 14:41:02+00:00,48.8,TA120-T246177,2018-01-02
2018-01-02 14:42:02+00:00,46.7,TA120-T246177,2018-01-02
2018-01-02 14:43:02+00:00,50.3,TA120-T246177,2018-01-02
2018-01-02 14:44:02+00:00,46.1,TA120-T246177,2018-01-02
...,...,...,...
2018-02-25 19:01:54+00:00,30.6,TA120-T246183,2018-02-25
2018-02-25 19:02:10+00:00,35.3,TA120-T246177,2018-02-25
2018-02-25 19:02:39+00:00,46.2,TA120-T246189,2018-02-25
2018-02-25 19:02:54+00:00,30.2,TA120-T246183,2018-02-25


In [99]:
# Number of readings each noise sensor delivered per day.
# With one reading per minute a sensor should reach about 1440 readings per day.
daily_measurements = (
    df.groupby('date')['dev-id']
    .value_counts()
    .unstack()      # one column per sensor
    .fillna(0)      # sensor/day combinations without any reading count as 0
    # Copy of the date index as an ordinary column named 'foo'
    .assign(foo=lambda counts: counts.index)
)
# Show the ten most recent days
daily_measurements.tail(10)


dev-id,TA120-T246177,TA120-T246183,TA120-T246184,TA120-T246189,foo
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-02-16,1440.0,1440.0,0.0,1440.0,2018-02-16
2018-02-17,1440.0,1440.0,0.0,1440.0,2018-02-17
2018-02-18,1440.0,1440.0,0.0,1440.0,2018-02-18
2018-02-19,1440.0,1440.0,0.0,1440.0,2018-02-19
2018-02-20,1440.0,1440.0,0.0,1440.0,2018-02-20
2018-02-21,1439.0,1440.0,0.0,1440.0,2018-02-21
2018-02-22,1440.0,1440.0,0.0,1440.0,2018-02-22
2018-02-23,1440.0,1440.0,0.0,1440.0,2018-02-23
2018-02-24,1440.0,1440.0,0.0,1440.0,2018-02-24
2018-02-25,1144.0,1143.0,0.0,1143.0,2018-02-25


In [108]:
# Visualise the daily measurement counts: one line per noise sensor.
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# NOTE(review): the wildcard import is kept only in case other cells rely on the
# names it injects; nothing in this cell depends on it any more.
from plotly.graph_objs import *
init_notebook_mode(connected=True)
import plotly.graph_objects as go

# Build one trace per sensor column (columns whose name starts with 'TA120').
# The dates are taken straight from the index, so no helper column is needed.
traces = [
    go.Scatter(
        x=daily_measurements.index,
        y=daily_measurements[col],
        mode='lines',
        name=col,
    )
    for col in daily_measurements.filter(regex='^TA120').columns
]

# Title and axis labels let the figure stand on its own when the notebook is skimmed
layout = go.Layout(
    title='Daily measurement count per noise sensor',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Measurements per day'),
    height=400,
    width=1000,
)

# go.Figure validates traces and layout, unlike a plain dict
fig = go.Figure(data=traces, layout=layout)
iplot(fig)
