In [1]:
"""
Download Noise sensor data (CESVA TA-120) from server and load it into a Pandas DataFrame.

The data was collected during the mySMARTLife project (2018-2020).

mySMARTLife project has received funding from the European Union’s Horizon 2020 research and innovation programme under grant agreement No 731297.
"""

# Import libraries
import os
import gzip
import requests
import pandas as pd

# Plotly graphics
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import *
init_notebook_mode(connected=True)
import plotly.graph_objects as go


# Location of the open noise data set on the server
baseurl = 'https://iot.fvh.fi/opendata/noise/'
index_json = f'{baseurl}index.json'


def list_available_files(index_url=index_json, timeout=30):
    """Print and return the data files listed in the server's index.json.

    index.json contains a list of data files and their sizes; each entry is
    read through its 'name' and 'size' keys, and the size is shown in MB
    (size / 2**20).

    Parameters:
        index_url: URL of the index document.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The decoded JSON list of file entries.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    res = requests.get(index_url, timeout=timeout)
    res.raise_for_status()
    files = res.json()
    print('Files available:\n============================')
    for fobj in files:
        print('{} ({:.2f} MB)'.format(fobj['name'], fobj['size'] / 2**20))
    return files


# Pick the file to analyse. To choose from the server's index instead:
#datafile = list_available_files()[6]['name']
#datafile = 'LAeq-2018-head-200k.csv.gz'
#datafile = 'LAeq1s-2018-05.csv.gz'
datafile = 'LAeq-2018-all.csv.gz'

# Cache file locally
print('\nFile status:')
if os.path.isfile(datafile):
    print(f'{datafile} is already downloaded')
else:
    dataurl = f'{baseurl}{datafile}'
    # Download into a temporary name first, so that an interrupted or failed
    # transfer can never be mistaken for a complete cached file on the next run.
    partfile = f'{datafile}.part'
    try:
        # stream=True avoids holding the whole archive in memory;
        # the timeout applies to each wait on the server, not to the total transfer.
        with requests.get(dataurl, stream=True, timeout=60) as res:
            # Fail loudly on 4xx/5xx instead of saving the error page as data
            res.raise_for_status()
            with open(partfile, 'wb') as f:
                for chunk in res.iter_content(chunk_size=2**20):
                    if chunk:
                        f.write(chunk)
        os.replace(partfile, datafile)
    finally:
        # No-op after a successful os.replace; removes leftovers after a failure
        if os.path.exists(partfile):
            os.remove(partfile)
    print(f'Saved {datafile} locally')



File status:
LAeq-2018-all.csv.gz is already downloaded


In [2]:
# Build the main DataFrame from the locally cached gzip-compressed CSV file
usecols = ['readable_time', 'dBA', 'dev-id']
dtypes = {'dBA': 'float', 'dev-id': 'str'}

df = (
    pd.read_csv(
        datafile,
        compression='gzip',
        header=0,
        usecols=usecols,
        dtype=dtypes,
        parse_dates=['readable_time'],
        sep=',',
    )
    .rename(columns={"readable_time": "time"})
    # Calendar date of every reading, kept as a regular column
    .assign(date=lambda frame: pd.to_datetime(frame['time']).dt.date)
    .reset_index(drop=True)
    # The timestamp becomes the index of the frame
    .set_index('time')
)
df


Unnamed: 0_level_0,dBA,dev-id,date
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-02 14:40:02+00:00,47.5,TA120-T246177,2018-01-02
2018-01-02 14:41:02+00:00,48.8,TA120-T246177,2018-01-02
2018-01-02 14:42:02+00:00,46.7,TA120-T246177,2018-01-02
2018-01-02 14:43:02+00:00,50.3,TA120-T246177,2018-01-02
2018-01-02 14:44:02+00:00,46.1,TA120-T246177,2018-01-02
...,...,...,...
2018-12-30 23:59:31+00:00,35.0,TA120-T246177,2018-12-30
2018-12-30 23:59:32+00:00,48.8,TA120-T246187,2018-12-30
2018-12-30 23:59:46+00:00,28.7,TA120-T246183,2018-12-30
2018-12-30 23:59:51+00:00,51.4,TA120-T246182,2018-12-30


In [8]:
# Visualise the dBA readings of one noise sensor as a line chart over time
devs = ['TA120-T246183']
max_rows = 20000  # only the first rows of each device are plotted
traces = []

for dev in devs:
    # Rows belonging to this device, limited to max_rows
    dev_readings = df[df['dev-id'] == dev].head(max_rows)
    print(dev_readings)
    traces.append(
        go.Scatter(
            x=dev_readings.index.to_pydatetime(),
            y=dev_readings['dBA'],
            mode='lines',
            name=dev,
        )
    )

# go.Layout is referenced explicitly, so the cell does not depend on names
# pulled in by a wildcard import.
layout = go.Layout(
    # showlegend=False,
    height=600,
    width=1000,
    xaxis=dict(title=dict(text='Time')),
    yaxis=dict(title=dict(text='dBA')),
)

fig = go.Figure(data=traces, layout=layout)
iplot(fig)



dBA         dev-id        date
time                                                             
2018-01-22 09:22:52.025000+00:00  48.0  TA120-T246183  2018-01-22
2018-01-22 09:23:45+00:00         46.1  TA120-T246183  2018-01-22
2018-01-22 09:24:45+00:00         45.4  TA120-T246183  2018-01-22
2018-01-22 09:25:45+00:00         48.6  TA120-T246183  2018-01-22
2018-01-22 09:26:45+00:00         45.7  TA120-T246183  2018-01-22
...                                ...            ...         ...
2018-02-05 06:38:35+00:00         37.2  TA120-T246183  2018-02-05
2018-02-05 06:39:35+00:00         37.6  TA120-T246183  2018-02-05
2018-02-05 06:40:35+00:00         37.8  TA120-T246183  2018-02-05
2018-02-05 06:41:35+00:00         37.3  TA120-T246183  2018-02-05
2018-02-05 06:42:35+00:00         37.4  TA120-T246183  2018-02-05

[20000 rows x 3 columns]


In [3]:
# Count the measurements each noise sensor delivered per day.
# One measurement per minute is expected, i.e. about 1440 per day per sensor;
# days on which a sensor has no rows at all are shown as 0.
daily_measurements = (
    df.groupby('date')['dev-id']
    .value_counts()
    .unstack()
    .fillna(0)
    # Copy of the date index as an ordinary column
    .assign(foo=lambda counts: counts.index)
)
daily_measurements.tail(10)


dev-id,TA120-T246174,TA120-T246177,TA120-T246182,TA120-T246183,TA120-T246184,TA120-T246187,TA120-T246189,TA120-T246191,foo
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-12-21,0.0,1440.0,1440.0,1440.0,1440.0,1439.0,1440.0,1440.0,2018-12-21
2018-12-22,0.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,2018-12-22
2018-12-23,0.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,2018-12-23
2018-12-24,0.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,2018-12-24
2018-12-25,0.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,2018-12-25
2018-12-26,0.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,2018-12-26
2018-12-27,0.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,1439.0,2018-12-27
2018-12-28,0.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,1439.0,2018-12-28
2018-12-29,0.0,1440.0,1440.0,1440.0,1439.0,1440.0,1439.0,1440.0,2018-12-29
2018-12-30,0.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,1440.0,2018-12-30


In [5]:
# Visualise the daily measurement counts: one line per sensor, one point per day
sensor_columns = daily_measurements.filter(regex='^TA120').columns

traces = [
    go.Scatter(
        # The dates are the index of daily_measurements, so no helper column is needed
        x=daily_measurements.index,
        y=daily_measurements[col],
        mode='lines',
        name=col,
    )
    for col in sensor_columns
]

# go.Layout is referenced explicitly, so the cell does not depend on names
# pulled in by a wildcard import.
layout = go.Layout(
    # showlegend=False,
    height=600,
    width=1000,
    xaxis=dict(title=dict(text='Date')),
    yaxis=dict(title=dict(text='Measurements per day')),
)

fig = go.Figure(data=traces, layout=layout)
iplot(fig)
