# Example of Data Analysis with DCD Hub Data

First, we import the Python SDK

In [28]:
from dcd.entities.thing import Thing

We provide the thing ID and access token (replace with yours)

In [29]:
from dotenv import load_dotenv
import os
load_dotenv()
THING_ID = os.environ['THING_ID']
THING_TOKEN = os.environ['THING_TOKEN']

We instantiate a Thing with its credential, then we fetch its details

In [30]:
my_thing = Thing(thing_id=THING_ID, token=THING_TOKEN)
my_thing.read()

INFO:dcd:things:wheelchair_speed-e706:Initialising MQTT connection for Thing 'dcd:things:wheelchair_speed-e706'
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): dwd.tudelft.nl:443
INFO:dcd:things:wheelchair_speed-e706:Connection successful
DEBUG:urllib3.connectionpool:https://dwd.tudelft.nl:443 "GET /api/things/dcd:things:wheelchair_speed-e706 HTTP/1.1" 200 6724


DigiCertCA.crt exist.
{'thing': {'id': 'dcd:things:wheelchair_speed-e706', 'name': 'Wheelchair_speed', 'description': '', 'type': 'Speed', 'properties': [{'type': 'THREE_DIMENSIONS', 'name': 'My Random Property', 'description': '', 'dimensions': [{'name': 'Value1', 'description': '', 'unit': ''}, {'name': 'Value2', 'description': '', 'unit': ''}, {'name': 'Value3', 'description': '', 'unit': ''}, {'name': 'Value1', 'description': '', 'unit': ''}, {'name': 'Value2', 'description': '', 'unit': ''}, {'name': 'Value3', 'description': '', 'unit': ''}], 'id': 'my-random-property-5f3c', 'classes': [], 'values': [], 'entityId': 'dcd:things:wheelchair_speed-e706', 'readAt': 1572095737158, 'registeredAt': 1571392699000}, {'type': 'ONE_DIMENSION', 'name': 'one', 'description': '', 'dimensions': [{'name': 'Value', 'description': '', 'unit': ''}], 'id': 'one-28d7', 'classes': [], 'values': [], 'entityId': 'dcd:things:wheelchair_speed-e706', 'readAt': 1572095737158, 'registeredAt': 1571994791000}, {

What does a Thing look like?

In [31]:
my_thing.to_json()

{'id': 'dcd:things:wheelchair_speed-e706',
 'name': 'Wheelchair_speed',
 'description': '',
 'type': 'Speed',
 'properties': [{'id': 'my-random-property-5f3c',
   'name': 'My Random Property',
   'description': '',
   'type': 'THREE_DIMENSIONS',
   'dimensions': [{'name': 'Value1', 'description': '', 'unit': ''},
    {'name': 'Value2', 'description': '', 'unit': ''},
    {'name': 'Value3', 'description': '', 'unit': ''},
    {'name': 'Value1', 'description': '', 'unit': ''},
    {'name': 'Value2', 'description': '', 'unit': ''},
    {'name': 'Value3', 'description': '', 'unit': ''}]},
  {'id': 'one-28d7',
   'name': 'one',
   'description': '',
   'type': 'ONE_DIMENSION',
   'dimensions': [{'name': 'Value', 'description': '', 'unit': ''}]},
  {'id': 'random-shit-aa03',
   'name': 'RANDOM SHIT',
   'description': '',
   'type': 'THREE_DIMENSIONS',
   'dimensions': [{'name': 'Value1', 'description': '', 'unit': ''},
    {'name': 'Value2', 'description': '', 'unit': ''},
    {'name': 'Val

Which property do we want to explore and over which time frame?

In [26]:
from datetime import datetime
# What dates?
START_DATE = "2019-10-08 21:17:00"
END_DATE = "2019-11-08 21:25:00"

from datetime import datetime
DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
from_ts = datetime.timestamp(datetime.strptime(START_DATE, DATE_FORMAT)) * 1000
to_ts = datetime.timestamp(datetime.strptime(END_DATE, DATE_FORMAT)) * 1000

Let's find this property and read the data.

In [27]:
PROPERTY_NAME = "one"

my_property = my_thing.find_property_by_name(PROPERTY_NAME)
my_property.read(from_ts, to_ts)

AttributeError: 'NoneType' object has no attribute 'read'

How many data point did we get?

In [None]:
print(len(my_property.values))

Display values

In [None]:
my_property.values

# From CSV

In [None]:
from numpy import genfromtxt
import pandas as pd
data = genfromtxt('test.csv', delimiter=',')
data_frame = pd.DataFrame(data[:,1:], index = pd.DatetimeIndex(pd.to_datetime(data[:,0], unit='ms')), columns = ['x', 'y', 'z'])
data_frame

# Plot some charts with Matplotlib
In this example we plot an histogram, distribution of all values and dimensions.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from numpy import ma
data = np.array(my_property.values)

In [None]:
figure(num=None, figsize=(15, 5))
t = data_frame.index
plt.plot(t, data_frame.x, t, data_frame.y, t, data_frame.z)

In [None]:
plt.hist(data[:,1:])
plt.show()

# Generate statistics with NumPy and Pandas

In [None]:
import numpy as np
from scipy.stats import kurtosis, skew

In [None]:
np.min(data[:,1:4], axis=0)

In [None]:
skew(data[:,1:4])

You can select a column (slice) of data, or a subset of data. In the example below we select rows
from 10 to 20 (10 in total) and the colum 1 to x (i.e skiping the first column representing the time).

In [None]:
data[:10,1:]

Out of the box, Pandas give you some statistics, do not forget to convert your array into a DataFrame.

In [None]:
data_frame = pd.DataFrame(data[:,1:], index = pd.DatetimeIndex(pd.to_datetime(data[:,0], unit='ms')))
pd.DataFrame.describe(data_frame)

In [None]:
data_frame.rolling(10).std()

# Rolling / Sliding Window
To apply statistics on a sliding (or rolling) window, we can use the rolling() function of a data frame. In the example below, we roll with a window size of 4 elements to apply a skew()

In [None]:
rolling2s = data_frame.rolling('2s').std()
plt.plot(rolling2s)
plt.show()

In [32]:
rolling100_data_points = data_frame.rolling(100).skew()
plt.plot(rolling100_data_points)
plt.show()

NameError: name 'data_frame' is not defined

# Zero Crossing

In [None]:
plt.hist(np.where(np.diff(np.sign(data[:,1]))))
plt.show()

https://docs.scipy.org/doc/scipy/reference/stats.html#discrete-distributions