In [None]:
## Cannonical sensor data (downloaded from kaggle)
Each row represents sensor readings captured at a specific timestamp.
| Column Name   | Type      | Description |
|--------------|-----------|-------------|
| timestamp    | datetime  | Time at which data was recorded |
| temperature  | float     | Ambient temperature in Â°C (DHT11) |
| humidity     | float     | Relative humidity in % (DHT11) |
| distance     | float     | Distance measured by ultrasonic sensor in cm |


## Notes
- `timestamp` is mandatory and must be converted to datetime
- All sensor values are numeric
- Distance values change with human presence or object movement
- Additional sensors can be added as new columns without changing the pipeline


In [4]:
import pandas as pd
df=pd.read_csv("iot_telemetry_data.csv")

In [6]:
df.head()
df.dtypes

ts          float64
humidity    float64
motion         bool
temp        float64
dtype: object

In [9]:
df['ts'].head(1)

0    1.590000e+09
Name: ts, dtype: float64

In [10]:
df.head(4)

Unnamed: 0,ts,humidity,motion,temp
0,1590000000.0,51.0,False,22.7
1,1590000000.0,76.0,False,19.700001
2,1590000000.0,50.9,False,22.6
3,1590000000.0,76.800003,False,27.0


In [11]:
df['timestamp']=pd.to_datetime(df['ts'], unit='s')

In [20]:
df[['timestamp']].head()

Unnamed: 0,timestamp
0,2020-05-20 18:40:00
1,2020-05-20 18:40:00
2,2020-05-20 18:40:00
3,2020-05-20 18:40:00
4,2020-05-20 18:40:00


In [18]:
df['temperature']=(df['temp']-32)*5/9

In [19]:
df['temperature'].head()


0   -5.166667
1   -6.833333
2   -5.222222
3   -2.777778
4   -5.222222
Name: temperature, dtype: float64

In [21]:
df['distance'] = df['motion'].apply(lambda x: 50 if x else 200)


In [22]:
df[['motion', 'distance']].head()


Unnamed: 0,motion,distance
0,False,200
1,False,200
2,False,200
3,False,200
4,False,200


In [23]:
df.head()

Unnamed: 0,ts,humidity,motion,temp,timestamp,temperature,distance
0,1590000000.0,51.0,False,22.7,2020-05-20 18:40:00,-5.166667,200
1,1590000000.0,76.0,False,19.700001,2020-05-20 18:40:00,-6.833333,200
2,1590000000.0,50.9,False,22.6,2020-05-20 18:40:00,-5.222222,200
3,1590000000.0,76.800003,False,27.0,2020-05-20 18:40:00,-2.777778,200
4,1590000000.0,50.9,False,22.6,2020-05-20 18:40:00,-5.222222,200


In [32]:
final_df=df[['timestamp', 'temperature', 'humidity', 'distance']]

In [33]:
final_df.head()


Unnamed: 0,timestamp,temperature,humidity,distance
0,2020-05-20 18:40:00,-5.166667,51.0,200
1,2020-05-20 18:40:00,-6.833333,76.0,200
2,2020-05-20 18:40:00,-5.222222,50.9,200
3,2020-05-20 18:40:00,-2.777778,76.800003,200
4,2020-05-20 18:40:00,-5.222222,50.9,200


In [34]:
final_df.dtypes

timestamp      datetime64[ns]
temperature           float64
humidity              float64
distance                int64
dtype: object