# Datathink 2023
Roma 2023. Biblioteca Hertziana + DSV@UZH + Max Planck Society
# Sensing data fusion
This notebook illustrates how to combine and aggregate different types of data collected from different sensors:
- GPS tracks from location devices (i.e. smartphones, smartwatches, activity trackers, etc.)
- Gas / air quality data from the CoCi's CoSense unit by COSS@ETHZ
- Electric-field, electromagnetic-field and radio-frequency readings from a GQ multimeter

In [34]:
# Standard library first, third-party packages after.
import os

import pandas as pd

# Allow frames of up to 200 rows to be shown in full before pandas truncates the display.
pd.set_option("display.max_rows", 200)

# Importing GPS tracks

## method 2 with gpxo

In [35]:
import gpxo  # third-party GPX reader, used only by this loading step

# Collect every .gpx file found in the current working directory.
# Sorting the joined paths gives the same order as sorting the file names,
# because all of them share the same folder prefix, so a single sort suffices.
folder = os.getcwd()
paths = sorted(
    os.path.join(folder, file)
    for file in os.listdir(folder)
    if file.endswith(".gpx")
)
print(paths)

# Parse each track and keep its frame; concatenate once at the end instead of
# growing df_gps inside the loop, which re-copies every accumulated row on each
# pass and starts from an empty frame (deprecated input for pd.concat).
gps_frames = []
for gps_tracks in paths:
    gps_data = gpxo.Track(gps_tracks)  # stays bound to the last track after the loop
    gps_frames.append(gps_data.data)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# .gpx file was found (the result the original loop produced in that case).
df_gps = pd.concat(gps_frames) if gps_frames else pd.DataFrame()
df_gps

['/mnt/c/Users/jaargota/Documents/202302-Spring 2023/20230227-Datathink_Rome/test_roma_20230226/2023-02-26_Feb_26_2023_5_38_55_PM.gpx']


Unnamed: 0_level_0,latitude (°),longitude (°),distance (km),compass (°),duration (s),velocity (km/h),elevation (m)
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-02-26 16:38:56,41.903963,12.485487,0.000000,328.038520,0.0,0.246150,89.27
2023-02-26 16:39:12,41.903972,12.485480,0.001094,258.570558,16.0,0.213750,88.24
2023-02-26 16:39:34,41.903963,12.485478,0.002128,190.889652,38.0,0.288327,88.29
2023-02-26 16:39:45,41.903953,12.485475,0.003191,201.081261,49.0,0.326018,89.31
2023-02-26 16:40:07,41.903940,12.485465,0.004916,257.757030,71.0,1.456305,89.37
...,...,...,...,...,...,...,...
2023-02-26 18:30:12,41.893852,12.477600,7.408815,163.732832,6676.0,2.515423,60.51
2023-02-26 18:30:17,41.893836,12.477593,7.410674,152.489931,6681.0,1.190380,63.34
2023-02-26 18:30:21,41.893833,12.477606,7.411865,93.124002,6685.0,3.537900,65.97
2023-02-26 18:30:22,41.893835,12.477620,7.413019,165.808082,6686.0,3.928800,66.21


In [36]:
# Inspect the time index of the loaded tracks before any timezone handling.
df_gps.index

DatetimeIndex(['2023-02-26 16:38:56', '2023-02-26 16:39:12',
               '2023-02-26 16:39:34', '2023-02-26 16:39:45',
               '2023-02-26 16:40:07', '2023-02-26 16:40:10',
               '2023-02-26 16:40:11', '2023-02-26 16:40:12',
               '2023-02-26 16:40:14', '2023-02-26 16:40:55',
               ...
               '2023-02-26 18:30:06', '2023-02-26 18:30:07',
               '2023-02-26 18:30:08', '2023-02-26 18:30:09',
               '2023-02-26 18:30:10', '2023-02-26 18:30:12',
               '2023-02-26 18:30:17', '2023-02-26 18:30:21',
               '2023-02-26 18:30:22', '2023-02-26 18:30:24'],
              dtype='datetime64[ns]', name='time', length=4795, freq=None)

In [37]:
# Treat the naive GPS timestamps as GMT, convert them to CET and drop the
# timezone again so the index holds naive local (CET) wall-clock times.
# The guard keeps this cell idempotent: once the index is already named
# "time_CET", re-running the cell must not shift the timestamps a second time.
if df_gps.index.name != "time_CET":
    time_cet = (
        df_gps.index
        .tz_localize("GMT")   # declare the naive stamps as GMT
        .tz_convert("CET")    # move to Central European time
        .tz_localize(None)    # strip tz info, keeping the local clock time
        .rename("time_CET")
    )
    # set_index accepts an Index directly, so no temporary column is added
    # to the frame (the original mutated df_gps in place to do this).
    df_gps = df_gps.set_index(time_cet)

# Keep the rows in chronological order.
df_gps = df_gps.sort_index(ascending=True)
df_gps

Unnamed: 0_level_0,latitude (°),longitude (°),distance (km),compass (°),duration (s),velocity (km/h),elevation (m)
time_CET,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-02-26 17:38:56,41.903963,12.485487,0.000000,328.038520,0.0,0.246150,89.27
2023-02-26 17:39:12,41.903972,12.485480,0.001094,258.570558,16.0,0.213750,88.24
2023-02-26 17:39:34,41.903963,12.485478,0.002128,190.889652,38.0,0.288327,88.29
2023-02-26 17:39:45,41.903953,12.485475,0.003191,201.081261,49.0,0.326018,89.31
2023-02-26 17:40:07,41.903940,12.485465,0.004916,257.757030,71.0,1.456305,89.37
...,...,...,...,...,...,...,...
2023-02-26 19:30:12,41.893852,12.477600,7.408815,163.732832,6676.0,2.515423,60.51
2023-02-26 19:30:17,41.893836,12.477593,7.410674,152.489931,6681.0,1.190380,63.34
2023-02-26 19:30:21,41.893833,12.477606,7.411865,93.124002,6685.0,3.537900,65.97
2023-02-26 19:30:22,41.893835,12.477620,7.413019,165.808082,6686.0,3.928800,66.21


In [38]:
# gps_data is the loop variable left over from the GPX loading cell, so it
# refers to the last .gpx file that was read; only that track is passed to map().
# NOTE(review): presumably renders an embedded map of the track — confirm against the gpxo docs.
gps_data.map(embed=True)



## resampling to seconds and interpolating

In [18]:
# Put the track on a regular one-second grid; seconds without a GPS fix are
# filled by straight-line interpolation between the neighbouring fixes.
# NOTE(review): every column is interpolated linearly, including "compass (°)",
# which is not meaningful across the 0°/360° wrap-around — confirm whether
# the heading is used downstream.
df_gps = (
    df_gps
    .resample(rule="s")
    .interpolate(method="linear")
)
df_gps

Unnamed: 0_level_0,latitude (°),longitude (°),distance (km),compass (°),duration (s),velocity (km/h),elevation (m)
time_CET,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-02-26 17:38:56,41.903963,12.485487,0.000000,328.038520,0.0,0.246150,89.270000
2023-02-26 17:38:57,41.903964,12.485486,0.000068,323.696773,1.0,0.244125,89.205625
2023-02-26 17:38:58,41.903964,12.485486,0.000137,319.355025,2.0,0.242100,89.141250
2023-02-26 17:38:59,41.903965,12.485486,0.000205,315.013278,3.0,0.240075,89.076875
2023-02-26 17:39:00,41.903965,12.485485,0.000273,310.671530,4.0,0.238050,89.012500
...,...,...,...,...,...,...,...
2023-02-26 19:30:20,41.893834,12.477603,7.411567,107.965484,6684.0,2.951020,65.312500
2023-02-26 19:30:21,41.893833,12.477606,7.411865,93.124002,6685.0,3.537900,65.970000
2023-02-26 19:30:22,41.893835,12.477620,7.413019,165.808082,6686.0,3.928800,66.210000
2023-02-26 19:30:23,41.893832,12.477609,7.413985,207.920710,6687.0,3.703200,65.960000


# Getting gas sensor data

In [19]:
# Clock offset subtracted from the gas-sensor timestamps (currently zero).
time_correction_factor_gas = pd.Timedelta(hours=0, minutes=0, seconds=0)

# Read the log with its "Date/Time" column as index, then replace that index
# with parsed, offset-corrected timestamps under the name "time".
df_gas = pd.read_csv("data.csv", sep=",", index_col="Date/Time")
df_gas = df_gas.set_index(
    (pd.to_datetime(df_gas.index) - time_correction_factor_gas).rename("time")
)
df_gas

Unnamed: 0_level_0,Temperature (C),Humidity (%),PM1 (ug/m3),PM2.5 (ug/m3),PM10 (ug/m3)
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-02-23 17:40:54,20.690475,39.124599,2.71,7.26,12.19
2023-02-23 17:43:19,21.646548,39.015591,4.69,9.19,13.82
2023-02-23 17:44:20,22.337467,38.364841,2.60,6.28,10.21
2023-02-23 17:45:19,22.976740,38.016118,6.02,6.63,6.63
2023-02-23 17:46:20,23.021676,37.418868,4.24,14.03,24.82
...,...,...,...,...,...
2023-02-27 01:22:20,19.651100,71.693410,3.08,5.64,8.23
2023-02-27 01:23:19,20.163813,70.154319,6.73,11.51,16.24
2023-02-27 01:24:19,20.608618,68.929953,1.24,3.77,6.54
2023-02-27 01:25:19,21.245445,67.584478,2.36,4.56,6.82


## grouping by time (index) to avoid errors caused by duplicate index values

In [20]:
# Collapse rows that share the same "time" index value into a single row
# holding the mean of each column.
df_gas = df_gas.groupby(level="time").mean()

## resampling to seconds and interpolating

In [21]:
# Expand the gas readings to a one-second grid and fill the new rows by
# linear interpolation between consecutive measurements.
df_gas = (
    df_gas
    .resample(rule="s")
    .interpolate(method="linear")
)
df_gas

Unnamed: 0_level_0,Temperature (C),Humidity (%),PM1 (ug/m3),PM2.5 (ug/m3),PM10 (ug/m3)
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-02-23 17:40:54,20.690475,39.124599,2.710000,7.260000,12.190000
2023-02-23 17:40:55,20.697068,39.123848,2.723655,7.273310,12.201241
2023-02-23 17:40:56,20.703662,39.123096,2.737310,7.286621,12.212483
2023-02-23 17:40:57,20.710256,39.122344,2.750966,7.299931,12.223724
2023-02-23 17:40:58,20.716849,39.121592,2.764621,7.313241,12.234966
...,...,...,...,...,...
2023-02-27 01:26:16,21.235066,66.744493,1.836721,4.718852,7.819836
2023-02-27 01:26:17,21.234884,66.729757,1.827541,4.721639,7.837377
2023-02-27 01:26:18,21.234702,66.715020,1.818361,4.724426,7.854918
2023-02-27 01:26:19,21.234520,66.700283,1.809180,4.727213,7.872459


# Getting GQ data

In [28]:
# Clock offset measured on the test file emfhistory_02242023_145731.csv: its
# last timestamp is 2023/02/24 18:57:09 although the file was written at
# 15:06:39, i.e. a shift of 3:50:30.
# The offset is only defined here; it is NOT subtracted from the timestamps below.
time_correction_factor_GQ = pd.Timedelta(hours=3, minutes=50, seconds=30)

# Map the unit-only column headers of the GQ export to descriptive names.
names_cols_GQ = {
    "mG": "EMF(mG)",
    "V/m": "EF(V/m)",
    "mW/m2": "RF Power Density(mW/m2)",
    "mW/cm2": "RF Power Density(mW/cm2)",
    "pW/cm2": "RF Power Density(pW/cm2)",
}

# The column header sits on the third line of the file (header=[2]).
df_GQ = (
    pd.read_csv("emfhistory_02272023_012226.csv", sep=",", header=[2])
    .rename(columns=names_cols_GQ)
)

# Index the readings by the parsed "Date and Time" values under the name
# "time"; the original text column is kept in the frame.
df_GQ = df_GQ.set_index(pd.to_datetime(df_GQ["Date and Time"]).rename("time"))
df_GQ

Unnamed: 0_level_0,Date and Time,EMF(mG),EF(V/m),RF Power Density(mW/m2),RF Power Density(mW/cm2),RF Power Density(pW/cm2),Possible Source
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-02-26 00:00:59,2023/02/26 00:00:59,0.6,1.1,3.144,0.000,314415.875,WiFi/Phone
2023-02-26 00:01:00,2023/02/26 00:01:00,0.6,1.1,3.144,0.000,314415.875,WiFi/Phone
2023-02-26 00:01:01,2023/02/26 00:01:01,0.6,1.1,3.144,0.000,314415.875,WiFi/Phone
2023-02-26 00:01:02,2023/02/26 00:01:02,0.6,1.1,3.144,0.000,314415.875,WiFi/Phone
2023-02-26 00:01:03,2023/02/26 00:01:03,0.6,24.5,15.154,0.002,1515382.625,WiFi/Phone
...,...,...,...,...,...,...,...
2023-02-27 01:22:46,2023/02/27 01:22:46,0.4,802.5,0.586,0.000,58557.145,Mixed
2023-02-27 01:22:47,2023/02/27 01:22:47,0.4,802.5,0.586,0.000,58557.145,Mixed
2023-02-27 01:22:48,2023/02/27 01:22:48,0.4,802.5,0.586,0.000,58557.145,Mixed
2023-02-27 01:22:49,2023/02/27 01:22:49,0.4,802.5,1.533,0.000,153272.547,Mixed


# merging

In [29]:
# Align the three sources on their timestamp index.
# Step 1 keeps every GPS row and attaches gas readings where available (left join).
# Step 2 is an inner join (the merge default, spelled out here), so only
# timestamps that also have a GQ reading remain in the result.
df_merge = (
    df_gps
    .merge(df_gas, how="left", left_index=True, right_index=True)
    .merge(df_GQ, how="inner", left_index=True, right_index=True)
)

In [30]:
# Show the merged frame; append .head(10) for a short preview instead.
df_merge

Unnamed: 0,latitude (°),longitude (°),distance (km),compass (°),duration (s),velocity (km/h),elevation (m),Temperature (C),Humidity (%),PM1 (ug/m3),PM2.5 (ug/m3),PM10 (ug/m3),Date and Time,EMF(mG),EF(V/m),RF Power Density(mW/m2),RF Power Density(mW/cm2),RF Power Density(pW/cm2),Possible Source
2023-02-26 17:41:00,41.904009,12.485442,0.013558,294.708345,124.0,0.995074,76.450000,16.507948,47.437417,2.170000,3.214915,4.168136,2023/02/26 17:41:00,0.0,1.5,0.000,0.000,0.000000e+00,Static
2023-02-26 17:41:01,41.904006,12.485438,0.014022,289.825418,125.0,1.473882,76.356923,16.508724,47.429296,2.130000,3.127288,4.030339,2023/02/26 17:41:01,0.0,1.6,8.769,0.001,8.768507e+05,WiFi/Phone
2023-02-26 17:41:02,41.904003,12.485433,0.014486,284.942491,126.0,1.952691,76.263846,16.509501,47.421175,2.090000,3.039661,3.892542,2023/02/26 17:41:02,0.0,0.2,0.453,0.000,4.525961e+04,---
2023-02-26 17:41:03,41.904000,12.485429,0.014950,280.059564,127.0,2.431499,76.170769,16.510277,47.413053,2.050000,2.952034,3.754746,2023/02/26 17:41:03,0.6,0.6,0.024,0.000,2.403450e+03,Mixed
2023-02-26 17:41:04,41.903997,12.485425,0.015415,275.176636,128.0,2.910307,76.077692,16.511053,47.404932,2.010000,2.864407,3.616949,2023/02/26 17:41:04,0.6,1.8,0.005,0.000,4.761250e+02,Mixed
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-02-26 19:30:20,41.893834,12.477603,7.411567,107.965484,6684.0,2.951020,65.312500,13.497863,56.871721,6.400000,9.070000,11.410000,2023/02/26 19:30:20,1.5,1.7,393.900,0.039,3.939001e+07,WiFi/Phone
2023-02-26 19:30:21,41.893833,12.477606,7.411865,93.124002,6685.0,3.537900,65.970000,13.513804,56.930372,6.512167,9.196333,11.539333,2023/02/26 19:30:21,1.7,1.1,393.900,0.039,3.939001e+07,WiFi/Phone
2023-02-26 19:30:22,41.893835,12.477620,7.413019,165.808082,6686.0,3.928800,66.210000,13.529745,56.989022,6.624333,9.322667,11.668667,2023/02/26 19:30:22,1.7,0.4,428.721,0.043,4.287211e+07,WiFi/Phone
2023-02-26 19:30:23,41.893832,12.477609,7.413985,207.920710,6687.0,3.703200,65.960000,13.545686,57.047673,6.736500,9.449000,11.798000,2023/02/26 19:30:23,1.6,1.3,484.937,0.048,4.849374e+07,WiFi/Phone


In [31]:
# Write the merged table, including its timestamp index, to a comma-separated
# file in the working directory.
df_merge.to_csv(
    path_or_buf="20230226_1741_ROMA_test_merged.csv",
    sep=",",
)