# Data union

For each hive (Wurzburg and Schwartau), four CSV files are provided with data from 2017 to 2019:

- `flow_<hive>.csv`: For each timestamp, the number of arrivals to and departures from the beehive. A positive number is a count of arrivals and a negative number is a count of departures. Note that these two values appear in the dataset with the same timestamp.
- `humidity_<hive>.csv`: Humidity level of the beehive over time, expressed in %.
- `temperature_<hive>.csv`: Temperature of the beehive over time, in °C.
- `weight_<hive>.csv`: Weight of the beehive over time, in kg.

Goal: resample every series to a daily frequency and join the results into a single dataframe per hive.


In [25]:
# Standard library: filesystem paths (used to create the output directory)
from pathlib import Path

# Pandas for data loading and processing
import pandas as pd

In [26]:
# Load the four raw measurement files of the Schwartau hive
# (order: flow, humidity, temperature, weight).
(
    df_flow_schwartau,
    df_humidity_schwartau,
    df_temperature_schwartau,
    df_weight_schwartau,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/flow_schwartau.csv',
        'data/humidity_schwartau.csv',
        'data/temperature_schwartau.csv',
        'data/weight_schwartau.csv',
    )
)

# Load the same four measurement files for the Wurzburg hive.
(
    df_flow_wurzburg,
    df_humidity_wurzburg,
    df_temperature_wurzburg,
    df_weight_wurzburg,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/flow_wurzburg.csv',
        'data/humidity_wurzburg.csv',
        'data/temperature_wurzburg.csv',
        'data/weight_wurzburg.csv',
    )
)

In [27]:
# Convert the 'timestamp' column of every raw frame from text to datetime values.
# The explicit format makes pandas reject any row that does not match it.
timestamp_format = '%Y-%m-%d %H:%M:%S'

for raw_frame in (
    # Schwartau
    df_flow_schwartau,
    df_humidity_schwartau,
    df_temperature_schwartau,
    df_weight_schwartau,
    # Wurzburg
    df_flow_wurzburg,
    df_humidity_wurzburg,
    df_temperature_wurzburg,
    df_weight_wurzburg,
):
    # Column assignment mutates the frame itself, so the df_* names see the change.
    raw_frame['timestamp'] = pd.to_datetime(raw_frame['timestamp'], format=timestamp_format)

In [28]:
# Show the first five rows of each raw Schwartau frame to check columns and sampling rate.
for schwartau_frame in (
    df_flow_schwartau,
    df_humidity_schwartau,
    df_temperature_schwartau,
    df_weight_schwartau,
):
    print(schwartau_frame.head(5))

            timestamp  flow
0 2017-01-01 14:15:00     0
1 2017-01-01 14:16:00     0
2 2017-01-01 14:17:00     0
3 2017-01-01 14:18:00     0
4 2017-01-01 14:19:00     0
            timestamp   humidity
0 2017-01-01 13:00:00  98.040310
1 2017-01-02 01:00:00  98.610556
2 2017-01-02 13:00:00  99.002083
3 2017-01-03 01:00:00  98.486806
4 2017-01-03 13:00:00  98.320139
            timestamp  temperature
0 2017-01-01 14:10:00          NaN
1 2017-01-01 14:15:00       12.340
2 2017-01-01 14:20:00       12.270
3 2017-01-01 14:25:00       12.276
4 2017-01-01 14:30:00       12.356
            timestamp        weight
0 2017-01-01 13:00:00  50736.790078
1 2017-01-02 01:00:00  50700.685000
2 2017-01-02 13:00:00  50614.907500
3 2017-01-03 01:00:00  50739.824167
4 2017-01-03 13:00:00  50799.746944


In [39]:
# Show the first five rows of each raw Wurzburg frame to check columns and sampling rate.
for wurzburg_frame in (
    df_flow_wurzburg,
    df_humidity_wurzburg,
    df_temperature_wurzburg,
    df_weight_wurzburg,
):
    print(wurzburg_frame.head(5))

            timestamp  flow
0 2017-01-01 05:15:00     0
1 2017-01-01 05:16:00     0
2 2017-01-01 05:17:00     0
3 2017-01-01 05:18:00     0
4 2017-01-01 05:19:00     0
            timestamp   humidity
0 2017-01-01 05:00:00  92.406667
1 2017-01-01 06:00:00  92.270000
2 2017-01-01 07:00:00  92.575000
3 2017-01-01 08:00:00  92.840000
4 2017-01-01 09:00:00  93.640000
            timestamp  temperature
0 2017-01-01 05:00:00    -1.911244
1 2017-01-01 06:00:00    -1.866717
2 2017-01-01 07:00:00    -2.077833
3 2017-01-01 08:00:00    -2.327167
4 2017-01-01 09:00:00    -2.791233
            timestamp   weight
0 2017-01-01 05:15:00  52.6974
1 2017-01-01 05:16:00  52.6974
2 2017-01-01 05:17:00  52.6974
3 2017-01-01 05:18:00  52.6974
4 2017-01-01 05:19:00  52.6973


In [45]:
# Resampling data daily 
flow_schwartau_daily = df_flow_schwartau.groupby(pd.Grouper(key = 'timestamp',freq='D')).sum()
humidity_schwartau_daily = df_humidity_schwartau.groupby(pd.Grouper(key = 'timestamp',freq='D')).mean()
temperature_schwartau_daily = df_temperature_schwartau.groupby(pd.Grouper(key = 'timestamp',freq='D')).mean()
weight_schwartau_daily = df_weight_schwartau.groupby(pd.Grouper(key = 'timestamp',freq='D')).mean()

# Resampling data daily 
flow_wurzburg_daily = df_flow_wurzburg.groupby(pd.Grouper(key = 'timestamp',freq='D')).sum()
humidity_wurzburg_daily = df_humidity_wurzburg.groupby(pd.Grouper(key = 'timestamp',freq='D')).mean()
temperature_wurzburg_daily = df_temperature_wurzburg.groupby(pd.Grouper(key = 'timestamp',freq='D')).mean()
weight_wurzburg_daily = df_weight_wurzburg.groupby(pd.Grouper(key = 'timestamp',freq='D')).mean()

In [46]:
print(flow_schwartau_daily.head(5))
print(humidity_schwartau_daily.head(5))
print(temperature_schwartau_daily.head(5))
print(weight_schwartau_daily.head(5))

            flow
timestamp       
2017-01-01    -5
2017-01-02   -22
2017-01-03   -30
2017-01-04   -28
2017-01-05   -11
             humidity
timestamp            
2017-01-01  98.040310
2017-01-02  98.806319
2017-01-03  98.403472
2017-01-04  97.292292
2017-01-05  96.583889
            temperature
timestamp              
2017-01-01    15.477829
2017-01-02    13.376389
2017-01-03    16.181965
2017-01-04    14.724111
2017-01-05     9.058442
                  weight
timestamp               
2017-01-01  50736.790078
2017-01-02  50657.796250
2017-01-03  50769.785556
2017-01-04  50532.602778
2017-01-05  50109.808194


In [47]:
print(flow_wurzburg_daily.head(5))
print(humidity_wurzburg_daily.head(5))
print(temperature_wurzburg_daily.head(5))
print(weight_wurzburg_daily.head(5))

            flow
timestamp       
2017-01-01    -6
2017-01-02    -9
2017-01-03    -7
2017-01-04   -21
2017-01-05   -12
             humidity
timestamp            
2017-01-01  92.058947
2017-01-02  92.794722
2017-01-03  91.173472
2017-01-04  91.077917
2017-01-05  82.656076
            temperature
timestamp              
2017-01-01    -0.082160
2017-01-02     1.825578
2017-01-03     3.082557
2017-01-04     3.750071
2017-01-05     2.259225
               weight
timestamp            
2017-01-01  52.661690
2017-01-02  52.671111
2017-01-03  52.653767
2017-01-04  52.610835
2017-01-05  52.623591


In [48]:
# Combine the four daily series of each hive into one frame, joined on the shared
# 'timestamp' index. merge() defaults to an inner join, so only days present in
# all four series are kept.
schwartau_daily = flow_schwartau_daily
for daily_part in (
    humidity_schwartau_daily,
    temperature_schwartau_daily,
    weight_schwartau_daily,
):
    schwartau_daily = schwartau_daily.merge(daily_part, on='timestamp')

wurzburg_daily = flow_wurzburg_daily
for daily_part in (
    humidity_wurzburg_daily,
    temperature_wurzburg_daily,
    weight_wurzburg_daily,
):
    wurzburg_daily = wurzburg_daily.merge(daily_part, on='timestamp')

In [53]:
# Preview the first ten rows of the merged daily Schwartau frame.
schwartau_daily.head(10)

Unnamed: 0_level_0,flow,humidity,temperature,weight
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-01,-5,98.04031,15.477829,50736.790078
2017-01-02,-22,98.806319,13.376389,50657.79625
2017-01-03,-30,98.403472,16.181965,50769.785556
2017-01-04,-28,97.292292,14.724111,50532.602778
2017-01-05,-11,96.583889,9.058442,50109.808194
2017-01-06,-14,96.399097,4.508425,50172.726875
2017-01-07,-7,96.107222,7.758132,50553.079583
2017-01-08,-18,97.23875,10.721146,50536.130833
2017-01-09,-18,98.338611,9.040472,50439.831597
2017-01-10,-18,99.142708,8.518958,50447.250069


In [54]:
# Preview the first ten rows of the merged daily Wurzburg frame.
wurzburg_daily.head(10)

Unnamed: 0_level_0,flow,humidity,temperature,weight
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-01,-6,92.058947,-0.08216,52.66169
2017-01-02,-9,92.794722,1.825578,52.671111
2017-01-03,-7,91.173472,3.082557,52.653767
2017-01-04,-21,91.077917,3.750071,52.610835
2017-01-05,-12,82.656076,2.259225,52.623591
2017-01-06,-13,91.545347,-0.937771,52.292326
2017-01-07,-6,89.077361,-2.648782,52.384759
2017-01-08,-10,90.131181,1.812449,52.649711
2017-01-09,-20,90.166528,3.659525,52.618317
2017-01-10,-21,86.372014,4.034587,52.574751


In [56]:
# Write one daily summary CSV per hive. The timestamp index is written as the
# first column (index=True) together with a header row.
summary_dir = Path('data/summary')

# to_csv() does not create missing folders; make sure the output directory exists
# so the export also works on a fresh checkout.
summary_dir.mkdir(parents=True, exist_ok=True)

schwartau_daily.to_csv(summary_dir / 'schwartau_daily.csv', index=True, header=True)
wurzburg_daily.to_csv(summary_dir / 'wurzburg_daily.csv', index=True, header=True)