In [240]:
import pandas as pd
import numpy as np
import datetime
from scipy.spatial.distance import euclidean
from scipy.spatial import distance
from workalendar.europe import Italy
from astral import Astral
from tsfresh import extract_relevant_features

# Preparing the dataset. 

In [161]:
w = pd.read_csv('../traffic/T.csv')
file = pd.read_csv('../traffic/Traffic_dataset_v0.1.csv', parse_dates=['DATETIME'], date_parser=lambda x: pd.to_datetime(x, format='%Y-%m-%d %H:%M'))
file.head()

Unnamed: 0,ID_SENSOR,DIRECTION,DATETIME,NUM_VEHICLES,SPEED_MEAN,SPEED_SD,EVENT,LON,LAT
0,1,A,2018-09-01 02:00:00,,,,NO_EVENT,12.291754,41.800588
1,1,A,2018-09-01 02:15:00,,,,NO_EVENT,12.291754,41.800588
2,1,A,2018-09-01 02:30:00,,,,NO_EVENT,12.291754,41.800588
3,1,A,2018-09-01 02:45:00,,,,NO_EVENT,12.291754,41.800588
4,1,A,2018-09-01 03:00:00,,,,NO_EVENT,12.291754,41.800588


In [162]:
file_new = file.dropna(subset=['NUM_VEHICLES', 'SPEED_MEAN', 'SPEED_SD'], axis=0, thresh=2)
file_new = file_new.reset_index()

In [163]:
w.drop(['Unnamed: 0', 'Unnamed: 0.1'],axis=1, inplace=True)

In [164]:
file_final = pd.concat([file_new, w], axis=1)

In [165]:
file_final.head()

Unnamed: 0,index,ID_SENSOR,DIRECTION,DATETIME,NUM_VEHICLES,SPEED_MEAN,SPEED_SD,EVENT,LON,LAT,Temp,Humidity,Wind,Precipitation,Pressure
0,24,1,A,2018-09-01 08:00:00,504.0,99.3,17.19,NO_EVENT,12.291754,41.800588,24.69,0.69,5.04,rain,1014.98
1,25,1,A,2018-09-01 08:15:00,481.0,102.1,18.12,NO_EVENT,12.291754,41.800588,24.78,0.69,5.02,rain,1015.01
2,26,1,A,2018-09-01 08:30:00,617.0,99.2,17.91,NO_EVENT,12.291754,41.800588,24.86,0.69,5.07,rain,1015.03
3,27,1,A,2018-09-01 08:45:00,562.0,101.7,16.62,NO_EVENT,12.291754,41.800588,24.95,0.69,5.18,rain,1015.06
4,28,1,A,2018-09-01 09:00:00,521.0,99.7,17.06,NO_EVENT,12.291754,41.800588,25.03,0.69,5.35,rain,1015.09


In [166]:
file_final.drop(['index', 'Precipitation'], axis=1, inplace=True)
file_final = file_final.dropna(subset=['SPEED_SD'], axis=0)
file_final = file_final.reset_index()

In [167]:
file_final.DIRECTION = pd.get_dummies(file_final.DIRECTION, prefix = 'A', drop_first=True)

In [168]:
file_final['prev15'] = file_final.NUM_VEHICLES.diff()
file_final['prev30'] = file_final.NUM_VEHICLES.diff(periods=2)
file_final=file_final.drop(['index'], axis=1)
file_final.head()

Unnamed: 0,ID_SENSOR,DIRECTION,DATETIME,NUM_VEHICLES,SPEED_MEAN,SPEED_SD,EVENT,LON,LAT,Temp,Humidity,Wind,Pressure,prev15,prev30
0,1,0,2018-09-01 08:00:00,504.0,99.3,17.19,NO_EVENT,12.291754,41.800588,24.69,0.69,5.04,1014.98,,
1,1,0,2018-09-01 08:15:00,481.0,102.1,18.12,NO_EVENT,12.291754,41.800588,24.78,0.69,5.02,1015.01,-23.0,
2,1,0,2018-09-01 08:30:00,617.0,99.2,17.91,NO_EVENT,12.291754,41.800588,24.86,0.69,5.07,1015.03,136.0,113.0
3,1,0,2018-09-01 08:45:00,562.0,101.7,16.62,NO_EVENT,12.291754,41.800588,24.95,0.69,5.18,1015.06,-55.0,81.0
4,1,0,2018-09-01 09:00:00,521.0,99.7,17.06,NO_EVENT,12.291754,41.800588,25.03,0.69,5.35,1015.09,-41.0,-96.0


In [169]:
Speed_ser = file_final['SPEED_MEAN'][:-1]

In [170]:
Speed_ser.name = 'Prev_Speed'

In [171]:
file_final = pd.concat([file_final[1:].reset_index(), Speed_ser], axis=1, names=[file_final.columns, 'prev_speed'])
file_final=file_final.drop(['index'], axis=1)
file_final.head()

Unnamed: 0,ID_SENSOR,DIRECTION,DATETIME,NUM_VEHICLES,SPEED_MEAN,SPEED_SD,EVENT,LON,LAT,Temp,Humidity,Wind,Pressure,prev15,prev30,Prev_Speed
0,1,0,2018-09-01 08:15:00,481.0,102.1,18.12,NO_EVENT,12.291754,41.800588,24.78,0.69,5.02,1015.01,-23.0,,99.3
1,1,0,2018-09-01 08:30:00,617.0,99.2,17.91,NO_EVENT,12.291754,41.800588,24.86,0.69,5.07,1015.03,136.0,113.0,102.1
2,1,0,2018-09-01 08:45:00,562.0,101.7,16.62,NO_EVENT,12.291754,41.800588,24.95,0.69,5.18,1015.06,-55.0,81.0,99.2
3,1,0,2018-09-01 09:00:00,521.0,99.7,17.06,NO_EVENT,12.291754,41.800588,25.03,0.69,5.35,1015.09,-41.0,-96.0,101.7
4,1,0,2018-09-01 09:15:00,448.0,99.2,17.98,NO_EVENT,12.291754,41.800588,25.11,0.67,5.48,1015.09,-73.0,-114.0,99.7


In [172]:
cal = Italy()

In [173]:
file_final['DATETIME'][0].date()

datetime.date(2018, 9, 1)

In [174]:
feste = []
for i in file_final['DATETIME']:
    feste.append(int(cal.is_working_day(i.date())))

In [175]:
file_final['Working_Days'] = pd.Series(feste, name = 'Working_Days')

In [176]:
file_final.loc[file_final['Working_Days']==0]

Unnamed: 0,ID_SENSOR,DIRECTION,DATETIME,NUM_VEHICLES,SPEED_MEAN,SPEED_SD,EVENT,LON,LAT,Temp,Humidity,Wind,Pressure,prev15,prev30,Prev_Speed,Working_Days
0,1,0,2018-09-01 08:15:00,481.0,102.1,18.12,NO_EVENT,12.291754,41.800588,24.78,0.69,5.02,1015.01,-23.0,,99.3,0
1,1,0,2018-09-01 08:30:00,617.0,99.2,17.91,NO_EVENT,12.291754,41.800588,24.86,0.69,5.07,1015.03,136.0,113.0,102.1,0
2,1,0,2018-09-01 08:45:00,562.0,101.7,16.62,NO_EVENT,12.291754,41.800588,24.95,0.69,5.18,1015.06,-55.0,81.0,99.2,0
3,1,0,2018-09-01 09:00:00,521.0,99.7,17.06,NO_EVENT,12.291754,41.800588,25.03,0.69,5.35,1015.09,-41.0,-96.0,101.7,0
4,1,0,2018-09-01 09:15:00,448.0,99.2,17.98,NO_EVENT,12.291754,41.800588,25.11,0.67,5.48,1015.09,-73.0,-114.0,99.7,0
5,1,0,2018-09-01 09:30:00,574.0,97.5,16.57,NO_EVENT,12.291754,41.800588,25.18,0.64,5.65,1015.09,126.0,53.0,99.2,0
6,1,0,2018-09-01 09:45:00,539.0,97.4,17.95,NO_EVENT,12.291754,41.800588,25.25,0.61,5.87,1015.08,-35.0,91.0,97.5,0
7,1,0,2018-09-01 10:00:00,525.0,96.7,18.54,NO_EVENT,12.291754,41.800588,25.32,0.59,6.12,1015.08,-14.0,-49.0,97.4,0
8,1,0,2018-09-01 10:15:00,506.0,97.5,17.60,NO_EVENT,12.291754,41.800588,25.21,0.59,5.81,1015.05,-19.0,-33.0,96.7,0
9,1,0,2018-09-01 10:30:00,679.0,94.9,17.91,NO_EVENT,12.291754,41.800588,25.10,0.60,5.51,1015.02,173.0,154.0,97.5,0


In [187]:
file_final.EVENT = pd.to_numeric(file_final.EVENT, errors='coerce')
file_final.EVENT.fillna(0, inplace=True)

In [188]:
file_final.head()

Unnamed: 0,ID_SENSOR,DIRECTION,DATETIME,NUM_VEHICLES,SPEED_MEAN,SPEED_SD,EVENT,LON,LAT,Temp,Humidity,Wind,Pressure,prev15,prev30,Prev_Speed,Working_Days
0,1,0,2018-09-01 08:15:00,481.0,102.1,18.12,0.0,12.291754,41.800588,24.78,0.69,5.02,1015.01,-23.0,,99.3,0
1,1,0,2018-09-01 08:30:00,617.0,99.2,17.91,0.0,12.291754,41.800588,24.86,0.69,5.07,1015.03,136.0,113.0,102.1,0
2,1,0,2018-09-01 08:45:00,562.0,101.7,16.62,0.0,12.291754,41.800588,24.95,0.69,5.18,1015.06,-55.0,81.0,99.2,0
3,1,0,2018-09-01 09:00:00,521.0,99.7,17.06,0.0,12.291754,41.800588,25.03,0.69,5.35,1015.09,-41.0,-96.0,101.7,0
4,1,0,2018-09-01 09:15:00,448.0,99.2,17.98,0.0,12.291754,41.800588,25.11,0.67,5.48,1015.09,-73.0,-114.0,99.7,0


In [191]:
def make_harmonic_features(value, period=24):
    value *= 2 * np.pi / period 
    return np.cos(value), np.sin(value)

In [202]:
euclidean(make_harmonic_features(0), make_harmonic_features(23)) 

0.26105238444010403

In [206]:
make_harmonic_features(1)

(0.9659258262890683, 0.25881904510252074)

In [238]:
a = pd.Series(data=[make_harmonic_features(x.hour) for x in file_final.DATETIME])

In [237]:
file_final['Hour'] = pd.Series(data = a)
file_final.head()

Unnamed: 0,ID_SENSOR,DIRECTION,DATETIME,NUM_VEHICLES,SPEED_MEAN,SPEED_SD,EVENT,LON,LAT,Temp,Humidity,Wind,Pressure,prev15,prev30,Prev_Speed,Working_Days,Hour
0,1,0,2018-09-01 08:15:00,481.0,102.1,18.12,0.0,12.291754,41.800588,24.78,0.69,5.02,1015.01,-23.0,,99.3,0,"(-0.4999999999999998, 0.8660254037844387)"
1,1,0,2018-09-01 08:30:00,617.0,99.2,17.91,0.0,12.291754,41.800588,24.86,0.69,5.07,1015.03,136.0,113.0,102.1,0,"(-0.4999999999999998, 0.8660254037844387)"
2,1,0,2018-09-01 08:45:00,562.0,101.7,16.62,0.0,12.291754,41.800588,24.95,0.69,5.18,1015.06,-55.0,81.0,99.2,0,"(-0.4999999999999998, 0.8660254037844387)"
3,1,0,2018-09-01 09:00:00,521.0,99.7,17.06,0.0,12.291754,41.800588,25.03,0.69,5.35,1015.09,-41.0,-96.0,101.7,0,"(-0.7071067811865475, 0.7071067811865476)"
4,1,0,2018-09-01 09:15:00,448.0,99.2,17.98,0.0,12.291754,41.800588,25.11,0.67,5.48,1015.09,-73.0,-114.0,99.7,0,"(-0.7071067811865475, 0.7071067811865476)"


In [239]:
a = Astral()

In [None]:
a.

In [242]:
file_final.head()

Unnamed: 0,ID_SENSOR,DIRECTION,DATETIME,NUM_VEHICLES,SPEED_MEAN,SPEED_SD,EVENT,LON,LAT,Temp,Humidity,Wind,Pressure,prev15,prev30,Prev_Speed,Working_Days,Hour
0,1,0,2018-09-01 08:15:00,481.0,102.1,18.12,0.0,12.291754,41.800588,24.78,0.69,5.02,1015.01,-23.0,,99.3,0,"(-0.4999999999999998, 0.8660254037844387)"
1,1,0,2018-09-01 08:30:00,617.0,99.2,17.91,0.0,12.291754,41.800588,24.86,0.69,5.07,1015.03,136.0,113.0,102.1,0,"(-0.4999999999999998, 0.8660254037844387)"
2,1,0,2018-09-01 08:45:00,562.0,101.7,16.62,0.0,12.291754,41.800588,24.95,0.69,5.18,1015.06,-55.0,81.0,99.2,0,"(-0.4999999999999998, 0.8660254037844387)"
3,1,0,2018-09-01 09:00:00,521.0,99.7,17.06,0.0,12.291754,41.800588,25.03,0.69,5.35,1015.09,-41.0,-96.0,101.7,0,"(-0.7071067811865475, 0.7071067811865476)"
4,1,0,2018-09-01 09:15:00,448.0,99.2,17.98,0.0,12.291754,41.800588,25.11,0.67,5.48,1015.09,-73.0,-114.0,99.7,0,"(-0.7071067811865475, 0.7071067811865476)"


In [245]:
y = file_final['SPEED_MEAN']
X = file_final.drop(['SPEED_MEAN', 'SPEED_SD', 'LON', 'LAT', 'Prev_Speed'], axis=1)
features_filtered_direct = extract_relevant_features(X[1:], y[1:],
                                                     column_id='DATETIME')

Feature Extraction:   0%|          | 0/20 [00:00<?, ?it/s]

ZeroDivisionError: float division by zero

In [244]:
X

Unnamed: 0,ID_SENSOR,DIRECTION,DATETIME,NUM_VEHICLES,EVENT,Temp,Humidity,Wind,Pressure,prev15,prev30,Working_Days,Hour
0,1,0,2018-09-01 08:15:00,481.0,0.0,24.78,0.69,5.02,1015.01,-23.0,,0,"(-0.4999999999999998, 0.8660254037844387)"
1,1,0,2018-09-01 08:30:00,617.0,0.0,24.86,0.69,5.07,1015.03,136.0,113.0,0,"(-0.4999999999999998, 0.8660254037844387)"
2,1,0,2018-09-01 08:45:00,562.0,0.0,24.95,0.69,5.18,1015.06,-55.0,81.0,0,"(-0.4999999999999998, 0.8660254037844387)"
3,1,0,2018-09-01 09:00:00,521.0,0.0,25.03,0.69,5.35,1015.09,-41.0,-96.0,0,"(-0.7071067811865475, 0.7071067811865476)"
4,1,0,2018-09-01 09:15:00,448.0,0.0,25.11,0.67,5.48,1015.09,-73.0,-114.0,0,"(-0.7071067811865475, 0.7071067811865476)"
5,1,0,2018-09-01 09:30:00,574.0,0.0,25.18,0.64,5.65,1015.09,126.0,53.0,0,"(-0.7071067811865475, 0.7071067811865476)"
6,1,0,2018-09-01 09:45:00,539.0,0.0,25.25,0.61,5.87,1015.08,-35.0,91.0,0,"(-0.7071067811865475, 0.7071067811865476)"
7,1,0,2018-09-01 10:00:00,525.0,0.0,25.32,0.59,6.12,1015.08,-14.0,-49.0,0,"(-0.8660254037844385, 0.5000000000000003)"
8,1,0,2018-09-01 10:15:00,506.0,0.0,25.21,0.59,5.81,1015.05,-19.0,-33.0,0,"(-0.8660254037844385, 0.5000000000000003)"
9,1,0,2018-09-01 10:30:00,679.0,0.0,25.10,0.60,5.51,1015.02,173.0,154.0,0,"(-0.8660254037844385, 0.5000000000000003)"


In [190]:
file_final.to_csv('../traffic/file_final.csv')