In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sqlalchemy import create_engine
from dotenv import load_dotenv
load_dotenv('../.env')
import os

In [2]:
HOST = os.getenv('DB_HOST')
USER = os.getenv('DB_USER')
PASS = os.getenv('DB_PASSWORD')
DB = os.getenv('DB_DATABASE')

In [3]:
ENGINE = create_engine(
        f"mysql+pymysql://{USER}:{PASS}@{HOST}/{DB}"
    )
print(f"mysql://{USER}:{PASS}@{HOST}/{DB}?ssl_mode=VERIFY_IDENTITY")

mysql://None:None@None/None?ssl_mode=VERIFY_IDENTITY


In [4]:
df = pd.read_sql('SELECT * FROM rastreo_satelital LIMIT 500000', con = ENGINE).drop(columns='#')

df.head()

OperationalError: (pymysql.err.OperationalError) (2003, "Can't connect to MySQL server on 'None' ([Errno -2] Name or service not known)")
(Background on this error at: https://sqlalche.me/e/14/e3q8)

## Data Cleaning

In [None]:
#Data tranformation to datetype
df = df.rename(columns={'Date time':'Datetime'})
df.Datetime = pd.to_datetime(df.Datetime)
df['date'] = df['Datetime'].dt.date
df['day'] = df['Datetime'].dt.weekday
df['hour'] = df['Datetime'].dt.hour

In [None]:
#get only locations in Colombia
def GetCountry(row):
    """
    Function to determine whether
    the location is in Colombia.
    """
    try:
        row = row.split(', ')
        if row[-1] == 'Colombia':
            return 'Colombia'
        else:
            return None
    except:
        return None
df['Pais'] = df.apply(lambda row: GetCountry(row['Address']),axis=1)
df = df.where(df.Pais=='Colombia').dropna(how='all')
df.Speed = df.Speed.astype(int)

In [None]:
df['Departamento'] = df['Address'].str.split(', ').str[-2].replace('Bogota','Bogotá')
df = df.where(df.Departamento=='Bogotá').dropna(how='all')
#df.Departamento

In [None]:
df.Speed.unique()

array([  6.,  24.,   0.,  12.,  39.,   7.,   8.,  35.,   2.,  56.,  61.,
        43.,  52.,  42.,  49.,  46.,  59.,  47.,  45.,  30.,  25.,  28.,
        23.,  17.,  21.,  50.,  63.,  40.,  41.,  19.,  22.,  27.,   1.,
        26.,  33.,  29.,  14.,  37.,   9.,  10.,  11.,   5.,   4.,  13.,
         3.,  16.,  15.,  20.,  31.,  44.,  51.,  53.,  62.,  66.,  67.,
        65.,  38.,  73.,  68.,  54.,  69.,  18.,  36.,  58.,  34.,  32.,
        60.,  70.,  64.,  48.,  57.,  55.,  76.,  71.,  72.,  78.,  79.,
        75.,  91.,  77.,  81.,  74.,  86.,  82.,  80.,  87.,  89., 104.,
       209., 137., 105., 112.,  85.,  83.,  84.,  94.,  90.,  88.,  92.,
        93.])

In [None]:
def IgnicionCorreccion(row):
    if row.Speed>0:
        return 1.0
    else:
        if row.Ignicion in [0.0,1.0]:
            return row.Ignicion
        else:
            return 0.0
df.Ignicion = df.apply(lambda row: IgnicionCorreccion(row),axis=1)
df.Ignicion.unique()

array([1., 0.])

In [None]:
df['Localidad'] = df['Address'].str.split(', ').str[1]
df.Localidad.unique()
LocalidadesBogota = ['Fontibón', 'Kennedy', 'Suba', 'Engativá', 'Barrios Unidos',
       'Teusaquillo', 'Puente Aranda', 'Tunjuelito', 'Ciudad Bolívar',
       'Bosa', 'Chapinero', 'Usaquén', 'Usme', 'Antonio Nariño', 'Los Mártires',
        'La Candelaria', 'Rafael Uribe', 'San Cristóbal', 'Sumapaz', 'Santa Fe']
badL = df.where(~df.Localidad.isin(LocalidadesBogota)).dropna(how='all')

In [None]:
#WARNING: it is not recommended running this cell due to its high calculation time (at least 7 minutes)
import requests
def get_L(lat, lon):
    url = f'https://nominatim.openstreetmap.org/reverse?lat={lat}&lon={lon}&format=json&accept-language=en&zoom=14'
    try:
        result = requests.get(url=url)
        time.sleep(1)
        result_json = result.json()
        print(result_json['address']['suburb'].replace('Localidad ',''))
        return result_json['address']['suburb'].replace('Localidad ','')
    except:
        get_L(lat, lon)
badL.Localidad = badL.apply(lambda row: get_L(row.Latitude,row.Longitude),axis=1)
badL.Localidad.unique()

In [None]:
#Variable transformation (string to numeric)
df[['altitude', 'st']] = df['Altitud*'].str.split(' ', 1, expand=True) 
df = df.drop('st', axis=1)
df[['altitude']] = df[['altitude']].apply(pd.to_numeric)


df[['backup_battery', 'st']] = df['Bateria Respaldo*'].str.split(' ', 1, expand=True) 
df = df.drop('st', axis=1) 
df[['backup_battery']] = df[['backup_battery']].apply(pd.to_numeric) 


df[['battery_charge', 'st']] = df['Bateria Vehiculo*'].str.split(' ', 1, expand=True) 
df = df.drop('st', axis=1) 
df[['battery_charge']] = df[['battery_charge']].apply(pd.to_numeric)


df[['course', 'st']] = df['Curso*'].str.split(' ', 1, expand=True) 
df = df.drop('st', axis=1)
df[['course']] = df[['course']].apply(pd.to_numeric)


df[['mileage', 'st']] = df['Kilometraje*'].str.split(' ', 1, expand=True) 
df = df.drop('st', axis=1) 
df[['mileage']] = df[['mileage']].apply(pd.to_numeric)

df[['speed', 'st']] = df['Velocidad*'].str.split(' ', 1, expand=True) 
df = df.drop('st', axis=1) 
df[['speed']] = df[['speed']].apply(pd.to_numeric)

df[['eng_eff_on_fuelconsmath*']] = df[['eng_eff_on_fuelconsmath*']].apply(pd.to_numeric)

#Localization variables

df['country'] = df['Localización'].str.split(',').str[1]
df['city'] = df['Localización'].str.split(',').str[2]
df['location'] = df['Localización'].str.split(',').str[0]
df['address'] = df['Localización'].str.split(',', n=3).str[3]

df['eng_eff'] = df['eng_eff_on_fuelconsmath*'].astype('float32')
df['ignition'] = df['Ignicion*'].map({'Off': False, 'On': True})

df = df.drop('Altitud*', axis=1)
df = df.drop('Bateria Respaldo*', axis=1)
df = df.drop('Bateria Vehiculo*', axis=1)
df = df.drop('Curso*', axis=1)
df = df.drop('Kilometraje*', axis=1)
df = df.drop('Velocidad*', axis=1)
df = df.drop('Velocidad', axis=1)
df = df.drop('Tarjeta*', axis=1) 
df = df.drop('Tarjetas*', axis=1)
df = df.drop('Conductor', axis=1)
df = df.drop('Unnamed: 17', axis=1)
df = df.drop('Fecha y Hora', axis=1)
df = df.drop('Coordenadas', axis=1)
df = df.drop('Localización', axis=1)
df = df.drop('Aceleracion Frenadas*', axis=1)
df = df.drop('Ignicion*', axis=1)
df = df.drop('eng_eff_on_fuelconsmath*', axis=1)

df = df.set_index(pd.DatetimeIndex(df['datetime']), drop=True)
df.drop(columns=['datetime'], inplace=True)


KeyError: 'Altitud*'

Pasando localizacion a minusculas

In [None]:
df['country'] = df['country'].str.lower()
df['city'] = df['city'].str.lower()
df['location'] = df['location'].str.lower()

In [None]:
def normalize(s):
    replacements = (
        ("á", "a"),
        ("é", "e"),
        ("í", "i"),
        ("ó", "o"),
        ("ú", "u"),
    )
    for a, b in replacements:
        s = s.replace(a, b).replace(a.lower(), b.lower())
    return s


In [None]:
def tilde(df):
  a = []
  for i in df:
    a.append(normalize(i))
  return a

In [None]:
p = tilde(df['city'])
l = tilde(df['location'])

In [None]:
df['city'] = p
df['location'] = l

In [None]:
df

## EDA

In [None]:
df.describe()
#df.city.unique()
df = df[(df['city'] == ' bogota') | (df['city'] == ' cundinamarca')]


** This preliminary vision to the data shows a big picture of the information of our vehicule, we see an average spped of 8 km/h, something that is quite interesting because this might suggests that the vehicule is extremely slow, but we must keep in mind that we are including data points with the vehicule completely out of service.

In [None]:
df1 = df[df['ignition']==False]
df1

**After checking the information of the vehicule with the Ignition=Off, we see an average speed of 0.001475, probably a metric that included movements of the vehicule during reparations or during car reparations.

In [None]:
df1 = df[df['ignition']==True]
df1

**With the Ignition=On, we see an average speed of 8km/h, this probably needs further investigation, but we might suggest traffic issues as one of the main explanations for this number. We can see an average altitude of 2390 mts over the sea level, Bogota is located 2600 mts over the sea level, and its surrounding areas have this altitude on average too, so it's possible that the main work area for this vehicule is located around Bogota. 

In [None]:
df1['city'][df1['city']== 'bosa']

In [None]:
df2 = pd.DataFrame(df.groupby(['ignition','location'])['speed'].mean())

In [None]:
df2 = df2.reset_index()

In [None]:
#Speed verification per location
fig, ax = plt.subplots(figsize = ( 20 , 10 ))
  
# Plot the scatterplot
sns.lineplot(data=df2, x="location", y="speed")
ax.tick_params(labelrotation=90)
# Set label for x-axis
ax.set_xlabel( "location" , size = 12 )
  
# Set label for y-axis
ax.set_ylabel( "Km/h" , size = 12 )
  
# Set title for plot
ax.set_title( "Speed per location" , size = 24 )
  
# Display figure
plt.show()

**After associating the speed metric to a location, it's possible to see a big variation across locations, and the main explanation for this is the highway regulation (the lowest numbers are mostly located in urban areas where the maximum permitted is 30km/h, and the highest metrics are in highways or outside urban areas where the maximum is 80km/h).

In [None]:
df3 = pd.DataFrame(df.groupby(['ignition', 'day'])['speed'].mean())
df3 = df3.reset_index()

In [None]:
df1

In [None]:
#Speed verification per day of the week
fig, ax = plt.subplots(figsize = ( 20 , 10 ))
  
# Plot the scatterplot
sns.lineplot(data=df3, x="day", y="speed")
ax.tick_params(labelrotation=90)
# Set label for x-axis
ax.set_xlabel( "day" , size = 12 )
  
# Set label for y-axis
ax.set_ylabel( "Km/h" , size = 12 )
  
# Set title for plot
ax.set_title( "Speed per day" , size = 24 )
  
# Display figure
plt.show()

**Focusing our analysis on the ignition=On, we can see that from monday (0) to friday (4), the vehicule has low speed on average, and on the weekends experiments an increase in it's speed. This might suggest that it has a different way of working depending on the day of the week. It would be useful to see the distribution of the vehicule usage per day of the week to spot some trends. 

In [None]:
fig, ax = plt.subplots(figsize=(15,8))
sns.countplot(data=df1, x='day', ax=ax)
plt.show()

**We see a sustained usage from monday to saturday, and a big drop on sundays, showing that this vehicule works most of the week, and on saturdays has its day off. So now it's time to locate it's usage to better understand its behavior.

In [None]:
#Time between locations
df['time_range'] = df.index.to_series().shift(-1) - df.index.to_series()
df_vel = df[(df['speed'] <= 20) & (df['ignition'] == True)] #& (df.hour>=18)] 


In [None]:
import folium  #needed for interactive map
from folium.plugins import HeatMap

my_zip = zip(df_vel['LAT'], df_vel['LON'], df_vel['time_range'].dt.total_seconds())
list_of_my_zip = list(my_zip)

hm_layer = HeatMap(list_of_my_zip,
                   # These are parameters that we tweak manually to adjust color
                   # See folium docs for more information
                   min_opacity=0.3,
                   radius=6,
                   blur=5,
                 )

bo_coords = [4.65, -74.1] # lat, long
folium_map = folium.Map(location=bo_coords, zoom_start=12, tiles="OpenStreetMap")
folium_map.add_child(hm_layer)
folium_map

**The center of operation for the vehicule is the south of Bogota, mostly the location of Bosa (one of the most populated areas of the city), and the main avenue to reach 'outliers', or to move to other locations is 30th avenue, from wich it's possible to reach the north of the city and even reach other towns, wich apparently is a side usage of the vehicule.

In [None]:
#Monday map
df_vel1 = df[(df['ignition'] == True) & (df['day']==0)& (df.hour>=18)] 
my_zip1 = zip(df_vel1['LAT'], df_vel1['LON'], df_vel1['time_range'].dt.total_seconds())
list_of_my_zip1 = list(my_zip1)

hm_layer1 = HeatMap(list_of_my_zip1,
                   # These are parameters that we tweak manually to adjust color
                   # See folium docs for more information
                   min_opacity=0.3,
                   radius=6,
                   blur=5,
                 )

bo_coords = [4.65, -74.1] # lat, long
folium_map1 = folium.Map(location=bo_coords, zoom_start=12, tiles="OpenStreetMap")
folium_map1.add_child(hm_layer1)
folium_map1



** Reviewing the monday usage, we see it mostly ocncentrated on a location (Bosa), and we don't see long distance movements, so that average speed of 5km/h would be explained by the heavy traffic on the neighborhoods located in this location (an extremely populated area) that receives a huge amount of vehicules including public transport, and it also shows the little offer of avenues and its deterioration. 

In [None]:
#Tuesday map
df_vel2 = df[(df['ignition'] == True) & (df['day']==1)] 
df_vel2 = df_vel2[['LAT', 'LON', 'time_range']]
df_vel2 = df_vel2.dropna()
my_zip2 = zip(df_vel2['LAT'], df_vel2['LON'], df_vel2['time_range'].dt.total_seconds())
list_of_my_zip2 = list(my_zip2)

hm_layer2 = HeatMap(list_of_my_zip2,
                   # These are parameters that we tweak manually to adjust color
                   # See folium docs for more information
                   min_opacity=0.3,
                   radius=6,
                   blur=5,
                 )

bo_coords = [4.65, -74.1] # lat, long
folium_map2 = folium.Map(location=bo_coords, zoom_start=12, tiles="OpenStreetMap")
folium_map2.add_child(hm_layer2)
folium_map2



In [None]:
#Wednesday map
df_vel3 = df[(df['ignition'] == True) & (df['day']==2)] 
df_vel3 = df_vel2[['LAT', 'LON', 'time_range']]
df_vel3 = df_vel3.dropna()
my_zip3 = zip(df_vel3['LAT'], df_vel3['LON'], df_vel3['time_range'].dt.total_seconds())
list_of_my_zip3 = list(my_zip3)

hm_layer3 = HeatMap(list_of_my_zip3,
                   # These are parameters that we tweak manually to adjust color
                   # See folium docs for more information
                   min_opacity=0.3,
                   radius=6,
                   blur=5,
                 )

bo_coords = [4.65, -74.1] # lat, long
folium_map3 = folium.Map(location=bo_coords, zoom_start=12, tiles="OpenStreetMap")
folium_map3.add_child(hm_layer3)
folium_map3



In [None]:
#Thursday map
df_vel4 = df[(df['ignition'] == True) & (df['day']==3)& (df.hour>=18)] 
df_vel4 = df_vel4[['LAT', 'LON', 'time_range']]
df_vel4 = df_vel4.dropna()
my_zip4 = zip(df_vel4['LAT'], df_vel4['LON'], df_vel4['time_range'].dt.total_seconds())
list_of_my_zip4 = list(my_zip4)

hm_layer4 = HeatMap(list_of_my_zip4,
                   # These are parameters that we tweak manually to adjust color
                   # See folium docs for more information
                   min_opacity=0.3,
                   radius=6,
                   blur=5,
                 )

bo_coords = [4.65, -74.1] # lat, long
folium_map4 = folium.Map(location=bo_coords, zoom_start=12, tiles="OpenStreetMap")
folium_map4.add_child(hm_layer4)
folium_map4



In [None]:
#Friday map
df_vel5 = df[(df['ignition'] == True) & (df['day']==4)& (df.hour>=18)] 
df_vel5 = df_vel5[['LAT', 'LON', 'time_range']]
df_vel5 = df_vel5.dropna()
my_zip5 = zip(df_vel5['LAT'], df_vel5['LON'], df_vel5['time_range'].dt.total_seconds())
list_of_my_zip5 = list(my_zip5)

hm_layer5 = HeatMap(list_of_my_zip4,
                   # These are parameters that we tweak manually to adjust color
                   # See folium docs for more information
                   min_opacity=0.3,
                   radius=6,
                   blur=5,
                 )

bo_coords = [4.65, -74.1] # lat, long
folium_map5 = folium.Map(location=bo_coords, zoom_start=12, tiles="OpenStreetMap")
folium_map5.add_child(hm_layer5)
folium_map5


In [None]:
#Saturday map
df_vel6 = df[(df['ignition'] == True) & (df['day']==5)& (df.hour>=18)] 
df_vel6 = df_vel6[['LAT', 'LON', 'time_range']]
df_vel6 = df_vel6.dropna()
my_zip6 = zip(df_vel6['LAT'], df_vel6['LON'], df_vel6['time_range'].dt.total_seconds())
list_of_my_zip6 = list(my_zip6)

hm_layer6 = HeatMap(list_of_my_zip6,
                   # These are parameters that we tweak manually to adjust color
                   # See folium docs for more information
                   min_opacity=0.3,
                   radius=6,
                   blur=5,
                 )

bo_coords = [4.65, -74.1] # lat, long
folium_map6 = folium.Map(location=bo_coords, zoom_start=12, tiles="OpenStreetMap")
folium_map6.add_child(hm_layer6)
folium_map6


In [None]:
#Sunday map
df_vel7 = df[(df['ignition'] == True) & (df['day']==6)] 
df_vel7 = df_vel7[['LAT', 'LON', 'time_range']]
df_vel7 = df_vel7.dropna()
my_zip7 = zip(df_vel7['LAT'], df_vel7['LON'], df_vel7['time_range'].dt.total_seconds())
list_of_my_zip7 = list(my_zip7)

hm_layer7 = HeatMap(list_of_my_zip7,
                   # These are parameters that we tweak manually to adjust color
                   # See folium docs for more information
                   min_opacity=0.3,
                   radius=6,
                   blur=5,
                 )

bo_coords = [4.65, -74.1] # lat, long
folium_map7 = folium.Map(location=bo_coords, zoom_start=12, tiles="OpenStreetMap")
folium_map7.add_child(hm_layer7)
folium_map7


**From monday to friday we can see a pattern of use, pretty much the same streets and locations (mostly urban areas within the south of the city), so the low speed is understandable.

**Saturday shows more destinations outside the city, and it shoes more distance covered, so this sudden increase in average speed is understandable since highways offer more possibilities of acceleration and their speed limit is higher.

**On sundays we don't see as much movement as saturdays, but we see locations outside the city, so the average speed should be higher compared to the weekdays. 

In [None]:
#Distribution of activity per location
fig, ax = plt.subplots(figsize=(15,8))
sns.countplot(data=df, x='location', ax=ax)
ax.tick_params(labelrotation=90)
plt.show()

**This bar graph confirms what we saw on our maps, most of the action of our vehicule is concentrated in the location of Bosa, confirming that our vehicule is specialized on this zone of the city, and only moves occasionaly or on sepcial events. 

## Corr

Se realiza una correlación entre todas las variables pertenecientes al dataset utilizando el coeficiente de correlación de pearson (R)

In [None]:
df1.speed = df1.where(df1['speed']>1)['speed']
df1.eng_eff = df1.where(df1['eng_eff']>10)['eng_eff']
corr_df= df1.corr(method='pearson')
corr_df
plt.figure(figsize=(10,10 ),dpi=100)
sns.heatmap(corr_df, annot=True,vmin=-1,vmax=1,cmap='seismic',cbar_kws={"shrink": .9,'extend':'both'})
plt.show()

Como se puede observar en la anterior correlación existen variables "basura" que crean correlaciones que no poseen sentido físico ni relevancia de estudio. Solo se toman las variables consideradas como más relevantes para el estudio ('hour', 'battery_charge','speed', 'eng_eff').

In [None]:
 # dia, altitud, longitud latitud curso, addrees, city , country, location, ignicion
df2 = df1[['hour', 'battery_charge','speed', 'eng_eff']]
corr_df= df2.corr(method='pearson')
corr_df

In [None]:
plt.figure(figsize=(10,10 ),dpi=100)
sns.heatmap(corr_df, annot=True,vmin=-1,vmax=1,cmap='seismic',cbar_kws={"shrink": .9,'extend':'both'})
plt.show()

* There is a correlation between eng_eff and speed.

Let's analyze these variables a little bit:


As it may be seen, there exist a strong correlation between the automotor speed and it's engine efficiency. We now present a plot to evaluate it's behaviour.

In [None]:
fig,ax = plt.subplots(1,figsize=(20,10))
sns.lineplot(ax = ax, data = df2, x = 'speed', y = 'eng_eff')
ax.grid(ls = "--",alpha = 0.3,color = 'k')
ax.legend()
ax.set_xlabel('Speed [Km/h]')
ax.set_ylabel('eng_eff [Km/gal]')
plt.show()

Due to the behaviour of the variables an exponential relation is expected. We proceed to fit the observed behaviour with a custom exponential function.

In [None]:
from scipy.optimize import curve_fit

In [None]:
def func(x, a, b, c, d):
    return a * np.exp((b*(x-d))) + c 

In [None]:
df3 = df2[['speed', 'eng_eff']].dropna()
df3 = pd.DataFrame(df3.groupby('speed')['eng_eff'].mean()).reset_index()
"""
   For doing the fit the mean value of 
   the eng_eff over the speed is used
   to avoid unwanted noice.
"""
min1 = min(df2.speed.dropna())
max1 = max(df2.speed.dropna())
speed = np.linspace(min1,max1,3395)

In [None]:
popt, pcov = curve_fit(func, df3.speed, df3.eng_eff)
residuals = df3.eng_eff-func(df3.speed,*popt)
ss_ress = np.sum(residuals**2)
ss_tot = np.sum((df3.eng_eff-np.mean(df3.eng_eff))**2)
r2 = 1 - (ss_ress/ss_tot)
print("parameters:",popt,"\n"+r'Rsquared:',r2)#fitted parameters and R^2

In [None]:
fig, ax = plt.subplots(1,figsize = ( 20 , 10 ))
sns.lineplot(ax = ax, data = df2, x = 'speed', y = 'eng_eff')
sns.lineplot(ax = ax, data = df3, x = 'speed', y = 'eng_eff',color='k',label='Mean eng_eff')
ax.plot(speed, func(speed, *popt), 'r-',label = f'Regresion Model',lw=3)
ax.grid(ls = "--",alpha = 0.3,color = 'k')
ax.legend()
ax.set_xlabel('Speed [Km/h]')
ax.set_ylabel('eng_eff [Km/gal]')
plt.show()

It is noticed that at high speeds the engine reaches the maximun engine efficiencie.

With the available data we can obtain the vehicular flux of Bogota:

In [None]:
df = pd.read_csv('../data/raw/sep2021.csv', encoding='utf-8', sep=',', index_col=0)
df = df[df['Ignicion']==True]
df.head(1)

In [None]:
df['datetime'] = pd.to_datetime(df['Date time'], errors='raise')
df.drop('Date time',1)
df['date'] = df['datetime'].dt.date
df['day'] = df['datetime'].dt.weekday
df['hour'] = df['datetime'].dt.hour


In [None]:
df['city'] = df['Address'].str.split(',').str[2]
df = df[df.city.isin([' Bogotá', 'Bogota',' D.C.'])]
df

We decide to make a bar diagram to know the behaviour of the mean car velocity per week day and hour

In [None]:
#sns.catplot(col=df.day,x=df.hour,kind='box',y=df.Speed,col_wrap = 1)
dfg = df.groupby(['day','hour'])['Speed'].apply(pd.Series.mean).to_frame().reset_index()
dfg
sns.catplot(col='day',x = 'hour',y='Speed',data = dfg,kind='bar',col_wrap=1) #=)

It's noticed that the maximum velocities are reached at late night hours,specialy friday, saturday and sunday (days 4, 5 and 6), this may be explained due to the fact that they correspond to recreation days. To know a little bit more about the vehicular flux a box plot was proposed:

In [None]:
dfr = df[['day','hour','Speed']]
dfr
sns.catplot(col = 'day',data=dfr,x= 'hour',y='Speed',kind='box',col_wrap=2)

As it was expected, the speed distribution is left skewed. It is observed that in every day, at every hour 75% of the speed data is lower than 50 km/h. As a next step for a future study process it is propose to check the traffic flux with a map representation.