In [3]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from IPython.display import display, HTML, clear_output
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, Embedding, Concatenate, Flatten, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ReduceLROnPlateau

from random import shuffle
import datetime

In [2]:
full_sumo_data_path = r'C:\Users\ing_l\Tesis grado\Data\SUMO\full_SUMO_data_ocupation.csv'

In [3]:
data = pd.read_csv(full_sumo_data_path)
data.drop('Unnamed: 0', axis=1, inplace=True)

In [4]:
#Delete oficina SUMO and Tribunal de Faltas
data = data.loc[data.parquimetro != 37]
data = data.loc[data.parquimetro != 190]

In [5]:
data['tiempo_emision'] = data['tiempo_emision'].apply(lambda x: pd.to_datetime(x, dayfirst=True).time())

In [6]:
d = data.copy()

In [7]:
d

Unnamed: 0,parquimetro,fecha_emision,tiempo_emision,operacion,tiempo_estacionado,direccion,latitud,longitud,ocupacion
0,16,2019-01-01,10:55:00,0,10:00,Belgrano 660,-37.328838,-59.134455,0
1,60,2019-01-01,16:25:00,1,0:00,Chacabuco 357,-37.328143,-59.136740,1
2,60,2019-01-01,16:25:00,0,0:00,Chacabuco 357,-37.328143,-59.136740,0
3,60,2019-01-01,16:25:00,1,0:00,Chacabuco 357,-37.328143,-59.136740,1
4,60,2019-01-01,17:11:00,0,0:46,Chacabuco 357,-37.328143,-59.136740,0
...,...,...,...,...,...,...,...,...,...
2957537,61,2019-12-31,13:20:00,0,0:38,Yrigoyen 847,-37.323967,-59.139628,1
2957538,61,2019-12-31,13:28:00,0,2:22,Yrigoyen 847,-37.323967,-59.139628,0
2957539,61,2019-12-31,13:28:00,1,2:22,Yrigoyen 847,-37.323967,-59.139628,1
2957540,61,2019-12-31,13:28:00,0,2:22,Yrigoyen 847,-37.323967,-59.139628,0


In [8]:
def normalize_time(time):
    '''
    normalize time (datetime.time to values bertween 0 and 1
    '''
    max_time = 24 * 60 * 60 + 60 * 60 + 60
    return (time.hour * 60 * 60 + time.minute * 60 + time.second) / max_time


In [9]:
#Normalize time to values between 0 and 1
d['tiempo_emision'] = d['tiempo_emision'].apply(normalize_time)

In [10]:
d[0:50]

Unnamed: 0,parquimetro,fecha_emision,tiempo_emision,operacion,tiempo_estacionado,direccion,latitud,longitud,ocupacion
0,16,2019-01-01,0.436376,0,10:00,Belgrano 660,-37.328838,-59.134455,0
1,60,2019-01-01,0.656229,1,0:00,Chacabuco 357,-37.328143,-59.13674,1
2,60,2019-01-01,0.656229,0,0:00,Chacabuco 357,-37.328143,-59.13674,0
3,60,2019-01-01,0.656229,1,0:00,Chacabuco 357,-37.328143,-59.13674,1
4,60,2019-01-01,0.686875,0,0:46,Chacabuco 357,-37.328143,-59.13674,0
5,76,2019-01-01,0.942705,1,0:00,General Paz 547,-37.32521,-59.128828,1
6,76,2019-01-01,0.942705,0,0:00,General Paz 547,-37.32521,-59.128828,0
7,1,2019-01-01,0.607595,1,0:00,General Pinto 545,-37.327782,-59.136657,1
8,1,2019-01-01,0.609594,0,0:03,General Pinto 545,-37.327782,-59.136657,0
9,65,2019-01-01,0.608261,1,0:00,Rodriguez 348,-37.329387,-59.134833,1


In [11]:
max_time = 24 * 60 * 60 + 60 * 60 + 60

eight_am = (8 * 60 * 60) / max_time
ten_pm = (22 * 60 * 60) / max_time

d = d.loc[d.tiempo_emision >= eight_am]
d = d.loc[d.tiempo_emision <= ten_pm]

In [12]:
d

Unnamed: 0,parquimetro,fecha_emision,tiempo_emision,operacion,tiempo_estacionado,direccion,latitud,longitud,ocupacion
0,16,2019-01-01,0.436376,0,10:00,Belgrano 660,-37.328838,-59.134455,0
1,60,2019-01-01,0.656229,1,0:00,Chacabuco 357,-37.328143,-59.136740,1
2,60,2019-01-01,0.656229,0,0:00,Chacabuco 357,-37.328143,-59.136740,0
3,60,2019-01-01,0.656229,1,0:00,Chacabuco 357,-37.328143,-59.136740,1
4,60,2019-01-01,0.686875,0,0:46,Chacabuco 357,-37.328143,-59.136740,0
...,...,...,...,...,...,...,...,...,...
2957537,61,2019-12-31,0.532978,0,0:38,Yrigoyen 847,-37.323967,-59.139628,1
2957538,61,2019-12-31,0.538308,0,2:22,Yrigoyen 847,-37.323967,-59.139628,0
2957539,61,2019-12-31,0.538308,1,2:22,Yrigoyen 847,-37.323967,-59.139628,1
2957540,61,2019-12-31,0.538308,0,2:22,Yrigoyen 847,-37.323967,-59.139628,0


In [13]:
def get_mean_time(data):
    '''
    Get the mean of the difference in team on each row.
    '''
    
    
    df = data.copy()

    mean = 0
    cant = 0
    
    max_time = 24 * 60 * 60 + 60 * 60 + 60
    
    for street in df['parquimetro'].drop_duplicates().sort_values():
        clear_output()
        print('Street:', street)
        if (cant != 0):
            print('Mean:', mean/cant)
        d_street = df.loc[df['parquimetro'] == street]
        
        for date in d_street['fecha_emision'].drop_duplicates().sort_values():
            d_date = d_street.loc[d_street['fecha_emision'] == date].reset_index(drop=True)
            
            d_date = d_date.drop_duplicates(subset='tiempo_emision').reset_index(drop=True)
            #display(d_date)
            def add_mean(time):
                nonlocal d_date, mean, cant

                past_data = d_date[d_date['tiempo_emision'] < time]
                
                if len(past_data) > 0:
                    cant += 1
                    past_time_row_ix = past_data.tiempo_emision.idxmax()
                    other_time = d_date.iloc[past_time_row_ix]['tiempo_emision']
                    mean += (time - other_time)
                return time
            
            _ = d_date['tiempo_emision'].apply(add_mean)
                        
            '''for ix in d_date.index:
                row_to_add = d_date.iloc[ix]
                past_time = row_to_add['tiempo_emision'] - t
                
                past_data = d_date[d_date['tiempo_emision'] <= past_time]
                if len(past_data) > 0:
                    past_time_row_ix = past_data.tiempo_emision.idxmax()
                    row_to_add['ocupacion_pasada'] = d_date.iloc[past_time_row_ix]['ocupacion']
                else:
                    row_to_add['ocupacion_pasada'] = row_to_add['ocupacion']
                
                out = out.append(row_to_add)
            '''    
    mean = mean/cant
    return mean

In [14]:
mean_time = get_mean_time(d)

Street: 96
Mean: 0.00470024319771747


In [4]:
def denormalize_time(time):
    '''
    denormalize time (datetime.time to values bertween 0 and 1
    '''
    max_time = 24 * 60 * 60 + 60 * 60 + 60
    return pd.Timestamp(time * max_time, unit='s').time()

denormalize_time(0.00470024319771747)

datetime.time(0, 7, 3, 303902)