In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Embedding, Concatenate, Flatten, Reshape
from tensorflow.keras.models import Model
from random import shuffle
import datetime
import math
import tensorflow as tf
from IPython.display import clear_output
from tensorflow.keras.callbacks import ReduceLROnPlateau

In [2]:
# Show up to 100 rows whenever a DataFrame is displayed.
pd.set_option('display.max_rows', 100)
# CSS rule applied to the notebook's `.output` elements: lay their children out
# as a flex column.
CSS = """
.output {
    flex-direction: column;
}
"""

# Last expression of the cell: renders the <style> tag so the rule takes effect.
HTML('<style>{}</style>'.format(CSS))

In [3]:
r'''   
data_w_operacion: (DataFrame) data with their corresponding operacion (Entrada/Salida).
    path: C:\Users\ing_l\Tesis grado\Data\SUMO_data_w_operacion.csv

'''

# Absolute location of the input CSV; this is a machine-specific Windows path,
# so it has to be adjusted before running the notebook anywhere else.
data_path = r'C:\Users\ing_l\Tesis grado\Data\SUMO_data_w_operacion.csv'

In [4]:
# Length of each time interval, in minutes (normalize_data_period multiplies
# it by 60 to obtain seconds).
period = 3 #3 - 3

# Number of consecutive time steps in each window fed to the LSTM.
train_size = 4 #7 - 4

# Minimum number of distinct 'ocupacion' values a window must contain to be
# kept as a training sample (e.g. tolerance 3 / train_size 5: 0,1,2,2,1).
# Must be less than or equal to train_size.
tolerance = 3 #5 - 3

# Columns used as per-step inputs of the LSTM; `features` is how many there are.
data_columns = ['tiempo','ocupacion','operacion']
features = len(data_columns)

# Whether the target must differ from the last value of the input window
# (passed to prepare_train_data).
control_target = True

# Columns kept (in this order) in the pre-processed data_week frame.
dateweek_columns = ['id_cuadra','dia de semana', 'tiempo', 'ocupacion','operacion','estacion']

In [None]:
# Load the raw records, asking pandas to parse the 'fecha_tiempo' column as datetimes.
data = pd.read_csv(data_path, parse_dates=['fecha_tiempo'])

In [None]:
# Weekday number of every record, as returned by Timestamp.weekday() (Monday is 0).
data['dia de semana'] = data['fecha_tiempo'].apply(lambda x: pd.Timestamp(x).weekday())
# Reduce 'tiempo' to its time-of-day component (a datetime.time).
data['tiempo'] = data['tiempo'].apply(lambda x: pd.Timestamp(x).time())
# Display the resulting frame.
data

In [None]:
# Keep only the records strictly between 08:00 and 21:00 (both bounds are
# excluded) and renumber the index from 0.
data = data.loc[(data.tiempo > datetime.time(8)) & (data.tiempo < datetime.time(21))].reset_index(drop=True)

In [None]:
# Largest street id in the data; the street embedding is later sized as
# max_street_num + 1.
max_street_num = data.loc[:,'id_cuadra'].max()
print('Max street id: ', max_street_num)

In [None]:
'''
La idea de la red seria que de input tenga fecha_tiempo (o por ahi mejor, solo dia de la semana), 
la ocupacion de esa fecha_tiempo, el clima.
Como output deberia tener la ocupacion de ese momento.

Aparte a todo eso se deberia ver la ocupacion actual y la ocupacion maxima para saber si hay lugar libre.
'''

def to_seconds(time):
    '''Return how many seconds after midnight a datetime.time falls.'''
    minutes_since_midnight = time.hour * 60 + time.minute
    return minutes_since_midnight * 60 + time.second

def normalize_time(time):
    '''Scale a time given in seconds by the number of seconds at 23:59:59, so a day maps to [0, 1].'''
    last_second_of_day = (23 * 60 + 59) * 60 + 59
    return time / last_second_of_day

def modify_operation(operation):
    '''Encode an operation label: 'Entrada' -> 1, 'Salida' -> -1, anything else (e.g. NaN) -> 0.'''
    if operation == 'Entrada':
        encoded = 1
    elif operation == 'Salida':
        encoded = -1
    else:
        encoded = 0
    return encoded

def normalize_ocupation(row):
    '''Divide the row's 'ocupacion' by the notebook-level max_ocup value.'''
    occupancy = row['ocupacion']
    return occupancy / max_ocup

def get_season(fecha):
    '''
    Map a date to a season code: 0 spring, 1 summer, 2 fall, 3 winter.

    The boundaries are Sep 21 - Dec 20 (spring), Dec 21 - Mar 20 (summer),
    Mar 21 - Jun 20 (fall) and Jun 21 - Sep 20 (winter).

    fecha: anything pd.to_datetime can parse; strings are read day-first.
    Raises ValueError when the date cannot be interpreted (NaT).
    '''
    fecha = pd.to_datetime(fecha, dayfirst=True)
    if pd.isna(fecha):
        raise ValueError('get_season needs a valid date, got {!r}'.format(fecha))

    # Encode month and day as a single MMDD integer so ranges compare directly.
    md = fecha.month * 100 + fecha.day
    if 921 <= md <= 1220:
        return 0  # spring
    if 321 <= md <= 620:
        return 2  # fall
    if 621 <= md <= 920:
        return 3  # winter
    # Summer wraps around the end of the year (Dec 21 .. Mar 20), so it cannot
    # be written as one contiguous MMDD range; it is whatever remains.
    return 1  # summer

# Work on an independent copy holding only the columns needed downstream.
selected_columns = ['id_cuadra','dia de semana', 'tiempo', 'ocupacion','operacion','fecha']
data_week = data.loc[:, selected_columns].copy()

# Encode time as seconds, the operation as -1/0/1 and the date as a season code.
data_week['tiempo'] = data_week['tiempo'].map(to_seconds)
data_week['operacion'] = data_week['operacion'].map(modify_operation)
data_week['estacion'] = data_week['fecha'].map(get_season)

# Occupancy is scaled by the largest 'lugares_cuadra' value found in the data.
max_ocup = data['lugares_cuadra'].max()
data_week['ocupacion'] = data_week['ocupacion'] / max_ocup
data_week = data_week.loc[:, dateweek_columns]

In [None]:
print('max_ocup: ', max_ocup)

In [None]:
data_week

In [None]:
# Positions in data_week where a new day starts, i.e. where the weekday differs
# from the previous row. Consecutive entries delimit one day of records and are
# used later to normalise the times day by day.
weekday_by_position = data_week['dia de semana'].reset_index(drop=True)
starts_new_day = weekday_by_position.ne(weekday_by_position.shift())
indexes = np.flatnonzero(starts_new_day.to_numpy()).tolist()
if not indexes:
    # No rows at all: keep the single starting position the slicing code expects.
    indexes = [0]
print('Work complete.')

In [None]:
# Assumes it receives a frame holding a single day (it is called once per day).
def normalize_data_period(data, period=5):
    '''
    Resample one day of records onto a regular grid of `period`-minute steps.

    data: DataFrame with at least the 'tiempo' (seconds), 'ocupacion' and
        'operacion' columns.
    period: grid step in minutes.

    The first output row is the first input row with its time rounded down to
    a multiple of the step. Each following row is the last input row (in frame
    order) whose time is below the next grid boundary, stamped with that
    boundary; when no such row exists the previous output row is repeated.
    'operacion' is set to 0 whenever 'ocupacion' did not change with respect to
    the previous output row.

    Returns a new DataFrame with the dateweek_columns columns and a fresh
    0..n-1 index; an empty frame is returned for empty input.
    '''
    if len(data) == 0:
        return pd.DataFrame(columns=dateweek_columns)

    seconds = period * 60

    # Rows are collected as plain dicts and turned into a frame once: writing
    # through `out.iloc[-1].tiempo = ...` only modifies a temporary Series, and
    # DataFrame.append no longer exists in pandas 2.
    first = data.iloc[0].to_dict()
    first['tiempo'] = first['tiempo'] - first['tiempo'] % seconds
    records = [first]

    span = data.iloc[-1].tiempo - data.iloc[0].tiempo
    for _ in range(math.ceil(span / seconds)):
        previous = records[-1]
        boundary = previous['tiempo'] + seconds
        seen_so_far = data.loc[data.tiempo < boundary]
        if len(seen_so_far) == 0:
            current = dict(previous)
        else:
            current = seen_so_far.iloc[-1].to_dict()
        current['tiempo'] = boundary
        if current['ocupacion'] == previous['ocupacion']:
            # Nothing changed during this step, so no entry/exit is recorded.
            current['operacion'] = 0
        records.append(current)

    return pd.DataFrame(records, columns=dateweek_columns)

In [None]:
# Normalise the times of every day: each day is resampled onto the regular
# `period`-minute grid and the per-day results are stacked once at the end
# (DataFrame.append is not available in pandas 2 and copies on every call).
day_bounds = list(zip(indexes, list(indexes[1:]) + [len(data_week)]))
normalized_days = []
for i, (day_start, day_stop) in enumerate(day_bounds):
    clear_output()
    print('Iteracion {} de {}'.format(i, len(day_bounds) - 1))
    normalized_days.append(normalize_data_period(data_week[day_start:day_stop].copy(), period=period))

if normalized_days:
    normalized_data = pd.concat(normalized_days)
else:
    normalized_data = pd.DataFrame(columns=dateweek_columns)

In [None]:
normalized_data

In [None]:
# Scale the 'tiempo' column (seconds since midnight) to values between 0 and 1.
# Note: re-running this cell scales the already-scaled values again.
normalized_data['tiempo'] = normalized_data['tiempo'].apply(normalize_time)

In [None]:
def separate_data_per_street(data, street):
    '''
    Return the rows of `data` whose 'id_cuadra' equals `street`.
    The original row labels are preserved.
    '''
    belongs_to_street = data['id_cuadra'] == street
    return data.loc[belongs_to_street]

# One frame per distinct street, in order of first appearance in normalized_data.
data_separate = [
    separate_data_per_street(normalized_data, street_id)
    for street_id in normalized_data['id_cuadra'].drop_duplicates()
]

In [None]:
data_separate

In [None]:
# This is the current version.
def prepare_train_data(data, street, train_size=3, tolerance=0, data_columns=['tiempo','ocupacion','operacion'], control_target=False):
    '''
    Build sliding-window training samples for one street.

    data: DataFrame of a single street with 'dia de semana', 'estacion',
        'ocupacion' and every column named in data_columns.
    street: value stored as the street label of every sample.
    train_size: number of consecutive rows in each window.
    tolerance: minimum number of distinct 'ocupacion' values a window needs.
    data_columns: columns copied into each window (must include 'ocupacion').
    control_target: when True, samples whose target equals the last
        'ocupacion' of the window are discarded; when False they are kept.

    Windows never mix weekdays (0..5 are scanned) or seasons (0..3).

    Returns five numpy arrays: windows (n, train_size, len(data_columns)),
    street labels, weekdays, seasons and targets, each with n entries.
    '''
    train_data = []
    train_street = []
    train_weekday = []
    train_season = []
    targets = []

    # Weekday 6 is not scanned: the original note says there are no Sunday values.
    for weekday in range(0, 6):
        data_weekday = data.loc[data['dia de semana'] == weekday]

        for s in range(0, 4):
            data_season = data_weekday.loc[data_weekday['estacion'] == s]

            for i in range(len(data_season) - train_size - 1):
                window = data_season.iloc[i:i + train_size].loc[:, data_columns]
                if len(window['ocupacion'].drop_duplicates()) < tolerance:
                    continue

                # NOTE(review): the target sits at i + train_size + 1, so one row
                # is skipped between the end of the window and the target.
                # Confirm this two-step horizon is intended.
                target = data_season.iloc[i + train_size + 1]['ocupacion']
                if control_target and window['ocupacion'].iloc[-1] == target:
                    continue

                train_data.append(np.array(window.values))
                train_street.append(street)
                train_weekday.append(weekday)
                train_season.append(s)
                targets.append(target)

    return np.array(train_data), np.array(train_street), np.array(train_weekday), np.array(train_season), np.array(targets)
    
        
# Per-street training arrays; entry k of every list belongs to the same street.
train_data = []
train_street = []
train_weekday = []
train_season = []
targets = []

for street_frame in data_separate:
    if len(street_frame) == 0:
        continue
    # Label the samples with the real street id rather than the position in
    # data_separate: the street embedding is sized by max_street_num + 1 and
    # the prediction code looks streets up by 'id_cuadra'.
    street = int(street_frame['id_cuadra'].iloc[0])
    clear_output()
    print('Calle num: ', street)
    p_train_data, p_train_street, p_train_weekday, p_train_season, p_targets = prepare_train_data(street_frame, street, train_size=train_size, tolerance=tolerance, data_columns=data_columns, control_target=control_target)
    train_data.append(p_train_data)
    train_street.append(p_train_street)
    train_weekday.append(p_train_weekday)
    train_season.append(p_train_season)
    targets.append(p_targets)

In [None]:
'''# ESTE ANDA
def prepare_train_data(data, street, train_size=3):
    
    #Puts train_size values in a row with 
      
    train_data = []
    train_street = []
    train_weekday = []
    train_time = []
    targets = []

    #We dont have any value in sunday
    for weekday in range(0, 6):
        data_weekday = data.loc[data['dia de semana'] == weekday]

        for i in range(len(data_weekday)-train_size):
            train_data.append(np.array(data_weekday[i:i+train_size].loc[:,['tiempo','ocupacion','operacion']].values)) 
            train_street.append(street)
            train_weekday.append(weekday)

        train_time.append(np.array(data_weekday[train_size:].loc[:,'tiempo'].values))
        targets.append(np.array(data_weekday[train_size:].loc[:,'ocupacion'].values))               

    return np.array(train_data), np.array(train_street), np.array(train_weekday), np.array(train_time), np.array(targets)
    
        
train_data = []
train_street = []
train_weekday = []
train_time = []
targets = []

for street in range(len(normalized_data['id_cuadra'].drop_duplicates())):
    print('Calle num: ', street)
    p_train_data, p_train_street, p_train_weekday, p_train_time, p_targets = prepare_train_data(data_separate[street], street, train_size=train_size)
    train_data.append(p_train_data)
    train_street.append(p_train_street)
    train_weekday.append(p_train_weekday)
    train_time.append(p_train_time)
    targets.append(p_targets)'''

In [None]:
'''train_street'''

In [None]:
'''train_data'''

In [None]:
'''targets.shape'''

In [None]:
#t_train_weekday.shape

In [None]:
# Start the flattened (all-streets) arrays from the first street's arrays.
t_train_street = train_street[0]
t_train_weekday = train_weekday[0]
t_train_data = train_data[0]
t_train_season = train_season[0]
t_targets = targets[0]

In [None]:
'''t_train_street = train_street
t_train_weekday = train_weekday
t_train_data = train_data
t_train_season = train_season
t_targets = targets'''

In [None]:
# Flatten the per-street arrays into single arrays covering all streets.
# Entries with no windows are left out of every array (an empty window array
# is 1-D and cannot be concatenated with the 3-D ones) and reported below.
# Everything is rebuilt from the per-street lists, so re-running this cell
# does not duplicate samples.
empty_streets = [k for k, windows in enumerate(train_data) if len(windows) == 0]
kept_streets = [k for k, windows in enumerate(train_data) if len(windows) > 0]
if not kept_streets:
    raise ValueError('No training windows were generated for any street.')

t_train_street = np.concatenate([train_street[k] for k in kept_streets])
t_train_weekday = np.concatenate([train_weekday[k] for k in kept_streets])
t_train_season = np.concatenate([train_season[k] for k in kept_streets])
t_targets = np.concatenate([targets[k] for k in kept_streets])
t_train_data = np.concatenate([train_data[k] for k in kept_streets])

print(empty_streets)

In [None]:
# Shuffle all samples together so the five arrays stay aligned.
# NOTE(review): no random seed is set, so the order differs on every run.
to_shuffle = list(zip(t_train_street, t_train_weekday, t_train_data, t_train_season, t_targets))
shuffle(to_shuffle)
# Unzip back into five tuples, one per input/target.
s_train_street, s_train_weekday, s_train_data, s_train_season, s_targets = zip(*to_shuffle)

s_targets = np.array(s_targets)

In [None]:
'''x = tf.random.shuffle(t_train_street, seed=5)
y = tf.random.shuffle(t_train_weekday, seed=5)
z = tf.random.shuffle(t_targets, seed=5)
t = tf.random.shuffle(t_train_data, seed=5)'''

In [None]:
'''z'''

In [None]:
(t_train_data.shape)

In [None]:
# Convert the shuffled samples to float64 tensors. This rebinds the t_* names,
# which until now held the unshuffled numpy arrays.
t_train_street = tf.convert_to_tensor(s_train_street, np.float64)
t_train_weekday = tf.convert_to_tensor(s_train_weekday, np.float64)
t_train_data = tf.convert_to_tensor(s_train_data, np.float64)
t_train_season = tf.convert_to_tensor(s_train_season, np.float64)
t_targets = tf.convert_to_tensor(s_targets, np.float64)

In [None]:
'''#Aplano todo y lo dejo para que entre a la red (elimino la division entre calles)
train_street = [y for x in train_street for y in x]
train_weekday = [y for x in train_weekday for y in x]
train_data = [y for x in train_data for y in x]
train_time = [y for x in train_time for y in x]
train_time = [y for x in train_time for y in x]
targets = [y for x in targets for y in x]
targets = [y for x in targets for y in x]
'''

#VER COMO HACER QUE ESTO FUNCIONE
'''
to_shuffle = list(zip(train_street, train_weekday, train_data, train_time, targets))
shuffle(to_shuffle)
s_train_street, s_train_weekday, s_train_data, s_train_time, s_targets = zip(*to_shuffle)

s_targets = np.array(s_targets)'''

In [None]:
'''t_train_data'''

In [None]:
'''(t_targets)'''

In [None]:
'''(s_train_weekday[0])'''

In [None]:
'''(s_train_data)'''

In [None]:
'''(s_train_time[0])'''

In [None]:
'''(s_targets[0])'''

In [None]:
# Street id and weekday number to plot.
street = 65
weekday = 3


# Time and occupancy of every record of that street on that weekday ...
plot_data = normalized_data.loc[(normalized_data.id_cuadra == street) & (normalized_data['dia de semana'] == weekday), ['tiempo', 'ocupacion']]
# ... averaged over all records that share the same time of day.
plot_data = plot_data.groupby(by='tiempo').mean().reset_index()

def denormalize_time(time):
    '''Turn a time scaled by normalize_time back into a datetime.time.'''
    last_second_of_day = (23 * 60 + 59) * 60 + 59
    seconds = time * last_second_of_day
    return pd.Timestamp(seconds, unit='s').time()

def denormalize_ocupation(ocupation):
    '''Multiply a scaled occupancy by the notebook-level max_ocup value.'''
    return max_ocup * ocupation

def clean_predictions(ocupation):
    '''Clamp non-positive predictions to 0 and round the rest to the nearest integer.'''
    return 0 if ocupation <= 0 else round(ocupation)


# Back to wall-clock times and to car counts for plotting.
plot_data['tiempo'] = plot_data['tiempo'].apply(denormalize_time)
plot_data['ocupacion'] = plot_data['ocupacion'].apply(denormalize_ocupation)

# The default figure size is only read when a figure is created, so it has to
# be set before the first plotting call for it to apply to this plot.
plt.rcParams['figure.figsize'] = (15,8)

# Set the label titles
plt.xlabel('Time')
plt.ylabel('Parked cars')
plt.title('Average parked cars in street nº{} in day {}.'.format(street, weekday))

# Average occupancy along the day
plt.plot(plot_data['tiempo'], plot_data['ocupacion'])

plt.grid()
plt.show()

In [None]:
# Street branch: street id -> 32-dimensional embedding.
# Input shapes are written as 1-tuples: `(1)` is just the integer 1.
input_street = Input(shape=(1,), name='input_street')
emb_street = Embedding(input_length=1, input_dim=max_street_num+1, output_dim=32)(input_street)
flat_street = Flatten()(emb_street)

# Weekday branch: weekday number -> 8-dimensional embedding.
# input_dim=6 accepts the ids 0..5 only.
input_weekday = Input(shape=(1,), name='input_weekday')
emb_weekday = Embedding(input_length=1, input_dim=6, output_dim=8)(input_weekday)
flat_weekday = Flatten()(emb_weekday)

# Sequence branch: windows of `train_size` steps with `features` values each
# (time, occupancy and operation), read by two stacked LSTM layers.
input_data = Input(shape=(train_size, features), name='input_data')
lstm_layer = LSTM(16, return_sequences=True, recurrent_dropout=0.25)(input_data)
lstm = LSTM(6, return_sequences=False, recurrent_dropout=0.25)(lstm_layer)

# Season branch: season code (0..3) -> 4-dimensional embedding.
input_season = Input(shape=(1,), name='input_season')
emb_season = Embedding(input_length=1, input_dim=4, output_dim=4)(input_season)
flat_season = Flatten()(emb_season)

# A separate "time to predict" input was tried and discarded by the author:
# it made the network worse, so the data is resampled to fixed intervals instead.

# Merge all branches in one call, in the same feature order as before
# (weekday, street, lstm, season).
concat = Concatenate(axis=-1)
input_merge = concat([flat_weekday, flat_street, lstm, flat_season])

d = Dense(32, activation='relu')(input_merge)
d = Dropout(0.25)(d)
out = Dense(1, activation='linear')(d)

# The order of `inputs` is the order every fit/predict call has to follow.
model = Model(inputs=[input_weekday, input_street, input_season, input_data], outputs=out)
model.summary()
model.compile(loss='MSE', optimizer='adam', metrics=['MAE'])

In [None]:
# Earlier fit call, kept for reference (disabled):
#h = model.fit([s_train_weekday, s_train_street, s_train_data], s_targets, epochs=5, batch_size=64, validation_split=0.1)
# Multiply the learning rate by 0.2 after 2 epochs without val_loss improvement.
red_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=0.00000001)

# Earlier fit calls, kept for reference (disabled):
#h = model.fit([t_train_weekday, t_train_street, t_train_data], t_targets, epochs=20, batch_size=32, validation_split=0.2, callbacks=[red_lr])
#h = model.fit([t_train_weekday, t_train_street, t_train_season, t_train_data], tf.map_fn(denormalize_ocupation, t_targets), epochs=20, batch_size=16, validation_split=0.2, callbacks=[red_lr])
# Active run: inputs follow the order weekday, street, season, data; the
# validation_split=0.1 argument reserves a tenth of the samples for validation.
h = model.fit([t_train_weekday, t_train_street, t_train_season, t_train_data], t_targets, epochs=20, batch_size=8, validation_split=0.1, callbacks=[red_lr])

In [None]:
%matplotlib inline
import pickle
import numpy as np
from matplotlib import pyplot as plt

# Mean absolute error per epoch. The second curve comes from the samples held
# out through validation_split, so it is labelled as validation.
plt.plot(h.history['MAE'])
plt.plot(h.history['val_MAE'])
plt.title('model MAE')
plt.ylabel('MAE')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
# Loss (MSE) per epoch.
plt.plot(h.history['loss'])
plt.plot(h.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
# Which day (entry of `indexes`) to use for the comparison.
index = 2

# Rows of that day taken from the pre-processed, not yet resampled, data.
true_data = data_week[indexes[index]:indexes[index+1]].copy().reset_index(drop=True)
# Display the distinct times of day available, to help choose start/finish rows.
true_data['tiempo'].apply(normalize_time).apply(denormalize_time).drop_duplicates()

In [None]:
# Row positions in true_data where the forecast starts and where it should end.
start_index = 43
finish_index = 61

In [None]:
# Times (in seconds) of the first and last row of the forecast range, and the
# number of `period`-minute steps needed to go from one to the other.
start_time = true_data['tiempo'].iloc[start_index]
finish_time = true_data['tiempo'].iloc[finish_index]
predictions_needed = int((finish_time - start_time) / (period * 60))

plot_true_data = true_data.copy()

# Weekday of the selected day, used in the plot title.
weekday = plot_true_data['dia de semana'][0]

def denormalize_time_to_date(time):
    '''Convert a number of seconds into a datetime.time (not called in the visible code).'''
    return pd.Timestamp(time, unit='s').time()


#print(start_index, finish_index)

# Scale the times like the training data.
plot_true_data['tiempo'] = plot_true_data['tiempo'].apply(normalize_time)

def init_test(data, data_columns=['tiempo','ocupacion','operacion']):
    '''
    Build the model inputs for the first prediction from a window of rows.

    data: DataFrame holding the window (one row per time step) with
        'id_cuadra', 'dia de semana', 'estacion' and the data_columns.
    data_columns: columns that form the sequence input, in model order.

    The street, weekday and season are read from the first row.

    Returns (street, weekday, season, window): three arrays of shape (1, 1)
    and one of shape (1, len(data), len(data_columns)).
    '''
    street = np.array(data['id_cuadra'].iloc[0]).reshape(1, 1)
    # Built from the weekday value; it used to be built from the street id.
    weekday = np.array(data['dia de semana'].iloc[0]).reshape(1, 1)
    season = np.array(data['estacion'].iloc[0]).reshape(1, 1)
    # The shape is taken from the window itself, so no notebook globals are needed.
    window = np.array(data[data_columns]).reshape(1, len(data), len(data_columns))
    return street, weekday, season, window

def prepare_test_data(data, predicted_data, predicted_time, data_columns=['tiempo','ocupacion','operacion']):
    '''
    Slide the input window one step forward using the latest prediction.

    data: array of shape (1, steps, len(data_columns)), the current window.
    predicted_data: predicted (scaled) occupancy for the new step.
    predicted_time: (scaled) time of the new step.
    data_columns: meaning of each feature position; 'tiempo', 'ocupacion'
        and 'operacion' are supported.

    The oldest step is dropped and a new one is appended. 'operacion' is not
    predicted by the model, so it is approximated with the sign of the
    occupancy change against the previous step (+1 rise, -1 drop, 0 equal),
    the same encoding used for the training data.

    Returns a new array with the same shape as `data`.
    Raises ValueError for a column name it does not know how to fill.
    '''
    window = np.asarray(data)[0]
    previous_step = window[-1]

    new_step = []
    for column in data_columns:
        if column == 'tiempo':
            new_step.append(predicted_time)
        elif column == 'ocupacion':
            new_step.append(predicted_data)
        elif column == 'operacion':
            if 'ocupacion' in data_columns:
                previous_ocupation = previous_step[data_columns.index('ocupacion')]
                new_step.append(float(np.sign(predicted_data - previous_ocupation)))
            else:
                # Without an occupancy feature there is nothing to compare against.
                new_step.append(0.0)
        else:
            raise ValueError('Unsupported column in data_columns: {!r}'.format(column))

    new_step = np.array(new_step, dtype=window.dtype).reshape(1, window.shape[1])
    out = np.concatenate([window[1:], new_step])
    return out.reshape(1, window.shape[0], window.shape[1])
    
#print(plot_true_data)


# Seed the rolling forecast with the `train_size` observed rows ending at start_index.
test_street, test_weekday, test_season, test_data = init_test(plot_true_data[start_index-train_size+1:start_index+1], data_columns=data_columns)

# Time (position 0) and occupancy (position 1) of the last observed step.
last_data_time = test_data[0][test_data.shape[1]-1][0]

# Rows are collected in a list and turned into a frame once, since
# DataFrame.append is not available in pandas 2.
predicted_rows = [{'ocupacion': test_data[0][test_data.shape[1]-1][1], 'tiempo': last_data_time}]

for i in range(predictions_needed):
    print('Iteration: {} of {}'.format(i, (predictions_needed)))

    # Inputs follow the order the model was built and trained with:
    # weekday, street, season, data.
    pred_value = model.predict([test_weekday, test_street, test_season, test_data])[0][0]
    pred_time = last_data_time + normalize_time(period * 60)

    predicted_rows.append({'ocupacion': pred_value, 'tiempo': pred_time})
    last_data_time = pred_time
    # Feed the prediction back in as the newest step of the window.
    test_data = prepare_test_data(test_data, predicted_data=pred_value, predicted_time=pred_time, data_columns=data_columns)

plot_predicted_data = pd.DataFrame(predicted_rows, columns=data_columns)

# Back to wall-clock times and car counts; predictions are clamped and rounded.
plot_true_data['tiempo'] = plot_true_data['tiempo'].apply(denormalize_time)
plot_true_data['ocupacion'] = plot_true_data['ocupacion'].apply(denormalize_ocupation)
plot_predicted_data['tiempo'] = plot_predicted_data['tiempo'].apply(denormalize_time)
plot_predicted_data['ocupacion'] = plot_predicted_data['ocupacion'].apply(denormalize_ocupation)
plot_predicted_data['ocupacion'] = plot_predicted_data['ocupacion'].apply(clean_predictions)

# Drop the per-step progress output before drawing.
clear_output()

# The default figure size is only read when a figure is created, so it is set
# before the first plotting call.
plt.rcParams['figure.figsize'] = (15,8)

# Set the label titles
plt.xlabel('Time')
plt.ylabel('Parked cars')
plt.title('Average parked cars in street nº{} on {}.'.format(test_street[0][0], weekday))

# Observed values in blue, forecast in red
plt.plot(plot_true_data['tiempo'], plot_true_data['ocupacion'], c='b')
plt.plot(plot_predicted_data['tiempo'], plot_predicted_data['ocupacion'], c='r')

plt.grid()
plt.legend(['True values','Predicted values'])
plt.show()

In [None]:
plot_predicted_data

In [None]:
# Index of the training sample to inspect; the next cell reads it too, so the
# loop below must not reuse the name `i`.
i = 1

# Shape every input as a batch of one, matching what model.predict expects.
a = np.array(t_train_weekday[i])
a = np.reshape(a, (1,1))

b = np.array(t_train_street[i])
b = np.reshape(b, (1,1))

c = np.array(t_train_season[i])
c = np.reshape(c, (1,1))

d = np.array(t_train_data[i])
d = np.reshape(d, (1,train_size,features))
d = tf.convert_to_tensor(d, np.float64)

# Occupancy (feature position 1) of every step of the window, in car counts.
not_normalized_data = [float(d[0][step][1]) * max_ocup for step in range(train_size)]


print('Weekday: ', a[0])
print('Street:  ', b[0])
print('Season:    ', c[0])
print('Data:    ', d[0])
print('Not normalized data: ', not_normalized_data)
print('Targets: ', t_targets[i])
print('Targets: ', t_targets[i] * max_ocup)

In [None]:
# Prediction for the inspected sample, as returned by the model and multiplied
# by max_ocup (car counts).
print('Valor predicho: ', model.predict([a, b, c, d])[0][0])
print('Valor predicho (si normalizar es necesario): ', model.predict([a, b, c, d])[0][0] * max_ocup)

import math # ceiling function
#print('Valor predicho techo: ', math.ceil(model.predict([a, b, d])[0][0]))

# Target of sample `i`, in car counts. `i` is whatever the previous cell left it as.
print('Valor real:     ', t_targets[i] * max_ocup)

In [None]:
'''index = 1


plot_true_data = data_week[indexes[index]:indexes[index+1]].copy().reset_index(drop=True)

weekday = plot_true_data['dia de semana'][0]
def denormalize_time_to_date(time):
    return pd.Timestamp(time, unit='s').time()

quarter = int(len(plot_true_data) * 1/4)
three_quarter = int(quarter * 3)

#three_quarter_time = plot_true_data.iloc[three_quarter]['tiempo']

plot_true_data['tiempo'] = plot_true_data['tiempo'].apply(normalize_time)

def init_test(data, data_columns=['tiempo','ocupacion','operacion']):
    global train_size, features
    street = data['id_cuadra'].iloc[0]
    weekday = data['dia de semana'].iloc[0]
    season = data['estacion'].iloc[0]
    d = data[data_columns]
    street = np.array(street).reshape(1,1)
    weekday = np.array(street).reshape(1,1)
    season = np.array(season).reshape(1,1)
    d = np.array(d).reshape(1,train_size,features)
    return street, weekday, season, d

def prepare_test_data(data, predicted_data, predicted_time, data_columns=['tiempo','ocupacion','operacion']):
    last = data[0]
    if 'operacion' in data_columns:
        last[-1]
        print('TODO')
    else:
        #print(data[0])
        #print(np.array([predicted_time,predicted_data]))
        
        #print(np.concatenate([data[0], np.array([predicted_time,predicted_data]).reshape(1,features)]))
        
        out = np.concatenate([data[0], np.array([predicted_time,predicted_data]).reshape(1,features)])
        out = out[1:out.shape[0]].reshape(1, train_size, features)
        #print(out)
        #print(out.shape[1])
    return out
    
#print(plot_true_data)

test_street, test_weekday, test_season, test_data = init_test(plot_true_data[-train_size-1-quarter:-1-quarter], data_columns=data_columns)

plot_predicted_data = pd.DataFrame(columns=data_columns)

last_data_time = test_data[0][test_data.shape[1]-1][0]

plot_predicted_data = plot_predicted_data.append({'ocupacion':test_data[0][test_data.shape[1]-1][1], 'tiempo':last_data_time}, ignore_index=True)
#plot_predicted_data = plot_predicted_data.append({'ocupacion':denormalize_ocupation(test_data[0][test_data.shape[1]-1][1]), 'tiempo':last_data_time}, ignore_index=True)

print(test_data)

for i in range(len(plot_true_data)-quarter):
    print('Iteration: {} of {}'.format(i, len(plot_true_data)-quarter-1))

    pred_value = model.predict([test_street, test_weekday, test_season, test_data])[0][0]
    pred_time = last_data_time + normalize_time(period * 60)
    
    #if i == 1 or i == 2 or i == 3:
    #    print(pred_value)
    
    plot_predicted_data = plot_predicted_data.append({'ocupacion': pred_value, 'tiempo': pred_time}, ignore_index=True)
    last_data_time = pred_time
    test_data = prepare_test_data(test_data, predicted_data=pred_value, predicted_time=pred_time, data_columns=data_columns)


plot_true_data['tiempo'] = plot_true_data['tiempo'].apply(denormalize_time)
plot_true_data['ocupacion'] = plot_true_data['ocupacion'].apply(denormalize_ocupation)
plot_predicted_data['tiempo'] = plot_predicted_data['tiempo'].apply(denormalize_time)
plot_predicted_data['ocupacion'] = plot_predicted_data['ocupacion'].apply(denormalize_ocupation)
plot_predicted_data['ocupacion'] = plot_predicted_data['ocupacion'].apply(clean_predictions)


#Set the label titles
plt.xlabel('Time')
plt.ylabel('Parked cars')
plt.title('Average parked cars in street nº{} on {}.'.format(test_street[0][0], weekday))

#Set the plot data for each graph (ax1,ax2,ax3)
plt.plot(plot_true_data['tiempo'], plot_true_data['ocupacion'], c='b')
plt.plot(plot_predicted_data['tiempo'], plot_predicted_data['ocupacion'], c='r')

clear_output()

plt.rcParams['figure.figsize'] = (15,8)
plt.grid()
plt.legend(['True values','Predicted values'])
#plt.xticks(normalized_data['fecha_hora'], time)
plt.show()'''

### Para el siguiente valor (y simular calcular valores seguidos)

### En este intento voy a utilizar como dato de tiempo el dia con num y la hora

### Otra red!

### Ideas de redes:
### Cosas que deberian ir si o si de entrada: Dia de la semana, mes (cuando tenga mas datos), tiempo
### Utilizar los valores de Entrada/Salida y que prediga capacidad 
### Utilizar valores de capacidad y que prediga capacidad