# Import necessary libraries

In [165]:
# Tratamiento de datos
# ==============================================================================
import numpy as np
import pandas as pd

# Gráficos
# ==============================================================================
import matplotlib.pyplot as plt
import seaborn as sns

# Modelado y Forecasting
# ==============================================================================
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.arima.model import ARIMA

# Guardado de modelos
# ==============================================================================
from pickle import dump

# Configuración warnings
# ==============================================================================
import warnings
warnings.filterwarnings('once')

# Reading and preparing data

In [148]:
# Load the interim BiciMAD time-series CSV (comma is already the pandas default separator).
data_series = pd.read_csv("../data/interim/bicimad_time_series.csv")

In [149]:
# Parse the 'unlock_date' column into datetimes using the explicit year-month-day format.
unlock_dates = pd.to_datetime(data_series['unlock_date'], format='%Y-%m-%d')
data_series['unlock_date'] = unlock_dates

In [151]:
# Index the frame by 'unlock_date' while keeping the column itself (drop=False).
data_series = data_series.set_index('unlock_date', drop=False)

# Time Series

Evaluaremos y predeciremos cada variable de nuestro data_series, usando como índice la fecha de desbloqueo 'unlock_date'.

***1. Trip_minutes***

* LSTM

In [166]:
# Normalize the data.
# The scaler is fitted on the training portion only, so the min/max of the
# held-out test window does not leak into the scaling used for training.
# As a consequence, scaled test values may fall slightly outside [0, 1].
series_values = data_series["trip_minutes"].to_numpy().reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(series_values[:-12])
scaled_data = scaler.transform(series_values)

# Build the training and test sets: the last 12 observations are held out.
train_data = scaled_data[:-12]
test_data = scaled_data[-12:]

# Build the input and output sequences
def create_sequences(data, seq_length):
    """Slice a series into sliding input windows and their next-step targets.

    For every valid start position ``i`` the input is
    ``data[i:i + seq_length]`` and the target is ``data[i + seq_length]``.

    Args:
        data: Sequence or array of observations, ordered in time.
        seq_length: Number of consecutive observations in each input window.

    Returns:
        A tuple ``(xs, ys)`` of NumPy arrays with ``len(data) - seq_length``
        samples each. Both arrays are empty when ``data`` has no more than
        ``seq_length`` observations.
    """
    data = np.asarray(data)

    # Every start position whose target index (i + seq_length) is still in
    # range is valid, so there are len(data) - seq_length samples. The former
    # extra "- 1" silently discarded the last one.
    n_samples = len(data) - seq_length
    if n_samples <= 0:
        return np.array([]), np.array([])

    xs = np.stack([data[i:i + seq_length] for i in range(n_samples)])
    # The targets are simply the series shifted by seq_length; copy so the
    # result does not alias the caller's array.
    ys = data[seq_length:].copy()
    return xs, ys

# Window length: number of consecutive observations fed to the network per sample.
seq_length = 100
X, y = create_sequences(train_data, seq_length)

# Build the LSTM network: a 50-unit LSTM layer followed by a single-output
# dense layer, compiled with the Adam optimizer and mean-squared-error loss.
model = Sequential([
    LSTM(50, activation='relu', input_shape=(seq_length, 1)),
    Dense(1),
])
model.compile(loss='mse', optimizer='adam')

# Train the model for 200 epochs without progress output.
model.fit(X, y, epochs=200, verbose=0)

# Make predictions
# The test window holds only 12 points, so it cannot supply a full
# seq_length-long input (the previous reshape to (1, seq_length, 1) failed).
# Instead, seed the model with the last seq_length training observations and
# forecast recursively, one step per held-out test point, feeding each
# prediction back in as the newest element of the window.
inputs = train_data[-seq_length:].reshape((1, seq_length, 1))
scaled_forecast = []
for _ in range(len(test_data)):
    next_step = model.predict(inputs, verbose=0)
    scaled_forecast.append(next_step[0, 0])
    # Slide the window: drop the oldest value, append the new prediction.
    inputs = np.concatenate([inputs[:, 1:, :], next_step.reshape((1, 1, 1))], axis=1)

# De-normalize the predictions back to the original units of the series.
predictions = scaler.inverse_transform(np.array(scaled_forecast).reshape(-1, 1))


In [15]:
# Plot the result
# Flatten the predictions and give them explicit dates; without x values they
# would be drawn at integer positions 0..n-1 instead of on the datetime axis
# used by the real data.
# NOTE(review): assumes the predictions correspond to the last
# len(predictions) observations of data_series — confirm.
predicted_values = np.asarray(predictions).ravel()
predicted_dates = data_series.index[-len(predicted_values):]

plt.figure(figsize=(10,6))
plt.plot(data_series["trip_minutes"], label="Datos Reales")
# Markers keep the forecast visible even when it consists of a single point.
plt.plot(predicted_dates, predicted_values, label="Predicciones", color='r', marker='o')
plt.title("Predicción vs Datos Reales")
plt.xlabel("Tiempo")
plt.ylabel("Minutos de viaje")
plt.legend()
plt.grid(True)
plt.show()


  warn('Non-invertible starting MA parameters found.'


* ARIMA

In [None]:
# Fit an ARIMA model of order (1, 1, 1) on the trip-minutes series.
arima_order = (1, 1, 1)
model_arima = ARIMA(data_series["trip_minutes"], order=arima_order)
model_arima_fit = model_arima.fit()

# Predict the 12 periods that immediately follow the last observation.
n_obs = len(data_series)
forecast_steps = 12
predictions_arima = model_arima_fit.predict(start=n_obs, end=n_obs + forecast_steps - 1)

# Plot the predictions together with the observed series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(data_series["trip_minutes"], label="Datos Reales")
ax.plot(predictions_arima, label="Predicciones", color='r')
ax.set_title("Predicción vs Datos Reales")
ax.set_xlabel("Tiempo")
ax.set_ylabel("Minutos de viaje")
ax.legend()
ax.grid(True)
plt.show()

***2. Distance***

* LSTM

* ARIMA