In [1]:
import requests
import streamlit as st
# from streamlit_lottie import st_lottie
import pandas as pd
import numpy as np
import pickle 
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
from datetime import datetime 


In [2]:
fecha = '2023-05-25'
temperatura = 20.4
humedad = 65.5
viento = 25.3

In [3]:
nuevo_dato = {'date': fecha, 'temp': temperatura, 'hum': humedad, 'windspeed': viento}
df = pd.DataFrame(nuevo_dato, index=[0])
df.sample()

Unnamed: 0,date,temp,hum,windspeed
0,2023-05-25,20.4,65.5,25.3


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1 entries, 0 to 0
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   date       1 non-null      object 
 1   temp       1 non-null      float64
 2   hum        1 non-null      float64
 3   windspeed  1 non-null      float64
dtypes: float64(3), object(1)
memory usage: 40.0+ bytes


In [5]:
df['date'] = pd.to_datetime(df['date'])

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1 entries, 0 to 0
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   date       1 non-null      datetime64[ns]
 1   temp       1 non-null      float64       
 2   hum        1 non-null      float64       
 3   windspeed  1 non-null      float64       
dtypes: datetime64[ns](1), float64(3)
memory usage: 40.0 bytes


In [7]:
df['year'] = df['date'].dt.year
df.sample()

Unnamed: 0,date,temp,hum,windspeed,year
0,2023-05-25,20.4,65.5,25.3,2023


In [45]:
año_func = df['date'].dt.year[0]
año_func

2023

In [8]:
df['month'] = df['date'].dt.month
df.sample()

Unnamed: 0,date,temp,hum,windspeed,year,month
0,2023-05-25,20.4,65.5,25.3,2023,5


In [9]:
df["weekday"]= df["date"].dt.day_name()
df.sample()

Unnamed: 0,date,temp,hum,windspeed,year,month,weekday
0,2023-05-25,20.4,65.5,25.3,2023,5,Thursday


In [10]:
cal = calendar() #llamada al calendario
holidays = cal.holidays(start=df['date'].min(), end=df['date'].max())
df['holiday'] = df['date'].isin(holidays)
df.sample()

Unnamed: 0,date,temp,hum,windspeed,year,month,weekday,holiday
0,2023-05-25,20.4,65.5,25.3,2023,5,Thursday,False


In [11]:
mapa = {True: 'holiday', False: 'not holiday'}
df['holiday'] = df['holiday'].map(mapa)

In [12]:
df

Unnamed: 0,date,temp,hum,windspeed,year,month,weekday,holiday
0,2023-05-25,20.4,65.5,25.3,2023,5,Thursday,not holiday


In [13]:
def laborables(dato):
    laborables = pd.date_range(start= fecha, end=fecha, freq='B')
    if dato in laborables:
        return 'workingday'
    else:
        return 'weekend or holiday'

In [14]:
df['workingday'] = df['date'].apply(laborables)
df

Unnamed: 0,date,temp,hum,windspeed,year,month,weekday,holiday,workingday
0,2023-05-25,20.4,65.5,25.3,2023,5,Thursday,not holiday,workingday


In [15]:
df.loc[df["holiday"] == 'holiday', "workingday"] = 'weekend or holiday'
df

Unnamed: 0,date,temp,hum,windspeed,year,month,weekday,holiday,workingday
0,2023-05-25,20.4,65.5,25.3,2023,5,Thursday,not holiday,workingday


In [16]:
df.loc[0,'year']

2023

In [17]:
año_func = int(fecha[:4])
año_func

2023

In [18]:
def estaciones(dato):
    año_func = int(dato[:4])
    primavera = pd.date_range(start= f'{año_func}-03-21', end=f'{año_func}-06-21')
    verano = pd.date_range(start= f'{año_func}-06-22', end=f'{año_func}-09-22')
    otoño = pd.date_range(start= f'{año_func}-09-23', end=f'{año_func}-12-21')
    # invierno = pd.date_range(start= f'{año_func}-12-22', end=f'{año_func+1}-03-20')

    if dato in primavera:
        return 'spring'
    elif dato in verano:
        return 'summer'
    elif dato in otoño:
        return 'autumn'
    else:
        return 'winter'

In [19]:
df['season'] = estaciones(fecha)
df

Unnamed: 0,date,temp,hum,windspeed,year,month,weekday,holiday,workingday,season
0,2023-05-25,20.4,65.5,25.3,2023,5,Thursday,not holiday,workingday,spring


In [20]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1 entries, 0 to 0
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        1 non-null      datetime64[ns]
 1   temp        1 non-null      float64       
 2   hum         1 non-null      float64       
 3   windspeed   1 non-null      float64       
 4   year        1 non-null      int64         
 5   month       1 non-null      int64         
 6   weekday     1 non-null      object        
 7   holiday     1 non-null      object        
 8   workingday  1 non-null      object        
 9   season      1 non-null      object        
dtypes: datetime64[ns](1), float64(3), int64(2), object(4)
memory usage: 196.0+ bytes


In [21]:
numericas = df.select_dtypes(include=np.number).drop(['year', 'month'], axis= 1)
numericas

Unnamed: 0,temp,hum,windspeed
0,20.4,65.5,25.3


In [22]:
with open("../datos/robust.pkl", "rb") as modelo:
    robust = pickle.load(modelo)

In [23]:
x_robust = robust.transform(numericas)

In [24]:
numericas_robust = pd.DataFrame(x_robust, columns=numericas.columns)

In [25]:
df[numericas_robust.columns] = numericas_robust
df

Unnamed: 0,date,temp,hum,windspeed,year,month,weekday,holiday,workingday,season
0,2023-05-25,-0.005037,0.136973,2.001032,2023,5,Thursday,not holiday,workingday,spring


In [26]:
def año_binario(dato):
    if dato <=2018:
        return 0
    else:
        return 1

In [27]:
df['year'] = df['year'].apply(año_binario)
df

Unnamed: 0,date,temp,hum,windspeed,year,month,weekday,holiday,workingday,season
0,2023-05-25,-0.005037,0.136973,2.001032,1,5,Thursday,not holiday,workingday,spring


In [28]:
mapa_wd_casual = {'weekend or holiday': 1.0258019525801954, 'workingday': 0.9923291492329149}
# mapa_wd_registered = {'weekend or holiday': 1.0152817574021011, 'workingday': 0.9914040114613181}

# mapa_se_casual = {'autumn': 0.7475592747559274, 'spring': 1.203626220362622, 'summer': 1.4665271966527196, 'winter': 0.30613668061366806}
mapa_se_registered = {'autumn': 1.039432391867922, 'spring': 1.053759039432392, 'summer': 1.1217082821667348, 'winter': 0.5063446582071224}

# mapa_hol_casual = {'holiday': 1.7245467224546722, 'not holiday': 0.9923291492329149}
# mapa_hol_registered = {'holiday': 0.7571292127166053, 'not holiday': 1.0081866557511256}

mapa_day_casual = {'Monday': 2.0, 'Thursday': 0.8765690376569037, 'Wednesday': 0.9295676429567643, 'Tuesday': 1.3695955369595536, 'Friday': 0.7496513249651325, 'Saturday': 0.8507670850767085, 'Sunday': 1.2224546722454672}
mapa_day_registered = {'Monday': 4.291492329149233, 'Thursday': 5.652022315202231, 'Wednesday': 5.160390516039052, 'Tuesday': 4.089260808926081, 'Friday': 5.527894002789401, 'Saturday': 5.347977684797769, 'Sunday': 5.182008368200837}

# mapa_month_casual = {1: 0.17642956764295675, 2: 0.2824267782426778, 3: 0.7092050209205021, 4: 1.1610878661087867, 5: 1.2370990237099024, 6: 1.3507670850767086, 7: 1.4574616457461647, 8: 1.5495118549511855, 9: 1.3528591352859136, 10: 1.0760111576011158, 11: 0.6262203626220363, 12: 0.40794979079497906}
mapa_month_registered = {1: 2.311715481171548, 2: 2.822873082287308, 3: 3.273361227336123, 4: 4.735704323570432, 5: 5.416317991631799, 6: 6.079497907949791, 7: 5.5864714086471405, 8: 5.741283124128312, 9: 5.804741980474198, 10: 5.598326359832636, 11: 5.225244072524407, 12: 4.400278940027894}

mapa_yr_casual = {0: 0.8563458856345886, 1: 1.2622036262203626}
mapa_yr_registered = {0: 0.7954700504843771, 1: 1.3071360349297312}


In [29]:
df.columns

Index(['date', 'temp', 'hum', 'windspeed', 'year', 'month', 'weekday',
       'holiday', 'workingday', 'season'],
      dtype='object')

In [30]:
df_casual = df.filter(['year', 'weekday', 'workingday', 'temp', 'hum', 'windspeed'], axis=1)
df_casual

Unnamed: 0,year,weekday,workingday,temp,hum,windspeed
0,1,Thursday,workingday,-0.005037,0.136973,2.001032


In [31]:
df_registered = df.filter(['season', 'year', 'month', 'weekday', 'temp', 'hum'], axis=1)
df_registered

Unnamed: 0,season,year,month,weekday,temp,hum
0,spring,1,5,Thursday,-0.005037,0.136973


In [32]:
df_casual['year'] = df_casual['year'].map(mapa_yr_casual)
df_registered['year'] = df_registered['year'].map(mapa_yr_registered)
df_casual['weekday'] = df_casual['weekday'].map(mapa_day_casual)
df_registered['weekday'] = df_registered['weekday'].map(mapa_day_registered)
df_casual['workingday'] = df_casual['workingday'].map(mapa_wd_casual)
df_registered['season'] = df_registered['season'].map(mapa_se_registered)
df_registered['month'] = df_registered['month'].map(mapa_month_registered)

In [33]:
df_casual

Unnamed: 0,year,weekday,workingday,temp,hum,windspeed
0,1.262204,0.876569,0.992329,-0.005037,0.136973,2.001032


In [34]:
df_registered

Unnamed: 0,season,year,month,weekday,temp,hum
0,1.053759,1.307136,5.416318,5.652022,-0.005037,0.136973


In [35]:
with open("../datos/mejor_modelo_casual.pkl", "rb") as modelo_cas:
    mejor_modelo_casual = pickle.load(modelo_cas)
with open("../datos/mejor_modelo_registered.pkl", "rb") as modelo_reg:
    mejor_modelo_registered = pickle.load(modelo_reg)

In [36]:
conteo_casual = mejor_modelo_casual.predict(df_casual)
conteo_casual

array([752.42810202])

In [37]:
conteo_registered = mejor_modelo_registered.predict(df_registered)
conteo_registered

array([5585.16882433])

In [38]:
rmse_casual = 322
rmse_registered = 540

In [39]:
print(f'The estimated number of rental bikes for {fecha} for casual users would be between: {round(conteo_casual[0]-rmse_casual)} - {round(conteo_casual[0]+rmse_casual)}. The model estimates: {round(conteo_casual[0])}.')
print(f'The estimated number of rental bikes for {fecha} for registered users would be between: {round(conteo_registered[0]-rmse_registered)} - {round(conteo_registered[0]+rmse_registered)}. The model estimates: {round(conteo_registered[0])}.')

The estimated number of rental bikes for 2023-05-25 for casual users would be between: 430 - 1074. The model estimates: 752.
The estimated number of rental bikes for 2023-05-25 for registered users would be between: 5045 - 6125. The model estimates: 5585.


In [40]:
def predictor_bicis(date, temperature, humidity, windspeed):
    # creamos el df
    nuevo_dato = {'date': date, 'temp': float(temperature), 'hum': float(humidity), 'windspeed': float(windspeed)}
    df = pd.DataFrame(nuevo_dato, index=[0])
    # pasamos la columna a formato datetime
    df['date'] = pd.to_datetime(df['date'])
    #creamos la columna year
    df['year'] = df['date'].dt.year
    #creamos la columna month
    df['month'] = df['date'].dt.month
    # creamos la columna weekday
    df["weekday"]= df["date"].dt.day_name()
    # comprobamos si es festivo
    cal = calendar() #llamada al calendario
    holidays = cal.holidays(start=df['date'].min(), end=df['date'].max()) # sacamos el listado de festivos
    df['holiday'] = df['date'].isin(holidays) # comprobamos si en la lista de festivos están nuestras fechas
    mapa = {True: 'holiday', False: 'not holiday'} # cambiamos el resultado con un map
    df['holiday'] = df['holiday'].map(mapa)
    # creamos la columna de workingday
    laborables = pd.date_range(start= date, end=date, freq='B')
    if date in laborables:
        df['workingday'] = 'workingday'
    else:
        df['workingday'] = 'weekend or holiday'
    # función para obtener season
    año_func = int(date[:4])
    primavera = pd.date_range(start= f'{año_func}-03-21', end=f'{año_func}-06-21')
    verano = pd.date_range(start= f'{año_func}-06-22', end=f'{año_func}-09-22')
    otoño = pd.date_range(start= f'{año_func}-09-23', end=f'{año_func}-12-21')
    # invierno = pd.date_range(start= f'{año_func}-12-22', end=f'{año_func+1}-03-20')
    if date in primavera:
        df['season'] = 'spring'
    elif date in verano:
        df['season'] = 'summer'
    elif date in otoño:
        df['season'] = 'autumn'
    else:
        df['season'] = 'winter'
    # estandarizacion
    numericas = df.select_dtypes(include=np.number).drop(['year', 'month'], axis= 1)
    with open("../datos/robust.pkl", "rb") as modelo:
        robust = pickle.load(modelo)
    x_robust = robust.transform(numericas)
    numericas_robust = pd.DataFrame(x_robust, columns=numericas.columns)
    df[numericas_robust.columns] = numericas_robust
    # apañamos el año
    if df.loc[0, 'year'] <=2018:
        df['year'] = 0
    else:
        df['year'] = 1
    # mapas para el encoding
    mapa_wd_casual = {'weekend or holiday': 1.0258019525801954, 'workingday': 0.9923291492329149}
    # mapa_wd_registered = {'weekend or holiday': 1.0152817574021011, 'workingday': 0.9914040114613181}
    # mapa_se_casual = {'autumn': 0.7475592747559274, 'spring': 1.203626220362622, 'summer': 1.4665271966527196, 'winter': 0.30613668061366806}
    mapa_se_registered = {'autumn': 1.039432391867922, 'spring': 1.053759039432392, 'summer': 1.1217082821667348, 'winter': 0.5063446582071224}
    # mapa_hol_casual = {'holiday': 1.7245467224546722, 'not holiday': 0.9923291492329149}
    # mapa_hol_registered = {'holiday': 0.7571292127166053, 'not holiday': 1.0081866557511256}
    mapa_day_casual = {'Monday': 2.0, 'Thursday': 0.8765690376569037, 'Wednesday': 0.9295676429567643, 'Tuesday': 1.3695955369595536, 'Friday': 0.7496513249651325, 'Saturday': 0.8507670850767085, 'Sunday': 1.2224546722454672}
    mapa_day_registered = {'Monday': 4.291492329149233, 'Thursday': 5.652022315202231, 'Wednesday': 5.160390516039052, 'Tuesday': 4.089260808926081, 'Friday': 5.527894002789401, 'Saturday': 5.347977684797769, 'Sunday': 5.182008368200837}
    # mapa_month_casual = {1: 0.17642956764295675, 2: 0.2824267782426778, 3: 0.7092050209205021, 4: 1.1610878661087867, 5: 1.2370990237099024, 6: 1.3507670850767086, 7: 1.4574616457461647, 8: 1.5495118549511855, 9: 1.3528591352859136, 10: 1.0760111576011158, 11: 0.6262203626220363, 12: 0.40794979079497906}
    mapa_month_registered = {1: 2.311715481171548, 2: 2.822873082287308, 3: 3.273361227336123, 4: 4.735704323570432, 5: 5.416317991631799, 6: 6.079497907949791, 7: 5.5864714086471405, 8: 5.741283124128312, 9: 5.804741980474198, 10: 5.598326359832636, 11: 5.225244072524407, 12: 4.400278940027894}
    mapa_yr_casual = {0: 0.8563458856345886, 1: 1.2622036262203626}
    mapa_yr_registered = {0: 0.7954700504843771, 1: 1.3071360349297312}
    # creamos los df de casual y registered
    df_casual = df.filter(['year', 'weekday', 'workingday', 'temp', 'hum', 'windspeed'], axis=1)
    df_registered = df.filter(['season', 'year', 'month', 'weekday', 'temp', 'hum'], axis=1)
    # encoding
    df_casual['year'] = df_casual['year'].map(mapa_yr_casual)
    df_registered['year'] = df_registered['year'].map(mapa_yr_registered)
    df_casual['weekday'] = df_casual['weekday'].map(mapa_day_casual)
    df_registered['weekday'] = df_registered['weekday'].map(mapa_day_registered)
    df_casual['workingday'] = df_casual['workingday'].map(mapa_wd_casual)
    df_registered['season'] = df_registered['season'].map(mapa_se_registered)
    df_registered['month'] = df_registered['month'].map(mapa_month_registered)
    # abrimos los modelos
    with open("../datos/mejor_modelo_casual.pkl", "rb") as modelo_cas:
        mejor_modelo_casual = pickle.load(modelo_cas)
    with open("../datos/mejor_modelo_registered.pkl", "rb") as modelo_reg:
        mejor_modelo_registered = pickle.load(modelo_reg)
    # realizamos las predicciones
    conteo_casual = mejor_modelo_casual.predict(df_casual)
    conteo_registered = mejor_modelo_registered.predict(df_registered)
    rmse_casual = 322.167752
    rmse_registered = 540.794549
    cas_min = round(conteo_casual[0] - rmse_casual)
    cas_max = round(conteo_casual[0] + rmse_casual)
    reg_min = round(conteo_registered[0] - rmse_registered)
    reg_max = round(conteo_registered[0] + rmse_registered)
    return round(conteo_casual[0]), cas_min, cas_max, round(conteo_registered[0]), reg_min, reg_max


In [41]:
fecha = '2023-05-25'
temperatura = '20.4'
humedad = '65.5'
viento = '25.3'

In [42]:
casual_pred, casual_min, casual_max, regist_pred, regist_min, regist_max = predictor_bicis(fecha, temperatura, humedad, viento)

In [43]:
print(f'The estimated number of rental bikes for {fecha} for casual users would be between: {casual_min} - {casual_max}. The model estimates: {casual_pred}.')
print(f'The estimated number of rental bikes for {fecha} for registered users would be between: {regist_min} - {regist_max}. The model estimates: {regist_pred}.')

The estimated number of rental bikes for 2023-05-25 for casual users would be between: 430 - 1075. The model estimates: 752.
The estimated number of rental bikes for 2023-05-25 for registered users would be between: 5044 - 6126. The model estimates: 5585.


In [44]:
df2 = pd.DataFrame({'User type':['casual', 'registered'], 'Estimated value':[casual_pred, regist_pred], 'Probable min':[casual_min, regist_min], 'Probable max':[casual_max, regist_max]}, index = [0, 1])
df2

Unnamed: 0,User type,Estimated value,Probable min,Probable max
0,casual,752,430,1075
1,registered,5585,5044,6126
