In [1]:
# Tratamiento de datos
# -----------------------------------------------------------------------
import numpy as np
import pandas as pd

# Gráficos
# ------------------------------------------------------------------------
import matplotlib.pyplot as plt
import seaborn as sns


# Guardar transformers
import pickle


# Preprocesado
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder # para realizar el Label Encoding 
from sklearn.preprocessing import OneHotEncoder  # para realizar el One-Hot Encoding


#  Modelado y evaluación
# -----------------------------------------------------------------------------
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [2]:
# Load the dataset; the first CSV column is used as the row index.
df = pd.read_csv("bikes2.csv", index_col=0)
df.head(5)

Unnamed: 0,season,holiday,weekday,temp,atemp,hum,windspeed,cnt
0,spring,No,Domingo,14.110847,18.18125,80.5833,10.749882,985
1,spring,No,Lunes,14.902598,17.68695,69.6087,16.652113,801
2,spring,No,Martes,8.050924,9.47025,43.7273,16.636703,1349
3,spring,No,Miercoles,8.2,10.6061,59.0435,10.739832,1562
4,spring,No,Jueves,9.305237,11.4635,43.6957,12.5223,1600


In [3]:
# Numeric predictors only: remove the target (cnt) first, then keep the
# columns with a numeric dtype.
numericas = df.drop(columns="cnt").select_dtypes(include=np.number)
numericas.head()

Unnamed: 0,temp,atemp,hum,windspeed
0,14.110847,18.18125,80.5833,10.749882
1,14.902598,17.68695,69.6087,16.652113
2,8.050924,9.47025,43.7273,16.636703
3,8.2,10.6061,59.0435,10.739832
4,9.305237,11.4635,43.6957,12.5223


**Estandarización**

In [4]:
# Standardize the numeric predictors and keep the fitted scaler so the same
# transformation can be applied to new observations later.
# NOTE(review): the scaler is fitted on every row, before the train/test split
# performed further down, so test rows contribute to the learned mean/std.
# Consider fitting on the training rows only.
scaler = StandardScaler()
scaler.fit(numericas)
# Carry over the original index: the scaled frame is later assigned back into
# df, and pandas aligns that assignment by index label. A fresh RangeIndex
# would produce NaNs/misaligned rows whenever df's index is not 0..n-1.
numericas_escaladas = pd.DataFrame(
    scaler.transform(numericas),
    columns=numericas.columns,
    index=numericas.index,
)

In [5]:
# Overwrite the raw numeric columns of df with their standardized values
# (the assignment is matched on column names and aligned on the row index).
df[numericas_escaladas.columns] = numericas_escaladas
# Preview the result to confirm the columns now hold scaled values.
df.head()

Unnamed: 0,season,holiday,weekday,temp,atemp,hum,windspeed,cnt
0,spring,No,Domingo,-0.827613,-0.680818,1.252343,-0.387833,985
1,spring,No,Lunes,-0.722069,-0.741507,0.480996,0.748899,801
2,spring,No,Martes,-1.635432,-1.750344,-1.338073,0.745931,1349
3,spring,No,Miercoles,-1.61556,-1.610886,-0.261577,-0.389769,1562
4,spring,No,Jueves,-1.468226,-1.505615,-1.340294,-0.046477,1600


**Encoding**

In [6]:
# Ordinal codes assigned to each season label.
mapa_season = {
    "spring": 3,
    "summer": 4,
    "autumn": 2,
    "winter": 1,
}

# Binary codes for the holiday flag.
mapa_vacaciones = {
    "No": 0,
    "Si": 1,
}


In [7]:
# Replace the season and holiday labels with their numeric codes.
# Labels missing from the dictionaries come out as NaN.
df = df.assign(
    season=df["season"].map(mapa_season),
    holiday=df["holiday"].map(mapa_vacaciones),
)

In [8]:
# One-hot encode the weekday column.
# fit_transform learns the categories and encodes in a single call; `oh` stays
# fitted so the same encoder can later transform new observations.
# (The previous `transformados = oh.fit(...)` only aliased the encoder itself
# and was never used, so it has been removed.)
oh = OneHotEncoder()
transformados_array = oh.fit_transform(df[["weekday"]])
# Reuse df's index: the column assignment below aligns rows by index label,
# so a fresh RangeIndex would give NaNs whenever df's index is not 0..n-1.
transformados_df = pd.DataFrame(
    transformados_array.toarray(),
    columns=oh.get_feature_names_out(),
    index=df.index,
)
df[transformados_df.columns] = transformados_df
# The original text column is no longer needed once the dummies are in place.
df.drop("weekday", axis = 1, inplace = True)
df.head()

Unnamed: 0,season,holiday,temp,atemp,hum,windspeed,cnt,weekday_Domingo,weekday_Jueves,weekday_Lunes,weekday_Martes,weekday_Miercoles,weekday_Sabado,weekday_Viernes
0,3,0,-0.827613,-0.680818,1.252343,-0.387833,985,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,3,0,-0.722069,-0.741507,0.480996,0.748899,801,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,3,0,-1.635432,-1.750344,-1.338073,0.745931,1349,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,3,0,-1.61556,-1.610886,-0.261577,-0.389769,1562,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,3,0,-1.468226,-1.505615,-1.340294,-0.046477,1600,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Target: the cnt column. Predictors: every other column.
y = df["cnt"]
X = df.drop(columns="cnt")

In [10]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=34,
)


In [11]:
# Fit an ordinary least squares regression on the training split.
lr = LinearRegression()
lr.fit(X=x_train, y=y_train)

In [15]:

# TODO: evaluate the model on the held-out split before saving it,
# e.g. r2_score(y_test, lr.predict(x_test))

In [12]:
# Reference view of the final training layout (column names and order).
df.head()

Unnamed: 0,season,holiday,temp,atemp,hum,windspeed,cnt,weekday_Domingo,weekday_Jueves,weekday_Lunes,weekday_Martes,weekday_Miercoles,weekday_Sabado,weekday_Viernes
0,3,0,-0.827613,-0.680818,1.252343,-0.387833,985,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,3,0,-0.722069,-0.741507,0.480996,0.748899,801,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,3,0,-1.635432,-1.750344,-1.338073,0.745931,1349,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,3,0,-1.61556,-1.610886,-0.261577,-0.389769,1562,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,3,0,-1.468226,-1.505615,-1.340294,-0.046477,1600,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# Example observation in raw form (unscaled numbers, text categories).
usuario = {
    "season": "winter",
    "holiday": "No",
    "weekday": "Lunes",
    "temp": 11,
    "atemp": 9,
    "hum": 47,
    "windspeed": 15,
}
# Single-row frame; the scalar values need an explicit index.
df_usuario = pd.DataFrame(data=usuario, index=[0])
df_usuario

Unnamed: 0,season,holiday,weekday,temp,atemp,hum,windspeed
0,winter,No,Lunes,11,9,47,15


In [15]:
# Keep only the numeric inputs of the user's observation.
df_usu_num = df_usuario.select_dtypes(include="number")
df_usu_num

Unnamed: 0,temp,atemp,hum,windspeed
0,11,9,47,15


In [17]:
# Scale the user's numeric inputs with the already-fitted scaler (transform
# only, no refit).
# The index is carried over so that assigning the result back into df_usuario,
# which aligns on index labels, keeps working if the user frame is not
# indexed from 0.
df_usu_num_est = pd.DataFrame(
    scaler.transform(df_usu_num),
    columns=df_usu_num.columns,
    index=df_usu_num.index,
)
df_usu_num_est

Unnamed: 0,temp,atemp,hum,windspeed
0,-1.242305,-1.808081,-1.108052,0.430713


In [18]:
# Replace the raw numeric inputs with their standardized values
# (assignment is aligned on the row index).
df_usuario[df_usu_num_est.columns] = df_usu_num_est
df_usuario

Unnamed: 0,season,holiday,weekday,temp,atemp,hum,windspeed
0,winter,No,Lunes,-1.242305,-1.808081,-1.108052,0.430713


In [19]:
# Encode season and holiday with the same dictionaries applied to the
# training frame.
df_usuario = df_usuario.assign(
    season=df_usuario["season"].map(mapa_season),
    holiday=df_usuario["holiday"].map(mapa_vacaciones),
)

In [20]:
# Check that season and holiday now hold numeric codes.
df_usuario

Unnamed: 0,season,holiday,weekday,temp,atemp,hum,windspeed
0,1,0,Lunes,-1.242305,-1.808081,-1.108052,0.430713


In [21]:
# Encode the user's weekday with the encoder fitted on the training data.
oh_res = oh.transform(df_usuario.loc[:, ["weekday"]])
oh_res

<1x7 sparse matrix of type '<class 'numpy.float64'>'
	with 1 stored elements in Compressed Sparse Row format>

In [28]:
# Turn the sparse one-hot result into a labelled frame.
# The index of df_usuario is reused because the frame is subsequently assigned
# back into df_usuario, and that assignment aligns rows by index label.
df_enc_one = pd.DataFrame(
    oh_res.toarray(),
    columns=oh.get_feature_names_out(),
    index=df_usuario.index,
)
df_enc_one

Unnamed: 0,weekday_Domingo,weekday_Jueves,weekday_Lunes,weekday_Martes,weekday_Miercoles,weekday_Sabado,weekday_Viernes
0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [29]:
# Add the one-hot weekday columns to the user's observation
# (assignment is aligned on the row index).
df_usuario[df_enc_one.columns] = df_enc_one
df_usuario

Unnamed: 0,season,holiday,weekday,temp,atemp,hum,windspeed,weekday_Domingo,weekday_Jueves,weekday_Lunes,weekday_Martes,weekday_Miercoles,weekday_Sabado,weekday_Viernes
0,1,0,Lunes,-1.242305,-1.808081,-1.108052,0.430713,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [30]:
# The text weekday column is redundant once its one-hot columns exist.
df_usuario.drop(columns="weekday", inplace=True)

In [31]:
# Final model-ready representation of the user's observation.
df_usuario

Unnamed: 0,season,holiday,temp,atemp,hum,windspeed,weekday_Domingo,weekday_Jueves,weekday_Lunes,weekday_Martes,weekday_Miercoles,weekday_Sabado,weekday_Viernes
0,1,0,-1.242305,-1.808081,-1.108052,0.430713,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [32]:
# Show one training row to compare its column layout with df_usuario
# (df additionally carries the target column cnt).
df.head(1)

Unnamed: 0,season,holiday,temp,atemp,hum,windspeed,cnt,weekday_Domingo,weekday_Jueves,weekday_Lunes,weekday_Martes,weekday_Miercoles,weekday_Sabado,weekday_Viernes
0,3,0,-0.827613,-0.680818,1.252343,-0.387833,985,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
# Predict cnt for the user's observation with the fitted model.
lr.predict(df_usuario)

array([2980.18303841])

In [34]:
import pickle

In [36]:
# Persist the fitted regression model to disk.
with open("mejor_modelo.pkl", "wb") as f:
    f.write(pickle.dumps(lr))

In [37]:
# Persist the fitted scaler so new data can be standardized the same way.
with open("estandarizacion.pkl", "wb") as f:
    f.write(pickle.dumps(scaler))

In [38]:
# Read the saved model back from disk.
# Only unpickle files you created yourself: pickle can execute arbitrary code.
with open("mejor_modelo.pkl", "rb") as modelo:
    mejor_modelo = pickle.loads(modelo.read())

In [39]:
# Sanity check: the reloaded model should reproduce the prediction obtained
# from the in-memory model.
mejor_modelo.predict(df_usuario)

array([2980.18303841])