# ENCODING

In [2]:
# Tratamiento de datos
# ==============================================================================
import numpy as np
import pandas as pd
# Gráficos
# ==============================================================================
import matplotlib.pyplot as plt
from matplotlib import style
import matplotlib.ticker as ticker
import seaborn as sns
# Gráficos
# ==============================================================================
from statsmodels.tools.tools import add_constant
from statsmodels.stats.outliers_influence import variance_inflation_factor
# Asunciones y Preprocesamiento
# ==============================================================================
from scipy import stats
import math
from scipy.stats import levene
import researchpy as rp
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
import itertools
# ANOVA
# ==============================================================================
import statsmodels.api as sm
from statsmodels.formula.api import ols
#Linear Regresion con Sklearn
# ==============================================================================
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
# Configuración warnings
# ==============================================================================
import warnings
warnings.filterwarnings('ignore')
# Tamaño gráficas
# ==============================================================================
plt.rcParams["figure.figsize"] = (10,8)

In [3]:
df = pd.read_pickle("datos/bikes_clean_pickle.pkl")
df.head()

Unnamed: 0,date,season,year,month,holiday,weekday,workingday,weather,temp,hum,windspeed,casual,registered
0,2018-01-01,winter,0,1,holiday,Monday,weekend or holiday,cloudy,14.110847,80.5833,10.749882,331,654
1,2018-01-02,winter,0,1,not holiday,Tuesday,workingday,cloudy,14.902598,69.6087,16.652113,131,670
2,2018-01-03,winter,0,1,not holiday,Wednesday,workingday,clear,8.050924,43.7273,16.636703,120,1229
3,2018-01-04,winter,0,1,not holiday,Thursday,workingday,clear,8.2,59.0435,10.739832,108,1454
4,2018-01-05,winter,0,1,not holiday,Friday,workingday,clear,9.305237,43.6957,12.5223,82,1518


## Para el map de workingday. 

In [4]:
estad = df.describe().T
estad

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
temp,730.0,20.319259,7.506729,2.424346,13.811885,20.465826,26.880615,35.328347
hum,730.0,62.765175,14.237589,0.0,52.0,62.625,72.989575,97.25
windspeed,730.0,12.76362,5.195841,1.500244,9.04165,12.125325,15.625589,34.000021
casual,730.0,849.249315,686.479875,2.0,316.25,717.0,1096.5,3410.0
registered,730.0,3658.757534,1559.758728,20.0,2502.25,3664.5,4783.25,6946.0


In [5]:
df_workingday = df.groupby('workingday')['casual', 'registered'].median()
df_workingday

Unnamed: 0_level_0,casual,registered
workingday,Unnamed: 1_level_1,Unnamed: 2_level_1
weekend or holiday,735.5,3720.5
workingday,711.5,3633.0


Haciendo la proporción entre las medias de las categorías

In [6]:
mapa_wd_casual = {False: 1, True: (df_workingday.iloc[1,0] / df_workingday.iloc[0,0])}
mapa_wd_registered = {False: (df_workingday.iloc[0,1] / df_workingday.iloc[1,1]), True: 1}
print(f'Los valores del mapa de workingday para el modelo de casual sería: {mapa_wd_casual}')
print(f'Los valores del mapa de workingday para el modelo de registered sería: {mapa_wd_registered}')

Los valores del mapa de workingday para el modelo de casual sería: {False: 1, True: 0.9673691366417403}
Los valores del mapa de workingday para el modelo de registered sería: {False: 1.0240847784200386, True: 1}


Considerando la media global para sacar las proporciones

In [7]:
# si ajustamos en base a las medias de cada categoría de 'workingday' respecto a la media global de esa columna paralos casos de 'casual' y 'registered':
mapa_wd_casual = {False: (df_workingday.iloc[0,0] / estad.loc['casual', '50%']), True: (df_workingday.iloc[1,0] / estad.loc['casual', '50%'])}
mapa_wd_registered = {False: (df_workingday.iloc[0,1] / estad.loc['registered', '50%']), True: (df_workingday.iloc[1,1] / estad.loc['registered', '50%'])}
print(f'Los valores del mapa de workingday para el modelo de casual sería: {mapa_wd_casual}')
print(f'Los valores del mapa de workingday para el modelo de registered sería: {mapa_wd_registered}')

Los valores del mapa de workingday para el modelo de casual sería: {False: 1.0258019525801954, True: 0.9923291492329149}
Los valores del mapa de workingday para el modelo de registered sería: {False: 1.0152817574021011, True: 0.9914040114613181}


## Para el map de season

In [8]:
df_season = df.groupby('season')['casual', 'registered'].median()
df_season

Unnamed: 0_level_0,casual,registered
season,Unnamed: 1_level_1,Unnamed: 2_level_1
autumn,536.0,3809.0
spring,863.0,3861.5
summer,1051.5,4110.5
winter,219.5,1855.5


In [9]:
mapa_se_casual = {'autumn': (df_season.iloc[0,0] / estad.loc['casual', '50%']), 'spring': (df_season.iloc[1,0] / estad.loc['casual', '50%']), 'summer': (df_season.iloc[2,0] / estad.loc['casual', '50%']), 'winter': (df_season.iloc[3,0] / estad.loc['casual', '50%'])}
mapa_se_registered = {'autumn': (df_season.iloc[0,1] / estad.loc['registered', '50%']), 'spring': (df_season.iloc[1,1] / estad.loc['registered', '50%']), 'summer': (df_season.iloc[2,1] / estad.loc['registered', '50%']), 'winter': (df_season.iloc[3,1] / estad.loc['registered', '50%'])}
print(f'Los valores del mapa de season para el modelo de casual sería: {mapa_se_casual}')
print(f'Los valores del mapa de season para el modelo de registered sería: {mapa_se_registered}')

Los valores del mapa de season para el modelo de casual sería: {'autumn': 0.7475592747559274, 'spring': 1.203626220362622, 'summer': 1.4665271966527196, 'winter': 0.30613668061366806}
Los valores del mapa de season para el modelo de registered sería: {'autumn': 1.039432391867922, 'spring': 1.053759039432392, 'summer': 1.1217082821667348, 'winter': 0.5063446582071224}


## Para la columna year

In [10]:
df_year = df.groupby('year')['casual', 'registered'].median()
df_year

Unnamed: 0_level_0,casual,registered
year,Unnamed: 1_level_1,Unnamed: 2_level_1
0,614.0,2915.0
1,905.0,4790.0


In [11]:
mapa_yr_casual = {0: (df_year.iloc[0,0] / estad.loc['casual', '50%']), 1: (df_year.iloc[1,0] / estad.loc['casual', '50%'])}
mapa_yr_registered = {0: (df_year.iloc[0,1] / estad.loc['registered', '50%']), 1: (df_year.iloc[1,1] / estad.loc['registered', '50%'])}
print(f'Los valores del mapa de year para el modelo de casual sería: {mapa_yr_casual}')
print(f'Los valores del mapa de year para el modelo de registered sería: {mapa_yr_registered}')

Los valores del mapa de year para el modelo de casual sería: {0: 0.8563458856345886, 1: 1.2622036262203626}
Los valores del mapa de year para el modelo de registered sería: {0: 0.7954700504843771, 1: 1.3071360349297312}


## Para holiday

In [12]:
df_holiday = df.groupby('holiday')['casual', 'registered'].median()
df_holiday

Unnamed: 0_level_0,casual,registered
holiday,Unnamed: 1_level_1,Unnamed: 2_level_1
holiday,1236.5,2774.5
not holiday,711.5,3694.5


In [13]:
mapa_hol_casual = {'holiday': (df_holiday.iloc[0,0] / estad.loc['casual', '50%']), 'not holiday': (df_holiday.iloc[1,0] / estad.loc['casual', '50%'])}
mapa_hol_registered = {'holiday': (df_holiday.iloc[0,1] / estad.loc['registered', '50%']), 'not holiday': (df_holiday.iloc[1,1] / estad.loc['registered', '50%'])}
print(f'Los valores del mapa de holiday para el modelo de casual sería: {mapa_hol_casual}')
print(f'Los valores del mapa de holiday para el modelo de registered sería: {mapa_hol_registered}')

Los valores del mapa de holiday para el modelo de casual sería: {'holiday': 1.7245467224546722, 'not holiday': 0.9923291492329149}
Los valores del mapa de holiday para el modelo de registered sería: {'holiday': 0.7571292127166053, 'not holiday': 1.0081866557511256}


## Para weekday

In [14]:
df_weekday = df.groupby('weekday')['casual', 'registered'].median()
df_weekday

Unnamed: 0_level_0,casual,registered
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1
Friday,537.5,3963.5
Monday,1434.0,3077.0
Saturday,610.0,3834.5
Sunday,876.5,3715.5
Thursday,628.5,4052.5
Tuesday,982.0,2932.0
Wednesday,666.5,3700.0


In [16]:
mapa_day_casual = {'Monday': (df_weekday.iloc[1,0] / estad.loc['casual', '50%']), 'Thursday': (df_weekday.iloc[4,0] / estad.loc['casual', '50%']), 'Wednesday': (df_weekday.iloc[6,0] / estad.loc['casual', '50%']), 'Tuesday': (df_weekday.iloc[5,0] / estad.loc['casual', '50%']), 'Friday': (df_weekday.iloc[0,0] / estad.loc['casual', '50%']), 'Saturday': (df_weekday.iloc[2,0] / estad.loc['casual', '50%']), 'Sunday': (df_weekday.iloc[3,0] / estad.loc['casual', '50%'])}
mapa_day_registered = {'Monday': (df_weekday.iloc[1,1] / estad.loc['casual', '50%']), 'Thursday': (df_weekday.iloc[4,1] / estad.loc['casual', '50%']), 'Wednesday': (df_weekday.iloc[6,1] / estad.loc['casual', '50%']), 'Tuesday': (df_weekday.iloc[5,1] / estad.loc['casual', '50%']), 'Friday': (df_weekday.iloc[0,1] / estad.loc['casual', '50%']), 'Saturday': (df_weekday.iloc[2,1] / estad.loc['casual', '50%']), 'Sunday': (df_weekday.iloc[3,1] / estad.loc['casual', '50%'])}
print(f'Los valores del mapa de weekday para el modelo de casual sería: {mapa_day_casual}')
print(f'Los valores del mapa de weekday para el modelo de registered sería: {mapa_day_registered}')

Los valores del mapa de weekday para el modelo de casual sería: {'Monday': 2.0, 'Thursday': 0.8765690376569037, 'Wednesday': 0.9295676429567643, 'Tuesday': 1.3695955369595536, 'Friday': 0.7496513249651325, 'Saturday': 0.8507670850767085, 'Sunday': 1.2224546722454672}
Los valores del mapa de weekday para el modelo de registered sería: {'Monday': 4.291492329149233, 'Thursday': 5.652022315202231, 'Wednesday': 5.160390516039052, 'Tuesday': 4.089260808926081, 'Friday': 5.527894002789401, 'Saturday': 5.347977684797769, 'Sunday': 5.182008368200837}
