In [1]:
import pandas as pd
import numpy as np

import sidetable as stb

import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import skew
from scipy.stats import kurtosistest
from scipy import stats

plt.rcParams['figure.figsize'] = (10, 10)
pd.options.display.max_columns = None

from datetime import datetime

In [2]:
df = pd.read_csv("data/bikes.csv", index_col= 0)
df.drop(["instant"], axis= 1, inplace= True)

In [3]:
df.shape

(730, 15)

In [4]:
df.stb.missing()

Unnamed: 0,missing,total,percent
dteday,0,730,0.0
season,0,730,0.0
yr,0,730,0.0
mnth,0,730,0.0
holiday,0,730,0.0
weekday,0,730,0.0
workingday,0,730,0.0
weathersit,0,730,0.0
temp,0,730,0.0
atemp,0,730,0.0


In [5]:
df.dtypes

dteday         object
season         object
yr              int64
mnth            int64
holiday         int64
weekday         int64
workingday      int64
weathersit      int64
temp          float64
atemp         float64
hum           float64
windspeed     float64
casual          int64
registered      int64
cnt             int64
dtype: object

In [6]:
df[df["season"] == "summer"]
df.sample()

Unnamed: 0,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
716,18-12-2019,winter,1,12,0,2,1,1,16.844153,20.4854,66.625,14.834068,433,5124,5557


# CAMBIO NOMBRE COLUMNAS:

In [7]:
diccionario = {
    "dteday": "fecha",
    "season": "estacion",
    "yr": "año",
    "mnth": "mes", 
    "holiday": "festivo", 
    "weekday": "dia_sem",
    "workingday": "laborable", 
    "weathersit": "prevision_tiempo",
    "temp": "temperatura",
    "atemp": "sensacion_termica", 
    "hum": "humedad", 
    "windspeed": "velocidad_viento", 
    "casual": "clientes_casual", 
    "registered": "clientes_registrados", 
    "cnt":"total_bicis"}

In [8]:
df.rename(columns = diccionario, inplace =True)
df.head()

Unnamed: 0,fecha,estacion,año,mes,festivo,dia_sem,laborable,prevision_tiempo,temperatura,sensacion_termica,humedad,velocidad_viento,clientes_casual,clientes_registrados,total_bicis
0,01-01-2018,spring,0,1,0,6,0,2,14.110847,18.18125,80.5833,10.749882,331,654,985
1,02-01-2018,spring,0,1,0,0,0,2,14.902598,17.68695,69.6087,16.652113,131,670,801
2,03-01-2018,spring,0,1,0,1,1,1,8.050924,9.47025,43.7273,16.636703,120,1229,1349
3,04-01-2018,spring,0,1,0,2,1,1,8.2,10.6061,59.0435,10.739832,108,1454,1562
4,05-01-2018,spring,0,1,0,3,1,1,9.305237,11.4635,43.6957,12.5223,82,1518,1600


# CAMBIO DATOS ESTACIÓN:

In [9]:
# invierno: de 21/12/2018  hasta 19/03/2019
# invierno: de 22/12/2018  hasta 19/03/2019
# primavera_18: 20/03/2018 hasta 20/06/2018
# primavera_19: 20/03/2019 hasta 20/06/2019
# verano_18: 21/06/2018 hasta 22/09/2018
# verano_19: 21/06/2019 hasta 22/09/2019
# otoño_18: 23/09/2018 hasta 20/12/2018
# otoño_19: 23/09/2019 hasta 20/12/2019

#instituto de mecánica celeste de Francia
# (https://www.tutiempo.net/meteorologia/equinoccios-solsticios.html)
# seasons = {"invierno": fecha_invierno, "primavera": fecha_primavera, "verano": fecha_verano, "otoño": fecha_otoño}

In [17]:
df["fecha"] = df["fecha"].astype("datetime64", errors ="ignore")

In [21]:
df['fecha'] = df['fecha'].astype('datetime64[ns]')

ValueError: time data "13-01-2018" doesn't match format "%m-%d-%Y", at position 12. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.

In [23]:
df['fecha']= pd.to_datetime(df['fecha'], format = "%d-%m-%Y")

In [24]:
df.dtypes

fecha                   datetime64[ns]
estacion                        object
año                              int64
mes                              int64
festivo                          int64
dia_sem                          int64
laborable                        int64
prevision_tiempo                 int64
temperatura                    float64
sensacion_termica              float64
humedad                        float64
velocidad_viento               float64
clientes_casual                  int64
clientes_registrados             int64
total_bicis                      int64
dtype: object

In [25]:
def cambio_estacion(fecha):
    inicio_primavera = datetime(fecha.year, 3, 20)
    fin_primavera = datetime(fecha.year, 6, 20)
    inicio_verano = datetime(fecha.year, 6, 21)
    fin_verano = datetime(fecha.year, 9, 22)
    inicio_otoño = datetime(fecha.year, 9, 23)
    fin_otoño = datetime(fecha.year, 12, 20)
    # Comprueba en qué estación del año cae la fecha dada
    if inicio_primavera <= fecha <= fin_primavera:
        return "Primavera"
    elif inicio_verano <= fecha <= fin_verano:
        return "Verano"
    elif inicio_otoño <= fecha <= fin_otoño:
        return "Otoño"
    else:
        return "Invierno"

fecha_2018 = datetime(2018, 7, 15)
estacion_2018 = cambio_estacion(fecha_2018)

fecha_2019 = datetime(2019, 7, 15)
estacion_2019 = cambio_estacion(fecha_2019)

In [26]:
df['estacion'] = df['fecha'].apply(cambio_estacion)