In [4]:
import pandas as pd 
import numpy as np
import geopandas as gpd
import fiona
import os
import zipfile
import shapely
import matplotlib
from ipywidgets import interact
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.tsa 
from datetime import datetime
import missingno as msno
import re

In [5]:
gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'
class Databases:
    def __init__(self, csv, anva, smm, metro):
        self.csv = csv
        self.anva = anva 
        self.smm = smm
        self.metro = metro

    def extract_csv(self):
        file = pd.read_csv(f'data/bike_loans/{self.csv}', sep=';')
        file['Fecha_Prestamo'] = pd.to_datetime(file['Fecha_Prestamo'])
        file['Fecha_Devolucion'] = pd.to_datetime(file['Fecha_Devolucion'])
        return file
        
    
    def extract_anva(self):
        bus_routes_AMVA = gpd.GeoDataFrame()
        for root, dirs, files in os.walk(f'data/bus_routes/medellin_bus_routes/{self.anva}'):
            for file in files:
                path = os.path.join(root, file)
                df = gpd.read_file(path, driver='KML')
                bus_routes_AMVA = gpd.GeoDataFrame(pd.concat([bus_routes_AMVA, df], ignore_index=True))
        return bus_routes_AMVA
    
    def extract_smm(self):
        smm_bus_routes = gpd.GeoDataFrame()
        for root, dirs, files in os.walk(f'data/bus_routes/medellin_bus_routes/{self.smm}'):
            for file in files:
                path = os.path.join(root, file)
                df = gpd.read_file(path, driver='KML')
                smm_bus_routes = gpd.GeoDataFrame(pd.concat([smm_bus_routes, df], ignore_index=True))
        return smm_bus_routes
    
    def extract_metro(self):
        metro_routes = gpd.GeoDataFrame()
        for root, dirs, files in os.walk(f'data/medellin_metro/{self.metro}'):
            for file in files:
                path = os.path.join(root, file)
                print(path)
                if path.endswith('.kmz'):
                    kmz = zipfile.ZipFile(path, 'r')
                    kml = kmz.open('doc.kml', 'r')
                    df = gpd.read_file(kml, driver='KML')
                    metro_routes = gpd.GeoDataFrame(pd.concat([metro_routes, df], ignore_index=True))
        return metro_routes

In [6]:
pd.set_option('display.max_rows', 100)

In [7]:
databases = Databases('bike_loans.csv','Kml Ruta AMVA','kml Ruta SMM','medellin_metro_stations')

In [8]:
bikes = databases.extract_csv()

In [9]:
bikes.dtypes #checking feature types

Id_Historico_Prestamo               int64
Id_Usuario                          int64
Id_Tag_Bicicleta                   object
Fecha_Prestamo             datetime64[ns]
Fecha_Devolucion           datetime64[ns]
Id_Aparcamiento_Origen              int64
Posicion_Origen                     int64
Id_Aparcamiento_Destino           float64
Posicion_Destino                  float64
Importe                           float64
DescripcionImporte                float64
operario                           object
Num_Bici_Hist                     float64
dtype: object

In [10]:
bikes_2019 = bikes[(bikes['Fecha_Prestamo'].dt.year == 2019) & (bikes['Fecha_Devolucion'].dt.year == 2019)]
bikes_2019

Unnamed: 0,Id_Historico_Prestamo,Id_Usuario,Id_Tag_Bicicleta,Fecha_Prestamo,Fecha_Devolucion,Id_Aparcamiento_Origen,Posicion_Origen,Id_Aparcamiento_Destino,Posicion_Destino,Importe,DescripcionImporte,operario,Num_Bici_Hist
440864,10948569,20272,00FB73222B5E,2019-02-20 14:08:04,2019-02-20 14:26:08,9,1,43.0,1.0,0.0,,anderson.rojas,1366.0
586995,12896369,135209,002772222B5E,2019-08-22 09:19:51,2019-08-22 09:20:31,44,15,44.0,15.0,0.0,,,976.0
726893,12896264,26536,00094A72BF5B,2019-08-22 09:11:22,2019-08-22 09:20:53,9,1,70.0,1.0,0.0,,seguro.sorelly,2699.0
1028721,10948678,124512,00233C222B5E,2019-02-20 14:15:54,2019-02-20 14:26:28,89,2,56.0,1.0,0.0,,,638.0
2111125,10516625,40806,009F53222B5E,2019-01-02 12:57:01,2019-01-02 13:51:38,12,1,22.0,,0.0,,monitor.jannetv,558.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11767100,14454800,27944,0000F69A5A88,2019-12-31 13:58:55,2019-12-31 14:25:38,40,1,50.0,1.0,0.0,,claudia.marin,541.0
11767101,14454778,108800,00029A222B5E,2019-12-31 13:52:48,2019-12-31 14:40:52,28,1,50.0,1.0,0.0,,claudia.marin,715.0
11767102,14454793,98449,00001619CE0A,2019-12-31 13:55:47,2019-12-31 14:41:39,113,1,28.0,1.0,0.0,,edison.graciano,3497.0
11767103,14454774,129665,000036885A88,2019-12-31 13:51:28,2019-12-31 16:27:12,39,15,41.0,15.0,0.0,,,534.0


hours = ['2019-01-01 00h00m00', ......, '2019-12-31 23h00m00']
bikes = ['000017B00743', ...]
dataframe = all combinations of bike hours
for bike
     for hour
        row = data(max Fecha_Devolucion < hour)
        location = row[Id_Aparcamiento_Destino]
        
end = date_hour/bike/location
groupby hour location - count() -> #bikes per station per hour
hours ->left_join ->nan is 0 bikes
```

In [48]:
date_hours = pd.date_range(start='2019-01-01', end='2019-12-31', freq='H')

In [73]:
bikes_ID = bikes_2019['Id_Tag_Bicicleta'].unique()

In [74]:
len(bikes_ID)

2560

In [75]:
bikes_ID

array(['00FB73222B5E', '002772222B5E', '00094A72BF5B', ...,
       '0000C680CE0A', '0000561ACE0A', '00002614CE0A'], dtype=object)

In [76]:
df_supply = pd.DataFrame()

In [77]:
for bike in bikes_ID:
    for date_hour in date_hours:
        row = max(bikes_2019['Fecha_Devolucion'] < date_hour)
        location = row['Id_Aparcamiento_Destino']

TypeError: 'bool' object is not subscriptable

In [78]:
bikes_2019[bikes_2019['Id_Tag_Bicicleta'] == bikes_ID[1]] 

Unnamed: 0,Id_Historico_Prestamo,Id_Usuario,Id_Tag_Bicicleta,Fecha_Prestamo,Fecha_Devolucion,Id_Aparcamiento_Origen,Posicion_Origen,Id_Aparcamiento_Destino,Posicion_Destino,Importe,DescripcionImporte,operario,Num_Bici_Hist
586995,12896369,135209,002772222B5E,2019-08-22 09:19:51,2019-08-22 09:20:31,44,15,44.0,15.0,0.0,,,976.0
7949314,10566846,101233,002772222B5E,2019-01-11 15:06:19,2019-01-11 15:24:05,46,1,34.0,1.0,0.0,,manuela.arango,976.0
7977765,10523708,40377,002772222B5E,2019-01-03 14:12:29,2019-01-03 15:13:53,22,21,51.0,9.0,0.0,,,976.0
7977886,10524246,29608,002772222B5E,2019-01-03 15:20:32,2019-01-03 15:34:38,51,1,8.0,1.0,0.0,,manuala.alzate,976.0
7983102,10524894,118786,002772222B5E,2019-01-03 16:30:45,2019-01-03 18:19:47,8,1,22.0,11.0,0.0,,,976.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11760499,14446657,134975,002772222B5E,2019-12-30 13:49:48,2019-12-30 13:55:20,56,24,91.0,8.0,0.0,,,976.0
11761351,14447723,136776,002772222B5E,2019-12-30 15:14:21,2019-12-30 15:28:08,64,12,89.0,5.0,0.0,,,976.0
11761487,14447618,96243,002772222B5E,2019-12-30 15:06:05,2019-12-30 15:10:27,91,8,64.0,12.0,0.0,,,976.0
11764080,14450176,115365,002772222B5E,2019-12-30 18:00:11,2019-12-30 18:18:10,89,5,89.0,6.0,0.0,,,976.0
