In [1]:
import pandas as pd
import os
import requests
import os
import pickle

from joblib import load
from pipeline import create_time_feature



In [48]:
st = pd.read_csv('data/stations_info.csv')

In [3]:
st

Unnamed: 0,station_id,name,lat,lon,capacity,cluster
0,213688169,Benjamin Godard - Victor Hugo,48.865983,2.275725,35,555
1,516709288,Charonne - Robert et Sonia Delauney,48.855908,2.392571,20,7
2,36255,Toudouze - Clauzel,48.879296,2.337360,21,888
3,37815204,Mairie du 12ème,48.840855,2.387555,30,33
4,100769544,Harpe - Saint-Germain,48.851519,2.343670,45,8
...,...,...,...,...,...,...
1425,1062807847,BNF - Bibliothèque Nationale de France,48.835027,2.376016,42,3
1426,315022587,Malesherbes - Place de la Madeleine,48.870406,2.323244,67,1
1427,34742973,Place Balard,48.836396,2.278419,22,88
1428,478732841,Sebastopol - Rambuteau,48.861818,2.350138,16,7


In [None]:


class velibPredictor():
    '''This class can collect meteo datapoints and make velib/docks predictions for a specified date and hour'''
    
    def __init__(self, date, hour, stations):
        
        self.target_date = pd.Timestamp(f"{date} {hour}", tz="Europe/Brussels")
        self.stations = stations
        
        path = 'velib_prediction/modeling/models/'
        self.models = {st: [load(path+model_dic[st][0]),load(path+model_dic[st][1])] for st in stations}
        
    def get_API_meteo(self, day_shift_nb):
        """Retrieve meteo forecast info via meteo-concept' API, return a pd.DataFrame for a given day"""

       # TOKEN = os.environ["METEO_TOKEN"]
        
        TOKEN = 'b2e61b6debca16466d2155717dfbd66e4174baaf17c9be89c8daae6addcb553f'

        url = f'https://api.meteo-concept.com/api/forecast/daily/{day_shift_nb}/periods?token={TOKEN}&insee=75056'
        rep = requests.get(url)

        assert rep.status_code == 200, f"API call failed with code error {rep.status_code}"
        
        df = pd.DataFrame(rep.json()['forecast']).loc[:,['temp2m','probarain','weather','wind10m','datetime']]
        df['datetime'] = pd.to_datetime(df.datetime)
        return df.set_index('datetime')

    
    def retrieve_meteo_forecast(self):
        
        now = pd.Timestamp.now(tz="Europe/Brussels")
        
        day_shift = (self.target_date - now.floor('d')).days
        assert day_shift <= 13, "No meteo data after 13 days"
        
        if self.target_date.hour < 2:
            df = self.get_API_meteo(day_shift - 1)
            self.meteo_info = df.tail(1)

        else:
            df = self.get_API_meteo(day_shift)
            self.meteo_info = df.loc[(self.target_date - pd.Timedelta(hours=5, minutes=59)) : self.target_date]
       
    
    def predict(self):
        
        self.X = self.meteo_info

        
        return {st : [mod_tup[0].predict(self.X).item(),
                      mod_tup[1].predict(self.X).item()] for st, mod_tup in self.models.items()}
        
        return {
            'ATF_docks_available'    : self.ATF_DOCKS.predict(self.X).item(),
            'ATF_meca_available'     : self.ATF_MECA.predict(self.X).item(),
            'MAIRIE_docks_available' : self.MAIRIE_DOCKS.predict(self.X).item(),
            'MAIRIE_meca_available'  : self.MAIRIE_MECA.predict(self.X).item(),
            'X':self.X
            }
    
    
        

In [4]:
model_dic = {8: 'docks_cl_8.pkl',
             88: 'docks_cl_88.pkl',
             11: 'docks_cl_11.pkl',
             888: 'docks_cl_888.pkl',
             33: 'docks_cl_33.pkl',
             2: 'docks_cl_2.pkl',
             3: 'docks_cl_3.pkl',
             1: 'docks_cl_1.pkl',
             0: 'docks_cl_0.pkl',
             4: 'docks_cl_4.pkl',
             7: 'docks_cl_7.pkl',
             555: 'docks_cl_555.pkl',
             55: 'docks_cl_55.pkl',
             6: 'docks_cl_6.pkl'}

In [47]:
models = {cluster : pickle.load(open(f"models/{mod}","rb")) for cluster,mod in model_dic.items()}

In [66]:
station_df = pd.read_csv('data/stations_info.csv')

In [95]:
class velibPredictor():
    '''This class can collect meteo datapoints and make velib/docks predictions for a specified date and hour'''
    
    def __init__(self):
        
        self.stations = None
        self.date = None
        self.hour = None

        
        self.station_df = pd.read_csv('data/stations_info.csv').set_index('name')
        self.station_cluster_dic = dict(zip(self.station_df.index, self.station_df.cluster))

        station_id_dic = None
        
        self.models = {cluster : pickle.load(open(f"models/{mod}","rb")) for cluster,mod in model_dic.items()}
        
    
    def add_time(self, date, hour):
        
        self.date = date
        self.hour = hour
        
        self.target_date = pd.Timestamp(f"{date} {hour}", tz="Europe/Brussels")
        
        
    def add_stations(self, stations):
        
        self.stations = stations
        self.station_id_dic = {int(self.station_df.at[st,'station_id']) : st for st in self.stations}
        
    
    def get_API_meteo(self, day_shift_nb):
        """Retrieve meteo forecast info via meteo-concept' API, return a pd.DataFrame for a given day"""

       # TOKEN = os.environ["METEO_TOKEN"]
        
        TOKEN = 'b2e61b6debca16466d2155717dfbd66e4174baaf17c9be89c8daae6addcb553f'

        url = f'https://api.meteo-concept.com/api/forecast/daily/{day_shift_nb}/periods?token={TOKEN}&insee=75056'
        rep = requests.get(url)

        assert rep.status_code == 200, f"API call failed with code error {rep.status_code}"
        
        df = pd.DataFrame(rep.json()['forecast']).loc[:,['temp2m','probarain','weather','wind10m','datetime']]
        df['datetime'] = pd.to_datetime(df.datetime)
        return df.set_index('datetime')

    
    def retrieve_meteo_forecast(self):
        
        now = pd.Timestamp.now(tz="Europe/Brussels")
        
        day_shift = (self.target_date - now.floor('d')).days
        assert day_shift <= 13, "No meteo data after 13 days"
        
        if self.target_date.hour < 2:
            df = self.get_API_meteo(day_shift - 1)
            self.meteo_info = df.tail(1)

        else:
            df = self.get_API_meteo(day_shift)
            self.meteo_info = df.loc[(self.target_date - pd.Timedelta(hours=5, minutes=59)) : self.target_date]
            
            
            
    def predict(self):
        
        self.X = self.meteo_info.reset_index().rename({'datetime':'time'}, axis='columns')
        self.X = create_time_feature(self.X)
        
        results = dict()
        
        for st_id, st_name  in self.station_id_dic.items():
            
            X = self.X.assign(station_id = st_id)
            
            #should have dropped "capacity" during training as it doesn't bring any more info to our model
            X['capacity'] = int(self.station_df.at[st_name, 'capacity']) 
            
            X.columns = ['station_id', 'capacity', 'temp2m', 'probarain', 'weather', 'wind10m',
                 'month', 'hour', 'day', 'minute']
            
            cluster = self.station_cluster_dic.get(st_name)
            model = self.models.get(cluster)
            
            results[st_name] = model.predict(X).item()
        
        return results
    

In [96]:
v = velibPredictor()

In [97]:
d = "2022/05/11"
h = "13:30:00"

v.add_time(date = d, hour=h)
v.add_stations(['Benjamin Godard - Victor Hugo', 'Charonne - Robert et Sonia Delauney'])

In [98]:
v.retrieve_meteo_forecast()

In [99]:
v.predict()

{'Benjamin Godard - Victor Hugo': 13.450408935546875,
 'Charonne - Robert et Sonia Delauney': 1.1372238397598267}

In [102]:
station_df.index.unique()

Index(['Benjamin Godard - Victor Hugo', 'Charonne - Robert et Sonia Delauney',
       'Toudouze - Clauzel', 'Mairie du 12ème', 'Harpe - Saint-Germain',
       'Jourdan - Stade Charléty', 'Alibert - Jemmapes',
       'Messine - Place Du Pérou', 'Cassini - Denfert-Rochereau',
       'Saint-Sulpice',
       ...
       'Jonquière - Docteur Paul Brousse', 'Ordener - Poissonniers',
       'Ruisseau - Ordener', 'Clignancourt - Ordener',
       'Gare Saint-Lazare - Cour du Havre',
       'BNF - Bibliothèque Nationale de France',
       'Malesherbes - Place de la Madeleine', 'Place Balard',
       'Sebastopol - Rambuteau', 'Caumartin - Provence'],
      dtype='object', name='name', length=1412)