In [23]:
!pip install --upgrade pandas-profiling

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [24]:
!pip install mlforecast

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [25]:
import pandas as pd

from pandas_profiling import ProfileReport

On importe le dataset traffic_10lines.parquet qu'on a préalablement déposé dans les fichier du notebook. 

In [26]:
traffic_df = pd.read_parquet('traffic_10lines.parquet')

L'affichage ci-après montre toutes les routes existantes dans les 2 directions possibles. 

In [27]:
(traffic_df
 .groupby(['home_airport', 'paired_airport', 'direction'])
 .agg(date_min=('date', 'min'), date_max=('date', 'max'), pax=('pax', 'sum'))
 .reset_index()
)

Unnamed: 0,home_airport,paired_airport,direction,date_min,date_max,pax
0,LGW,AMS,A,2016-01-01,2023-03-09,2686346.0
1,LGW,AMS,D,2016-01-01,2023-03-09,2686476.0
2,LGW,BCN,A,2016-01-01,2023-03-10,3813240.0
3,LGW,BCN,D,2016-01-01,2023-03-09,3799836.0
4,LIS,OPO,A,2016-01-01,2023-03-09,2819094.0
5,LIS,OPO,D,2016-01-01,2023-03-09,2813651.0
6,LIS,ORY,A,2016-01-01,2023-03-09,3835664.0
7,LIS,ORY,D,2016-01-01,2023-03-09,3860404.0
8,LYS,PIS,A,2017-11-20,2023-03-09,6173.0
9,LYS,PIS,D,2018-01-02,2023-03-09,4178.0


Nous créeons une fonction qui permet de générer un dataset pour toutes les routes existantes sans prendre en compte la direction (Arrivée (A) ou Départ (D)).  Celà nous permettra d'entrainer les modèles. Nous prenons les routes existantes seulement car il n'y a pas un grand intérêt d'afficher des données pour les routes non-existantes. 

In [28]:
def generate_route_df(traffic_df: pd.DataFrame, homeAirport: str, pairedAirport: str) -> pd.DataFrame:
  """Extract route dataframe from traffic dataframe for route from home airport to paired airport

  Args:
  - traffic_df (pd.DataFrame): traffic dataframe
  - homeAirport (str): IATA Code for home airport
  - pairedAirport (str): IATA Code for paired airport

  Returns:
  - pd.DataFrame: aggregated daily PAX traffic on route (home-paired)
  """
  _df = (traffic_df
         .query('home_airport == "{home}" and paired_airport == "{paired}"'.format(home=homeAirport, paired=pairedAirport))
         .groupby(['home_airport', 'paired_airport', 'date'])
         .agg(pax_total=('pax', 'sum'))
         .reset_index()
         )
  return _df

La fonction ci-après permet de créer un graphique de la série pour une route donnée. Plusieurs options sont disponibles, comme la mise en lumière de la période "COVID" ou l'insertion de prévisions...

In [29]:
import datetime
import plotly

import plotly.offline as pyoff
import plotly.graph_objs as go

from plotly.subplots import make_subplots


def draw_ts_multiple(df: pd.DataFrame, v1: str, v2: str=None, prediction: str=None, date: str='date',
              secondary_y=True, covid_zone=False, display=True):
  """Draw times series possibly on two y axis, with COVID period option.

  Args:
  - df (pd.DataFrame): time series dataframe (one line per date, series in columns)
  - v1 (str | list[str]): name or list of names of the series to plot on the first x axis
  - v2 (str): name of the serie to plot on the second y axis (default: None)
  - prediction (str): name of v1 hat (prediction) displayed with a dotted line (default: None)
  - date (str): name of date column for time (default: 'date')
  - secondary_y (bool): use a secondary y axis if v2 is used (default: True)
  - covid_zone (bool): highlight COVID-19 period with a grayed rectangle (default: False)
  - display (bool): display figure otherwise just return the figure (default: True)

  Returns:
  - fig (plotly.graph_objs._figure.Figure): Plotly figure generated

  Notes:
  Make sure to use the semi-colon trick if you don't want to have the figure displayed twice.
  Or use `display=False`.
  """
  if isinstance(v1, str):
    variables = [(v1, 'V1')]
  else:
    variables = [(v, 'V1.{}'.format(i)) for i, v in enumerate(v1)]
  title = '<br>'.join([n + ': '+ v for v, n in variables]) + ('<br>V2: ' + v2) if v2 else '<br>'.join([v + ': '+ n for v, n in variables])
  layout = dict(
    title=title,
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=1,
                     label='1m',
                     step='month',
                     stepmode='backward'),
                dict(count=6,
                     label='6m',
                     step='month',
                     stepmode='backward'),
                dict(step='all')
            ])
        ),
        rangeslider=dict(
            visible = True
        ),
        type='date'
    )
  )
  fig = make_subplots(specs=[[{"secondary_y": True}]])
  fig.update_layout(layout)
  for v, name in variables:
    fig.add_trace(go.Scatter(x=df[date], y=df[v], name=name), secondary_y=False)
  if v2:
    fig.add_trace(go.Scatter(x=df[date], y=df[v2], name='V2'), secondary_y=secondary_y)
    fig['layout']['yaxis2']['showgrid'] = False
    fig.update_yaxes(rangemode='tozero')
    fig.update_layout(margin=dict(t=125 + 30 * (len(variables) - 1)))
  if prediction:
    fig.add_trace(go.Scatter(x=df[date], y=df[prediction], name='^V1', line={'dash': 'dot'}), secondary_y=False)

  if covid_zone:
    fig.add_vrect(
        x0=pd.Timestamp("2020-03-01"), x1=pd.Timestamp("2022-01-01"),
        fillcolor="Gray", opacity=0.5,
        layer="below", line_width=0,
    )
  if display:
    pyoff.iplot(fig)
  return fig

Le dataset nous permet de sélectionner les routes existantes et de les mettres en forme comme un dictionnaire Python. Ceci nous permettra d'aller chercher facilement le home et paired airport correspondant. 

In [30]:
routes = (traffic_df
 .drop_duplicates(subset=['home_airport', 'paired_airport'])
 [['home_airport', 'paired_airport']]
 .to_dict(orient='rows')
)
routes





[{'home_airport': 'LGW', 'paired_airport': 'BCN'},
 {'home_airport': 'LGW', 'paired_airport': 'AMS'},
 {'home_airport': 'LIS', 'paired_airport': 'ORY'},
 {'home_airport': 'LIS', 'paired_airport': 'OPO'},
 {'home_airport': 'SSA', 'paired_airport': 'GRU'},
 {'home_airport': 'NTE', 'paired_airport': 'FUE'},
 {'home_airport': 'LYS', 'paired_airport': 'PIS'},
 {'home_airport': 'PNH', 'paired_airport': 'NGB'},
 {'home_airport': 'POP', 'paired_airport': 'JFK'},
 {'home_airport': 'SCL', 'paired_airport': 'LHR'}]

Nous pouvons maintenant afficher toutes les séries pour chaque route existantes (sans s'occuper du départ et de l'arrivée). On évite ainsi d'utiliser des routes non-existantes donnant des séries vides. 

In [31]:
for route in routes: 
  print(route)
  home = route['home_airport']
  paired = route['paired_airport']
  data = generate_route_df(traffic_df, home, paired)
  draw_ts_multiple(data, 'pax_total', covid_zone=True)

{'home_airport': 'LGW', 'paired_airport': 'BCN'}


{'home_airport': 'LGW', 'paired_airport': 'AMS'}


{'home_airport': 'LIS', 'paired_airport': 'ORY'}


{'home_airport': 'LIS', 'paired_airport': 'OPO'}


{'home_airport': 'SSA', 'paired_airport': 'GRU'}


{'home_airport': 'NTE', 'paired_airport': 'FUE'}


{'home_airport': 'LYS', 'paired_airport': 'PIS'}


{'home_airport': 'PNH', 'paired_airport': 'NGB'}


{'home_airport': 'POP', 'paired_airport': 'JFK'}


{'home_airport': 'SCL', 'paired_airport': 'LHR'}


Nous pouvons entrainer un modèle Prophet pour créer des prévisions d'affluence sur toutes les routes existantes. Grâce à cette étape nous pouvons récupérer le nom du modèle, la route qui y est associée et sa performance en terme de RMSE. 

In [32]:
from prophet import Prophet
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics
models = dict()
performances = dict()

for route in routes:
  print(route)
  home = route['home_airport']
  paired = route['paired_airport']
  # Build route traffic dataframe
  _df = generate_route_df(traffic_df, home, paired)
  # Create a model
  _model = Prophet()
  # Fit the model
  _model.fit(_df.rename(columns={'date': 'ds', 'pax_total': 'y'}))
  # Cross validate the model
  _cv_df = cross_validation(_model, horizon='90 days', parallel="processes")
  _perf_df = performance_metrics(_cv_df, rolling_window=1)
  # Save results
  models[(home, paired)] = _model
  performances[home, paired] = _perf_df['rmse'].values[0]

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/y8_f11b3.json


{'home_airport': 'LGW', 'paired_airport': 'BCN'}


DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/gja6fh3z.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=2095', 'data', 'file=/tmp/tmpm4aowhn2/y8_f11b3.json', 'init=/tmp/tmpm4aowhn2/gja6fh3z.json', 'output', 'file=/tmp/tmpm4aowhn2/prophet_model1mih8x_5/prophet_model-20230601093035.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
09:30:35 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
09:30:36 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Making 49 forecasts with cutoffs between 2017-01-10 00:00:00 and 2022-12-10 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fa7fd5032e0>
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/uogqoslp.json
DEBUG:cmdstanpy:input tempfile: 

{'home_airport': 'LGW', 'paired_airport': 'AMS'}


DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/j6dme_qa.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=62968', 'data', 'file=/tmp/tmpm4aowhn2/5lirs3gf.json', 'init=/tmp/tmpm4aowhn2/j6dme_qa.json', 'output', 'file=/tmp/tmpm4aowhn2/prophet_modelr8zcs49l/prophet_model-20230601093057.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
09:30:57 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
09:30:58 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Making 49 forecasts with cutoffs between 2017-01-09 00:00:00 and 2022-12-09 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fa7fd5016c0>
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/j9y9axam.json
DEBUG:cmdstanpy:input tempfile:

{'home_airport': 'LIS', 'paired_airport': 'ORY'}


DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/qdc4c2p7.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=91958', 'data', 'file=/tmp/tmpm4aowhn2/r7_jb55i.json', 'init=/tmp/tmpm4aowhn2/qdc4c2p7.json', 'output', 'file=/tmp/tmpm4aowhn2/prophet_modelk0v8iyvi/prophet_model-20230601093116.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
09:31:16 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
09:31:16 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Making 49 forecasts with cutoffs between 2017-01-09 00:00:00 and 2022-12-09 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fa7fd5038e0>
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/xjd9zrhr.json
DEBUG:cmdstanpy:input tempfile:

{'home_airport': 'LIS', 'paired_airport': 'OPO'}


DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/9oskrqtp.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=23070', 'data', 'file=/tmp/tmpm4aowhn2/apn4asf6.json', 'init=/tmp/tmpm4aowhn2/9oskrqtp.json', 'output', 'file=/tmp/tmpm4aowhn2/prophet_model81cxgvnx/prophet_model-20230601093140.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
09:31:40 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
09:31:41 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Making 49 forecasts with cutoffs between 2017-01-09 00:00:00 and 2022-12-09 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fa7fd514820>
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/1wuc8pli.json
DEBUG:cmdstanpy:input tempfile:

{'home_airport': 'SSA', 'paired_airport': 'GRU'}


09:32:10 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Making 24 forecasts with cutoffs between 2020-02-08 00:00:00 and 2022-12-09 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fa7fd5195a0>
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/a95dqohl.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/z8trwxsy.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/qnzpmm3g.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/3igza70b.json
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=20823', 'data', 'file=/tmp/tmpm4aowhn2/a95dqohl.json', 'init=/tmp/tmpm4aowhn2/qnzpmm3g.json', 'output', 'file=/tmp/tmpm4aowhn2/prophet_modelv4xv5i8g/prophet_model-20230601093210.csv', 'method=optimize', 'algorithm=lbfg

{'home_airport': 'NTE', 'paired_airport': 'FUE'}


DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/bcb742_w.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/rn3ktixf.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/_i12db6l.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/vc23dp97.json
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=63577', 'data', 'file=/tmp/tmpm4aowhn2/rn3ktixf.json', 'init=/tmp/tmpm4aowhn2/_i12db6l.json', 'output', 'file=/tmp/tmpm4aowhn2/prophet_model601fy5q1/prophet_model-20230601093220.csv', 'method=optimize', 'algorithm=newton', 'iter=10000']
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=80807', 'data', 'file=/tmp/tmpm4aowhn2/bcb742_w.json', 'init=/tmp/tmpm4aowhn2/vc23dp97.json', 'output', 'file=/

{'home_airport': 'LYS', 'paired_airport': 'PIS'}


INFO:prophet:n_changepoints greater than number of observations. Using 5.
INFO:prophet:n_changepoints greater than number of observations. Using 7.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/kpcfgpr4.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/z0n4q2ov.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/n90sxydg.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/se2ajea6.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=94058', 'data', 'file=/tmp/tmpm4aowhn2/z0n4q2ov.json', 'init=/tmp/tmpm4aowhn2/n90sxydg.json', 'output', 'file=/tmp/tmpm4aowhn2/prophet_model_x5b1i8n/prophet_model-20230601093223.csv', 'method=optimize', 'algorithm=newton', 'iter=10000']
09:32:23 - cmdstanpy - INFO - Chain [1] start processing
DEBUG:cmdstanpy:idx 0
INFO:cmdstanpy:Chain [1] start processing
DEBUG:cmdstanpy:running CmdStan, num_threa

{'home_airport': 'PNH', 'paired_airport': 'NGB'}


09:33:14 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Making 11 forecasts with cutoffs between 2017-10-02 00:00:00 and 2022-12-07 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fa7fd50dba0>
INFO:prophet:n_changepoints greater than number of observations. Using 0.
INFO:prophet:n_changepoints greater than number of observations. Using 0.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/fnmtup37.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/b5yneshe.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/89x3cznx.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/mb4mhcv9.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=86568', 'data', 'fi

{'home_airport': 'POP', 'paired_airport': 'JFK'}


09:33:19 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Making 32 forecasts with cutoffs between 2019-02-13 00:00:00 and 2022-12-09 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fa7fd518520>
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/k5rrxeky.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/76snmva8.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/zudg5m0_.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=97823', 'data', 'file=/tmp/tmpm4aowhn2/k5rrxeky.json', 'init=/tmp/tmpm4aowhn2/zudg5m0_.json', 'output', 'file=/tmp/tmpm4aowhn2/prophet_model30abcaos/prophet_model-20230601093319.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
09:33:19 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdst

{'home_airport': 'SCL', 'paired_airport': 'LHR'}


INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Making 13 forecasts with cutoffs between 2020-01-04 00:00:00 and 2022-12-07 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fa7fd51ce80>
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/ctg2m7v0.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/moxfw6k1.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm4aowhn2/eqs1yr_g.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=77473', 'data', 'file=/tmp/tmpm4aowhn2/ctg2m7v0.json', 'init=/tmp/tmpm4aowhn2/eqs1yr_g.json', 'output', 'file=/tmp/tmpm4aowhn2/prophet_model7ge23mz0/prophet_model-20230601093332.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
09:33:32 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
DEBUG:cmdstanpy:input te

In [33]:
models

{('LGW', 'BCN'): <prophet.forecaster.Prophet at 0x7fa7e3253ca0>,
 ('LGW', 'AMS'): <prophet.forecaster.Prophet at 0x7fa7e57adf00>,
 ('LIS', 'ORY'): <prophet.forecaster.Prophet at 0x7fa7fe9d91b0>,
 ('LIS', 'OPO'): <prophet.forecaster.Prophet at 0x7fa853fb6170>,
 ('SSA', 'GRU'): <prophet.forecaster.Prophet at 0x7fa7fd55cc70>,
 ('NTE', 'FUE'): <prophet.forecaster.Prophet at 0x7fa7fd53d420>,
 ('LYS', 'PIS'): <prophet.forecaster.Prophet at 0x7fa7e3d1ef20>,
 ('PNH', 'NGB'): <prophet.forecaster.Prophet at 0x7fa7fd5392a0>,
 ('POP', 'JFK'): <prophet.forecaster.Prophet at 0x7fa7e3d00580>,
 ('SCL', 'LHR'): <prophet.forecaster.Prophet at 0x7fa7fd55cf10>}

In [34]:
performances

{('LGW', 'BCN'): 1252.80455530257,
 ('LGW', 'AMS'): 927.4295007587415,
 ('LIS', 'ORY'): 871.9787557259463,
 ('LIS', 'OPO'): 607.0948037131809,
 ('SSA', 'GRU'): 1952.9757983596164,
 ('NTE', 'FUE'): 192.23652122836083,
 ('LYS', 'PIS'): 632.5392618552802,
 ('PNH', 'NGB'): 1562.754400403463,
 ('POP', 'JFK'): 76.63391104712609,
 ('SCL', 'LHR'): 146.91799138474485}

Nous pouvons maintenant faire une prévisions pour k jours (ici k = 365), en itérant sur toutes les routes pour que celà nous entraine le modèle correspondant à la route itérée. 
Enfin, nous pouvons faire le graphique de la prédiction. 

In [35]:
k = 365
for route in routes: 
  print(route)
  home = route['home_airport']
  paired = route['paired_airport']
  data = generate_route_df(traffic_df, home, paired)
  future = models[(home, paired)].make_future_dataframe(periods=k)
  forecast = models[(home, paired)].predict(future)
  forecast = forecast[['ds', 'yhat']].tail(k).rename(columns={"ds": "date", "yhat":"Forecasts"})
  draw_ts_multiple(pd.concat([data[['home_airport', 'date', 'pax_total']].rename(columns={"pax_total": "Passengers"}),forecast]),
                   v1='Passengers', v2='Forecasts', secondary_y=False, covid_zone=True)

{'home_airport': 'LGW', 'paired_airport': 'BCN'}


{'home_airport': 'LGW', 'paired_airport': 'AMS'}


{'home_airport': 'LIS', 'paired_airport': 'ORY'}


{'home_airport': 'LIS', 'paired_airport': 'OPO'}


{'home_airport': 'SSA', 'paired_airport': 'GRU'}


{'home_airport': 'NTE', 'paired_airport': 'FUE'}


{'home_airport': 'LYS', 'paired_airport': 'PIS'}


{'home_airport': 'PNH', 'paired_airport': 'NGB'}


{'home_airport': 'POP', 'paired_airport': 'JFK'}


{'home_airport': 'SCL', 'paired_airport': 'LHR'}


OPTION : Nous allons entrainer 3 autres modèles, toujours en itérant sur toutes les routes. 

In [36]:
#Faire tourner les autres modèles : 

import lightgbm as lgb
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor

from mlforecast import MLForecast
from numba import njit
from window_ops.expanding import expanding_mean
from window_ops.rolling import rolling_mean

tested_models = [
    lgb.LGBMRegressor(),
    xgb.XGBRegressor(),
    RandomForestRegressor(random_state=0),
]

@njit
def rolling_mean_28(x):
    return rolling_mean(x, window_size=28)


fcst = MLForecast(
    models=tested_models,
    freq='D',
    lags=[7, 14, 21, 28],
    lag_transforms={
        1: [expanding_mean],
        7: [rolling_mean_28]
    },
    date_features=['dayofweek'],
    differences=[1],
)



The differences argument is deprecated and will be removed in a future version.
Please pass an `mlforecast.target_transforms.Differences` instance to the `target_transforms` argument instead.



Ici encore nous pouvons, selon le nombre de jours choisi (k) et le modèle (m) montrer un graphique de la prédiction. 

In [37]:
k = 365
m='XGBRegressor'
for route in routes: 
  print(route)
  home = route['home_airport']
  paired = route['paired_airport']
  nixtla_model = fcst.fit(generate_route_df(traffic_df, home, paired).drop(columns=['paired_airport']),
                        id_col='home_airport', time_col='date', target_col='pax_total')
  draw_ts_multiple((pd.concat([generate_route_df(traffic_df, home, paired).drop(columns=['paired_airport']),
                             nixtla_model.predict(k)])),
                 v1='pax_total', v2=m, secondary_y=False, covid_zone=True);

{'home_airport': 'LGW', 'paired_airport': 'BCN'}


{'home_airport': 'LGW', 'paired_airport': 'AMS'}


{'home_airport': 'LIS', 'paired_airport': 'ORY'}


{'home_airport': 'LIS', 'paired_airport': 'OPO'}


{'home_airport': 'SSA', 'paired_airport': 'GRU'}


{'home_airport': 'NTE', 'paired_airport': 'FUE'}


{'home_airport': 'LYS', 'paired_airport': 'PIS'}


{'home_airport': 'PNH', 'paired_airport': 'NGB'}


{'home_airport': 'POP', 'paired_airport': 'JFK'}


{'home_airport': 'SCL', 'paired_airport': 'LHR'}
