In [39]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


'/home/ubuntu/varios/skforecast'

In [40]:
import pandas as pd
import numpy as np
from typing import Union, List, Tuple, Dict, Any, Optional
from sklearn.linear_model import LinearRegression
from skforecast.utils import preprocess_y
from skforecast.utils import preprocess_exog
from skforecast.utils import align_series_and_exog_multiseries
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries

import pytest
import ipytest
from pprint import pprint

In [41]:
series = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
series.index = pd.date_range(start='2020-01-01', periods=len(series), freq='D')
display(series)

ipytest.autoconfig()

@pytest.mark.parametrize("series", [
    [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
    "1, 2, 3, 4, 5, 6, 7, 8, 9, 10"
])
def test_check_preprocess_series_raises_TypeError_if_series_is_not_a_pandas_dataframe_series_or_dict(series):
    forecaster = ForecasterAutoregMultiSeries(
                    regressor = LinearRegression(),
                    lags = 10
                )
    msg_error = (
        f"`series` must be a pandas DataFrame or a dict of DataFrames or Series. "
        f"Got {type(series)}."
    )
    with pytest.raises(TypeError, match=msg_error):
        forecaster._check_preprocess_series(series=series)


ipytest.run('-qq')

2020-01-01     1
2020-01-02     2
2020-01-03     3
2020-01-04     4
2020-01-05     5
2020-01-06     6
2020-01-07     7
2020-01-08     8
2020-01-09     9
2020-01-10    10
Freq: D, dtype: int64

[32m.[0m[32m.[0m[32m.[0m[32m                                                                                          [100%][0m


<ExitCode.OK: 0>

In [92]:
# Series is a data frame and exog is a data frame
# =============================================================================
series = pd.DataFrame(
    np.random.randint(0, 100, size=(10, 3)),
    columns=[f"series_{i}" for i in range(1, 4)],
    index=pd.date_range(start='2020-01-01', periods=10, freq='D')
)
display(series)

series_dict = series.to_dict('series')
pprint(series_dict)

exog = series.copy()
if exog is not None:
    exog_dict = exog.to_dict('series')
else:
    exog_dict = {series: None for series in series_dict.keys()}
display(exog)
pprint(exog_dict)


series_dict_aligned, exog_dict_aligned = align_series_and_exog_multiseries(
    series_dict          = series_dict,
    input_series_is_dict = isinstance(series, dict),
    exog_dict            = exog_dict
)

for k in series_dict_aligned.keys():
    assert (series_dict_aligned[k] == series_dict[k]).all()
    assert (exog_dict_aligned[k] == exog_dict[k]).all()

aligned_inputs = [
            [series_dict[k], exog_dict[k]]
            for k
            in series_dict.keys() & exog_dict.keys()
        ]

for series_aligned, exog_aligned in aligned_inputs:
    display(series_aligned)
    display(exog_aligned)


Unnamed: 0,series_1,series_2,series_3
2020-01-01,89,40,18
2020-01-02,80,15,40
2020-01-03,3,72,64
2020-01-04,86,38,14
2020-01-05,57,84,22
2020-01-06,19,31,11
2020-01-07,77,58,80
2020-01-08,11,20,1
2020-01-09,84,47,87
2020-01-10,51,91,28


{'series_1': 2020-01-01    89
2020-01-02    80
2020-01-03     3
2020-01-04    86
2020-01-05    57
2020-01-06    19
2020-01-07    77
2020-01-08    11
2020-01-09    84
2020-01-10    51
Freq: D, Name: series_1, dtype: int64,
 'series_2': 2020-01-01    40
2020-01-02    15
2020-01-03    72
2020-01-04    38
2020-01-05    84
2020-01-06    31
2020-01-07    58
2020-01-08    20
2020-01-09    47
2020-01-10    91
Freq: D, Name: series_2, dtype: int64,
 'series_3': 2020-01-01    18
2020-01-02    40
2020-01-03    64
2020-01-04    14
2020-01-05    22
2020-01-06    11
2020-01-07    80
2020-01-08     1
2020-01-09    87
2020-01-10    28
Freq: D, Name: series_3, dtype: int64}


Unnamed: 0,series_1,series_2,series_3
2020-01-01,89,40,18
2020-01-02,80,15,40
2020-01-03,3,72,64
2020-01-04,86,38,14
2020-01-05,57,84,22
2020-01-06,19,31,11
2020-01-07,77,58,80
2020-01-08,11,20,1
2020-01-09,84,47,87
2020-01-10,51,91,28


{'series_1': 2020-01-01    89
2020-01-02    80
2020-01-03     3
2020-01-04    86
2020-01-05    57
2020-01-06    19
2020-01-07    77
2020-01-08    11
2020-01-09    84
2020-01-10    51
Freq: D, Name: series_1, dtype: int64,
 'series_2': 2020-01-01    40
2020-01-02    15
2020-01-03    72
2020-01-04    38
2020-01-05    84
2020-01-06    31
2020-01-07    58
2020-01-08    20
2020-01-09    47
2020-01-10    91
Freq: D, Name: series_2, dtype: int64,
 'series_3': 2020-01-01    18
2020-01-02    40
2020-01-03    64
2020-01-04    14
2020-01-05    22
2020-01-06    11
2020-01-07    80
2020-01-08     1
2020-01-09    87
2020-01-10    28
Freq: D, Name: series_3, dtype: int64}


2020-01-01    40
2020-01-02    15
2020-01-03    72
2020-01-04    38
2020-01-05    84
2020-01-06    31
2020-01-07    58
2020-01-08    20
2020-01-09    47
2020-01-10    91
Freq: D, Name: series_2, dtype: int64

2020-01-01    40
2020-01-02    15
2020-01-03    72
2020-01-04    38
2020-01-05    84
2020-01-06    31
2020-01-07    58
2020-01-08    20
2020-01-09    47
2020-01-10    91
Freq: D, Name: series_2, dtype: int64

2020-01-01    89
2020-01-02    80
2020-01-03     3
2020-01-04    86
2020-01-05    57
2020-01-06    19
2020-01-07    77
2020-01-08    11
2020-01-09    84
2020-01-10    51
Freq: D, Name: series_1, dtype: int64

2020-01-01    89
2020-01-02    80
2020-01-03     3
2020-01-04    86
2020-01-05    57
2020-01-06    19
2020-01-07    77
2020-01-08    11
2020-01-09    84
2020-01-10    51
Freq: D, Name: series_1, dtype: int64

2020-01-01    18
2020-01-02    40
2020-01-03    64
2020-01-04    14
2020-01-05    22
2020-01-06    11
2020-01-07    80
2020-01-08     1
2020-01-09    87
2020-01-10    28
Freq: D, Name: series_3, dtype: int64

2020-01-01    18
2020-01-02    40
2020-01-03    64
2020-01-04    14
2020-01-05    22
2020-01-06    11
2020-01-07    80
2020-01-08     1
2020-01-09    87
2020-01-10    28
Freq: D, Name: series_3, dtype: int64

In [93]:
# Series is a data frame and exog is dict dataframe already aligned
# =============================================================================
series = pd.DataFrame(
    np.random.randint(0, 100, size=(10, 3)),
    columns=[f"series_{i}" for i in range(1, 4)],
    index=pd.date_range(start='2020-01-01', periods=10, freq='D')
)
display(series)
print("series")
print("------")
display(series)
series_dict = series.to_dict('series')

exog = {
    'series_1': pd.DataFrame(
                    np.random.randint(0, 100, size=(10, 3)),
                    columns=[f"exog_{i}" for i in range(1, 4)],
                    index=pd.date_range(start='2020-01-01', periods=10, freq='D')
                ),
    'series_2': pd.DataFrame(
                    np.random.randint(0, 100, size=(10, 2)),
                    columns=[f"exog_{i}" for i in range(1, 3)],
                    index=pd.date_range(start='2020-01-01', periods=10, freq='D')
                ),
    'series_3': pd.DataFrame(
                    np.random.randint(0, 100, size=(10)),
                    columns=[f"exog_{i}" for i in range(1, 2)],
                    index=pd.date_range(start='2020-01-01', periods=10, freq='D'),
                    
                )          
}

exog_dict = exog.copy()

print("")
print("Series dict")
print("-----------")
pprint(series_dict)
print("")
print("Exog dict")
print("-----------")
pprint(exog_dict)
print("")

series_dict_aligned, exog_dict_aligned = align_series_and_exog_multiseries(
    series_dict          = series_dict,
    input_series_is_dict = isinstance(series, dict),
    exog_dict            = exog_dict
)

print("")
print("Series dict aligned")
print("-----------")
pprint(series_dict_aligned)
print("")
print("Exog dict aligned")
print("-----------")
pprint(exog_dict_aligned)
print("")

for k in series_dict_aligned.keys():
    assert series_dict_aligned[k].index.equals(exog_dict_aligned[k].index)

aligned_inputs = [
            [series_dict[k], exog_dict[k]]
            for k
            in series_dict.keys() & exog_dict.keys()
        ]

for series_aligned, exog_aligned in aligned_inputs:
    display(series_aligned)
    display(exog_aligned)


Unnamed: 0,series_1,series_2,series_3
2020-01-01,73,10,48
2020-01-02,51,82,97
2020-01-03,6,81,28
2020-01-04,19,92,10
2020-01-05,56,89,24
2020-01-06,7,76,38
2020-01-07,88,86,88
2020-01-08,45,57,8
2020-01-09,64,28,55
2020-01-10,57,19,69


series
------


Unnamed: 0,series_1,series_2,series_3
2020-01-01,73,10,48
2020-01-02,51,82,97
2020-01-03,6,81,28
2020-01-04,19,92,10
2020-01-05,56,89,24
2020-01-06,7,76,38
2020-01-07,88,86,88
2020-01-08,45,57,8
2020-01-09,64,28,55
2020-01-10,57,19,69



Series dict
-----------
{'series_1': 2020-01-01    73
2020-01-02    51
2020-01-03     6
2020-01-04    19
2020-01-05    56
2020-01-06     7
2020-01-07    88
2020-01-08    45
2020-01-09    64
2020-01-10    57
Freq: D, Name: series_1, dtype: int64,
 'series_2': 2020-01-01    10
2020-01-02    82
2020-01-03    81
2020-01-04    92
2020-01-05    89
2020-01-06    76
2020-01-07    86
2020-01-08    57
2020-01-09    28
2020-01-10    19
Freq: D, Name: series_2, dtype: int64,
 'series_3': 2020-01-01    48
2020-01-02    97
2020-01-03    28
2020-01-04    10
2020-01-05    24
2020-01-06    38
2020-01-07    88
2020-01-08     8
2020-01-09    55
2020-01-10    69
Freq: D, Name: series_3, dtype: int64}

Exog dict
-----------
{'series_1':             exog_1  exog_2  exog_3
2020-01-01      92      64      74
2020-01-02      91      25      98
2020-01-03      45      76      83
2020-01-04      61      40      94
2020-01-05      94       4      21
2020-01-06      65      12      24
2020-01-07       1       7  

2020-01-01    10
2020-01-02    82
2020-01-03    81
2020-01-04    92
2020-01-05    89
2020-01-06    76
2020-01-07    86
2020-01-08    57
2020-01-09    28
2020-01-10    19
Freq: D, Name: series_2, dtype: int64

Unnamed: 0,exog_1,exog_2
2020-01-01,44,48
2020-01-02,59,80
2020-01-03,77,93
2020-01-04,94,98
2020-01-05,18,98
2020-01-06,53,83
2020-01-07,11,73
2020-01-08,69,33
2020-01-09,54,78
2020-01-10,12,27


2020-01-01    73
2020-01-02    51
2020-01-03     6
2020-01-04    19
2020-01-05    56
2020-01-06     7
2020-01-07    88
2020-01-08    45
2020-01-09    64
2020-01-10    57
Freq: D, Name: series_1, dtype: int64

Unnamed: 0,exog_1,exog_2,exog_3
2020-01-01,92,64,74
2020-01-02,91,25,98
2020-01-03,45,76,83
2020-01-04,61,40,94
2020-01-05,94,4,21
2020-01-06,65,12,24
2020-01-07,1,7,57
2020-01-08,13,90,98
2020-01-09,12,85,61
2020-01-10,43,26,32


2020-01-01    48
2020-01-02    97
2020-01-03    28
2020-01-04    10
2020-01-05    24
2020-01-06    38
2020-01-07    88
2020-01-08     8
2020-01-09    55
2020-01-10    69
Freq: D, Name: series_3, dtype: int64

Unnamed: 0,exog_1
2020-01-01,7
2020-01-02,56
2020-01-03,15
2020-01-04,8
2020-01-05,51
2020-01-06,94
2020-01-07,11
2020-01-08,96
2020-01-09,40
2020-01-10,28


In [94]:
# Series is a data frame and exog is dict dataframe already aligned
# =============================================================================
series = pd.DataFrame(
    np.random.randint(0, 100, size=(10, 3)),
    columns=[f"series_{i}" for i in range(1, 4)],
    index=pd.date_range(start='2020-01-01', periods=10, freq='D')
)
display(series)
series = series.to_dict('series')
print("series")
print("------")
display(series)
series_dict = series.copy()

exog = {
    'series_1': pd.DataFrame(
                    np.random.randint(0, 100, size=(5, 3)),
                    columns=[f"exog_{i}" for i in range(1, 4)],
                    index=pd.date_range(start='2020-01-01', periods=5, freq='D')
                ),
    'series_2': pd.DataFrame(
                    np.random.randint(0, 100, size=(5, 2)),
                    columns=[f"exog_{i}" for i in range(1, 3)],
                    index=pd.date_range(start='2020-01-01', periods=5, freq='D')
                ),
    'series_3': pd.DataFrame(
                    np.random.randint(0, 100, size=(5)),
                    columns=[f"exog_{i}" for i in range(1, 2)],
                    index=pd.date_range(start='2020-01-01', periods=5, freq='D'),
                    
                )          
}

exog_dict = exog.copy()

print("")
print("Series dict")
print("-----------")
pprint(series_dict)
print("")
print("Exog dict")
print("-----------")
pprint(exog_dict)
print("")

series_dict_aligned, exog_dict_aligned = align_series_and_exog_multiseries(
    series_dict          = series_dict,
    input_series_is_dict = isinstance(series, dict),
    exog_dict            = exog_dict
)

print("")
print("Series dict aligned")
print("-----------")
pprint(series_dict_aligned)
print("")
print("Exog dict aligned")
print("-----------")
pprint(exog_dict_aligned)
print("")

for k in series_dict_aligned.keys():
    assert series_dict_aligned[k].index.equals(exog_dict_aligned[k].index)


aligned_inputs = [
            [series_dict[k], exog_dict[k]]
            for k
            in series_dict.keys() & exog_dict.keys()
        ]

for series_aligned, exog_aligned in aligned_inputs:
    display(series_aligned)
    display(exog_aligned)

Unnamed: 0,series_1,series_2,series_3
2020-01-01,45,84,20
2020-01-02,52,34,65
2020-01-03,56,28,16
2020-01-04,70,43,83
2020-01-05,53,50,27
2020-01-06,18,1,59
2020-01-07,34,84,19
2020-01-08,97,47,21
2020-01-09,23,94,32
2020-01-10,90,68,19


series
------


{'series_1': 2020-01-01    45
 2020-01-02    52
 2020-01-03    56
 2020-01-04    70
 2020-01-05    53
 2020-01-06    18
 2020-01-07    34
 2020-01-08    97
 2020-01-09    23
 2020-01-10    90
 Freq: D, Name: series_1, dtype: int64,
 'series_2': 2020-01-01    84
 2020-01-02    34
 2020-01-03    28
 2020-01-04    43
 2020-01-05    50
 2020-01-06     1
 2020-01-07    84
 2020-01-08    47
 2020-01-09    94
 2020-01-10    68
 Freq: D, Name: series_2, dtype: int64,
 'series_3': 2020-01-01    20
 2020-01-02    65
 2020-01-03    16
 2020-01-04    83
 2020-01-05    27
 2020-01-06    59
 2020-01-07    19
 2020-01-08    21
 2020-01-09    32
 2020-01-10    19
 Freq: D, Name: series_3, dtype: int64}


Series dict
-----------
{'series_1': 2020-01-01    45
2020-01-02    52
2020-01-03    56
2020-01-04    70
2020-01-05    53
2020-01-06    18
2020-01-07    34
2020-01-08    97
2020-01-09    23
2020-01-10    90
Freq: D, Name: series_1, dtype: int64,
 'series_2': 2020-01-01    84
2020-01-02    34
2020-01-03    28
2020-01-04    43
2020-01-05    50
2020-01-06     1
2020-01-07    84
2020-01-08    47
2020-01-09    94
2020-01-10    68
Freq: D, Name: series_2, dtype: int64,
 'series_3': 2020-01-01    20
2020-01-02    65
2020-01-03    16
2020-01-04    83
2020-01-05    27
2020-01-06    59
2020-01-07    19
2020-01-08    21
2020-01-09    32
2020-01-10    19
Freq: D, Name: series_3, dtype: int64}

Exog dict
-----------
{'series_1':             exog_1  exog_2  exog_3
2020-01-01      42      26      47
2020-01-02      37      56      31
2020-01-03      60      16       1
2020-01-04      61       0      67
2020-01-05       2      83      37,
 'series_2':             exog_1  exog_2
2020-01-01      60    



2020-01-01    84
2020-01-02    34
2020-01-03    28
2020-01-04    43
2020-01-05    50
2020-01-06     1
2020-01-07    84
2020-01-08    47
2020-01-09    94
2020-01-10    68
Freq: D, Name: series_2, dtype: int64

Unnamed: 0,exog_1,exog_2
2020-01-01,60.0,91.0
2020-01-02,98.0,63.0
2020-01-03,72.0,49.0
2020-01-04,78.0,79.0
2020-01-05,87.0,86.0
2020-01-06,,
2020-01-07,,
2020-01-08,,
2020-01-09,,
2020-01-10,,


2020-01-01    45
2020-01-02    52
2020-01-03    56
2020-01-04    70
2020-01-05    53
2020-01-06    18
2020-01-07    34
2020-01-08    97
2020-01-09    23
2020-01-10    90
Freq: D, Name: series_1, dtype: int64

Unnamed: 0,exog_1,exog_2,exog_3
2020-01-01,42.0,26.0,47.0
2020-01-02,37.0,56.0,31.0
2020-01-03,60.0,16.0,1.0
2020-01-04,61.0,0.0,67.0
2020-01-05,2.0,83.0,37.0
2020-01-06,,,
2020-01-07,,,
2020-01-08,,,
2020-01-09,,,
2020-01-10,,,


2020-01-01    20
2020-01-02    65
2020-01-03    16
2020-01-04    83
2020-01-05    27
2020-01-06    59
2020-01-07    19
2020-01-08    21
2020-01-09    32
2020-01-10    19
Freq: D, Name: series_3, dtype: int64

Unnamed: 0,exog_1
2020-01-01,54.0
2020-01-02,51.0
2020-01-03,30.0
2020-01-04,85.0
2020-01-05,57.0
2020-01-06,
2020-01-07,
2020-01-08,
2020-01-09,
2020-01-10,


In [95]:
# Series is a data frame and exog is dict dataframe already aligned
# =============================================================================
series = pd.DataFrame(
    np.random.randint(0, 100, size=(10, 3)),
    columns=[f"series_{i}" for i in range(1, 4)],
    index=pd.date_range(start='2020-01-01', periods=10, freq='D')
)
series.iloc[:3, :] = np.nan
series.iloc[-2:, :] = np.nan
display(series)
series = series.to_dict('series')
print("series")
print("------")
display(series)
series_dict = series.copy()

exog = {
    'series_1': pd.DataFrame(
                    np.random.randint(0, 100, size=(5, 3)),
                    columns=[f"exog_{i}" for i in range(1, 4)],
                    index=pd.date_range(start='2020-01-01', periods=5, freq='D')
                ),
    'series_2': pd.DataFrame(
                    np.random.randint(0, 100, size=(5, 2)),
                    columns=[f"exog_{i}" for i in range(1, 3)],
                    index=pd.date_range(start='2020-01-01', periods=5, freq='D')
                ),
    'series_3': pd.DataFrame(
                    np.random.randint(0, 100, size=(5)),
                    columns=[f"exog_{i}" for i in range(1, 2)],
                    index=pd.date_range(start='2020-01-01', periods=5, freq='D'),
                    
                )          
}

exog_dict = exog.copy()

print("")
print("Series dict")
print("-----------")
pprint(series_dict)
print("")
print("Exog dict")
print("-----------")
pprint(exog_dict)
print("")

series_dict_aligned, exog_dict_aligned = align_series_and_exog_multiseries(
    series_dict          = series_dict,
    input_series_is_dict = isinstance(series, dict),
    exog_dict            = exog_dict
)

print("")
print("Series dict aligned")
print("-----------")
pprint(series_dict_aligned)
print("")
print("Exog dict aligned")
print("-----------")
pprint(exog_dict_aligned)
print("")

for k in series_dict_aligned.keys():
    assert series_dict_aligned[k].index.equals(exog_dict_aligned[k].index)

aligned_inputs = [
            [series_dict[k], exog_dict[k]]
            for k
            in series_dict.keys() & exog_dict.keys()
        ]

for series_aligned, exog_aligned in aligned_inputs:
    display(series_aligned)
    display(exog_aligned)

Unnamed: 0,series_1,series_2,series_3
2020-01-01,,,
2020-01-02,,,
2020-01-03,,,
2020-01-04,82.0,37.0,45.0
2020-01-05,63.0,13.0,26.0
2020-01-06,52.0,11.0,95.0
2020-01-07,66.0,74.0,47.0
2020-01-08,99.0,16.0,7.0
2020-01-09,,,
2020-01-10,,,


series
------


{'series_1': 2020-01-01     NaN
 2020-01-02     NaN
 2020-01-03     NaN
 2020-01-04    82.0
 2020-01-05    63.0
 2020-01-06    52.0
 2020-01-07    66.0
 2020-01-08    99.0
 2020-01-09     NaN
 2020-01-10     NaN
 Freq: D, Name: series_1, dtype: float64,
 'series_2': 2020-01-01     NaN
 2020-01-02     NaN
 2020-01-03     NaN
 2020-01-04    37.0
 2020-01-05    13.0
 2020-01-06    11.0
 2020-01-07    74.0
 2020-01-08    16.0
 2020-01-09     NaN
 2020-01-10     NaN
 Freq: D, Name: series_2, dtype: float64,
 'series_3': 2020-01-01     NaN
 2020-01-02     NaN
 2020-01-03     NaN
 2020-01-04    45.0
 2020-01-05    26.0
 2020-01-06    95.0
 2020-01-07    47.0
 2020-01-08     7.0
 2020-01-09     NaN
 2020-01-10     NaN
 Freq: D, Name: series_3, dtype: float64}


Series dict
-----------
{'series_1': 2020-01-01     NaN
2020-01-02     NaN
2020-01-03     NaN
2020-01-04    82.0
2020-01-05    63.0
2020-01-06    52.0
2020-01-07    66.0
2020-01-08    99.0
2020-01-09     NaN
2020-01-10     NaN
Freq: D, Name: series_1, dtype: float64,
 'series_2': 2020-01-01     NaN
2020-01-02     NaN
2020-01-03     NaN
2020-01-04    37.0
2020-01-05    13.0
2020-01-06    11.0
2020-01-07    74.0
2020-01-08    16.0
2020-01-09     NaN
2020-01-10     NaN
Freq: D, Name: series_2, dtype: float64,
 'series_3': 2020-01-01     NaN
2020-01-02     NaN
2020-01-03     NaN
2020-01-04    45.0
2020-01-05    26.0
2020-01-06    95.0
2020-01-07    47.0
2020-01-08     7.0
2020-01-09     NaN
2020-01-10     NaN
Freq: D, Name: series_3, dtype: float64}

Exog dict
-----------
{'series_1':             exog_1  exog_2  exog_3
2020-01-01      16      68       8
2020-01-02      88      67      12
2020-01-03       9      54      76
2020-01-04      49      40      91
2020-01-05      34      72      



2020-01-04    37.0
2020-01-05    13.0
2020-01-06    11.0
2020-01-07    74.0
2020-01-08    16.0
Freq: D, Name: series_2, dtype: float64

Unnamed: 0,exog_1,exog_2
2020-01-04,42.0,66.0
2020-01-05,7.0,98.0
2020-01-06,,
2020-01-07,,
2020-01-08,,


2020-01-04    82.0
2020-01-05    63.0
2020-01-06    52.0
2020-01-07    66.0
2020-01-08    99.0
Freq: D, Name: series_1, dtype: float64

Unnamed: 0,exog_1,exog_2,exog_3
2020-01-04,49.0,40.0,91.0
2020-01-05,34.0,72.0,66.0
2020-01-06,,,
2020-01-07,,,
2020-01-08,,,


2020-01-04    45.0
2020-01-05    26.0
2020-01-06    95.0
2020-01-07    47.0
2020-01-08     7.0
Freq: D, Name: series_3, dtype: float64

Unnamed: 0,exog_1
2020-01-04,66.0
2020-01-05,28.0
2020-01-06,
2020-01-07,
2020-01-08,


In [96]:
# Series is a data frame and exog is dict dataframe already aligned
# =============================================================================
series = pd.DataFrame(
    np.random.randint(0, 100, size=(10, 3)),
    columns=[f"series_{i}" for i in range(1, 4)],
    index=pd.date_range(start='2020-01-01', periods=10, freq='D')
)
series.iloc[:3, :1] = np.nan
series.iloc[-2:, :] = np.nan
display(series)
series = series.to_dict('series')
print("series")
print("------")
display(series)
series_dict = series.copy()

exog = {
    'series_1': pd.DataFrame(
                    np.random.randint(0, 100, size=(5, 3)),
                    columns=[f"exog_{i}" for i in range(1, 4)],
                    index=pd.date_range(start='2020-01-01', periods=5, freq='D')
                ),
    'series_2': pd.DataFrame(
                    np.random.randint(0, 100, size=(5, 2)),
                    columns=[f"exog_{i}" for i in range(1, 3)],
                    index=pd.date_range(start='2020-01-01', periods=5, freq='D')
                ),
    'series_3': pd.DataFrame(
                    np.random.randint(0, 100, size=(5)),
                    columns=[f"exog_{i}" for i in range(1, 2)],
                    index=pd.date_range(start='2020-01-01', periods=5, freq='D'),
                    
                )          
}

exog_dict = exog.copy()

print("")
print("Series dict")
print("-----------")
pprint(series_dict)
print("")
print("Exog dict")
print("-----------")
pprint(exog_dict)
print("")

series_dict_aligned, exog_dict_aligned = align_series_and_exog_multiseries(
    series_dict          = series_dict,
    input_series_is_dict = isinstance(series, dict),
    exog_dict            = exog_dict
)

print("")
print("Series dict aligned")
print("-----------")
pprint(series_dict_aligned)
print("")
print("Exog dict aligned")
print("-----------")
pprint(exog_dict_aligned)
print("")

for k in series_dict_aligned.keys():
    assert series_dict_aligned[k].index.equals(exog_dict_aligned[k].index)

aligned_inputs = [
            [series_dict[k], exog_dict[k]]
            for k
            in series_dict.keys() & exog_dict.keys()
        ]

for series_aligned, exog_aligned in aligned_inputs:
    display(series_aligned)
    display(exog_aligned)

Unnamed: 0,series_1,series_2,series_3
2020-01-01,,91.0,39.0
2020-01-02,,62.0,79.0
2020-01-03,,39.0,99.0
2020-01-04,21.0,41.0,4.0
2020-01-05,10.0,78.0,90.0
2020-01-06,90.0,45.0,70.0
2020-01-07,30.0,69.0,13.0
2020-01-08,28.0,84.0,8.0
2020-01-09,,,
2020-01-10,,,


series
------


{'series_1': 2020-01-01     NaN
 2020-01-02     NaN
 2020-01-03     NaN
 2020-01-04    21.0
 2020-01-05    10.0
 2020-01-06    90.0
 2020-01-07    30.0
 2020-01-08    28.0
 2020-01-09     NaN
 2020-01-10     NaN
 Freq: D, Name: series_1, dtype: float64,
 'series_2': 2020-01-01    91.0
 2020-01-02    62.0
 2020-01-03    39.0
 2020-01-04    41.0
 2020-01-05    78.0
 2020-01-06    45.0
 2020-01-07    69.0
 2020-01-08    84.0
 2020-01-09     NaN
 2020-01-10     NaN
 Freq: D, Name: series_2, dtype: float64,
 'series_3': 2020-01-01    39.0
 2020-01-02    79.0
 2020-01-03    99.0
 2020-01-04     4.0
 2020-01-05    90.0
 2020-01-06    70.0
 2020-01-07    13.0
 2020-01-08     8.0
 2020-01-09     NaN
 2020-01-10     NaN
 Freq: D, Name: series_3, dtype: float64}


Series dict
-----------
{'series_1': 2020-01-01     NaN
2020-01-02     NaN
2020-01-03     NaN
2020-01-04    21.0
2020-01-05    10.0
2020-01-06    90.0
2020-01-07    30.0
2020-01-08    28.0
2020-01-09     NaN
2020-01-10     NaN
Freq: D, Name: series_1, dtype: float64,
 'series_2': 2020-01-01    91.0
2020-01-02    62.0
2020-01-03    39.0
2020-01-04    41.0
2020-01-05    78.0
2020-01-06    45.0
2020-01-07    69.0
2020-01-08    84.0
2020-01-09     NaN
2020-01-10     NaN
Freq: D, Name: series_2, dtype: float64,
 'series_3': 2020-01-01    39.0
2020-01-02    79.0
2020-01-03    99.0
2020-01-04     4.0
2020-01-05    90.0
2020-01-06    70.0
2020-01-07    13.0
2020-01-08     8.0
2020-01-09     NaN
2020-01-10     NaN
Freq: D, Name: series_3, dtype: float64}

Exog dict
-----------
{'series_1':             exog_1  exog_2  exog_3
2020-01-01      41      29      47
2020-01-02       2      76      66
2020-01-03      71      30      37
2020-01-04      41      54      76
2020-01-05      75      28      



2020-01-01    91.0
2020-01-02    62.0
2020-01-03    39.0
2020-01-04    41.0
2020-01-05    78.0
2020-01-06    45.0
2020-01-07    69.0
2020-01-08    84.0
Freq: D, Name: series_2, dtype: float64

Unnamed: 0,exog_1,exog_2
2020-01-01,61.0,15.0
2020-01-02,43.0,18.0
2020-01-03,87.0,86.0
2020-01-04,67.0,58.0
2020-01-05,5.0,7.0
2020-01-06,,
2020-01-07,,
2020-01-08,,


2020-01-04    21.0
2020-01-05    10.0
2020-01-06    90.0
2020-01-07    30.0
2020-01-08    28.0
Freq: D, Name: series_1, dtype: float64

Unnamed: 0,exog_1,exog_2,exog_3
2020-01-04,41.0,54.0,76.0
2020-01-05,75.0,28.0,94.0
2020-01-06,,,
2020-01-07,,,
2020-01-08,,,


2020-01-01    39.0
2020-01-02    79.0
2020-01-03    99.0
2020-01-04     4.0
2020-01-05    90.0
2020-01-06    70.0
2020-01-07    13.0
2020-01-08     8.0
Freq: D, Name: series_3, dtype: float64

Unnamed: 0,exog_1
2020-01-01,27.0
2020-01-02,26.0
2020-01-03,88.0
2020-01-04,88.0
2020-01-05,82.0
2020-01-06,
2020-01-07,
2020-01-08,
