In [429]:
import os
import pprint
from functools import partial

import pandas as pd

from gluonts.dataset.repository.datasets import get_dataset
from gluonts.mx.distribution.piecewise_linear import PiecewiseLinearOutput
from gluonts.evaluation import make_evaluation_predictions, Evaluator
from gluonts.model.deepar import DeepAREstimator
from gluonts.model.seq2seq import MQCNNEstimator
from gluonts.mx.trainer import Trainer

In [437]:
# Training budget constants for the benchmark configuration.
epochs = 10
num_batches_per_epoch = 50

# Names of the M4 datasets, one per sampling frequency.
datasets = [
    "m4_hourly", "m4_daily", "m4_weekly",
    "m4_monthly", "m4_quarterly", "m4_yearly",
]

# DeepAR with its output distribution swapped for PiecewiseLinearOutput(8)
# (8 is presumably the number of linear pieces -- confirm against GluonTS).
_deepar_piecewise_linear = partial(
    DeepAREstimator, distr_output=PiecewiseLinearOutput(8)
)

# Estimator factories to compare: two plain classes plus the partial above.
estimators = [MQCNNEstimator, DeepAREstimator, _deepar_piecewise_linear]

In [438]:

# Load the dataset explicitly so this cell runs on a fresh kernel instead of
# relying on a `dataset` variable left over from an earlier session. The
# metadata printed by this cell (freq='Q', prediction_length=8, cardinality
# 24000) matches the M4 quarterly dataset.
dataset = get_dataset("m4_quarterly")

estimator = DeepAREstimator(
    prediction_length=dataset.metadata.prediction_length,
    freq=dataset.metadata.freq,
    use_feat_static_cat=True,
    # The metadata stores each cardinality as a string (e.g. '24000'),
    # so convert it to an integer explicitly.
    cardinality=[
        int(feat_static_cat.cardinality)
        for feat_static_cat in dataset.metadata.feat_static_cat
    ],
    # Deliberately small training budget (3 epochs x 10 batches) for a
    # quick smoke run.
    trainer=Trainer(
        epochs=3,
        num_batches_per_epoch=10,
    ),
)
print(f"Current dataset is {dataset}")


Current dataset is TrainDatasets(metadata=MetaData(freq='Q', target=None, feat_static_cat=[CategoricalFeatureInfo(name='feat_static_cat', cardinality='24000')], feat_static_real=[], feat_dynamic_real=[], feat_dynamic_cat=[], prediction_length=8), train=<gluonts.dataset.common.FileDataset object at 0x7fead0fbfa60>, test=<gluonts.dataset.common.FileDataset object at 0x7fead0fbfa30>)


In [439]:
# Fit the estimator on the training split and keep the resulting predictor.
predictor = estimator.train(dataset.train)

100%|██████████| 10/10 [00:00<00:00, 46.29it/s, epoch=1/3, avg_epoch_loss=8.66]
100%|██████████| 10/10 [00:00<00:00, 55.59it/s, epoch=2/3, avg_epoch_loss=8.56]
100%|██████████| 10/10 [00:00<00:00, 53.14it/s, epoch=3/3, avg_epoch_loss=7.87]


In [440]:
# Build the forecast / ground-truth iterators for the test split, requesting
# 100 samples from the trained predictor.
_evaluation_pair = make_evaluation_predictions(
    dataset.test,
    predictor=predictor,
    num_samples=100,
)
forecast_it, ts_it = _evaluation_pair


In [441]:
# Score the forecasts against the ground truth; the evaluator returns the
# aggregate metrics first and the per-item metrics second.
_evaluator = Evaluator()
_n_series = len(dataset.test)
agg_metrics, item_metrics = _evaluator(ts_it, forecast_it, num_series=_n_series)

Running evaluation: 100%|██████████| 24000/24000 [00:30<00:00, 776.06it/s]


In [443]:
# Display the per-item metrics table returned by the Evaluator call.
item_metrics

Unnamed: 0,item_id,MSE,abs_error,abs_target_sum,abs_target_mean,seasonal_error,MASE,MAPE,sMAPE,MSIS,...,QuantileLoss[0.5],Coverage[0.5],QuantileLoss[0.6],Coverage[0.6],QuantileLoss[0.7],Coverage[0.7],QuantileLoss[0.8],Coverage[0.8],QuantileLoss[0.9],Coverage[0.9]
0,0.0,227433.203125,3313.677246,54831.070312,6853.883789,308.422340,1.342995,0.061246,0.058842,13.560884,...,3313.677246,1.000,3655.766406,1.000,3847.881152,1.000,3387.118945,1.000,2303.196484,1.00
1,1.0,189857.796875,2963.601074,56761.937500,7095.242188,357.911086,1.035034,0.053075,0.051132,11.584008,...,2963.601074,1.000,3334.013281,1.000,3441.122461,1.000,3156.040039,1.000,2206.768457,1.00
2,2.0,502050.625000,5557.117188,56761.523438,7095.190430,359.439593,1.932563,0.098003,0.093069,13.351615,...,5557.117188,1.000,5617.098047,1.000,5078.002148,1.000,4130.822656,1.000,2936.402930,1.00
3,3.0,265856.562500,3697.719238,61294.324219,7661.790527,317.977330,1.453610,0.060662,0.058458,16.087560,...,3697.719238,1.000,4140.095312,1.000,4235.002148,1.000,3662.295703,1.000,2638.766211,1.00
4,4.0,68221.226562,1563.250977,9781.000000,1222.625000,149.800000,1.304448,0.144160,0.154530,10.557169,...,1563.250977,0.500,1629.357764,0.625,1685.173242,0.625,1571.989404,0.625,1333.854639,0.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23995,23995.0,29828.621094,1555.552246,17320.000000,2165.000000,23.857143,8.150349,0.089961,0.085934,54.325528,...,1555.552246,1.000,1484.581055,1.000,1410.382031,1.000,1160.693457,1.000,765.431836,1.00
23996,23996.0,85614.781250,2066.037109,81030.000000,10128.750000,240.571429,1.073505,0.025417,0.024939,25.421861,...,2066.037109,1.000,3292.275000,1.000,3913.583789,1.000,3716.085156,1.000,2780.366016,1.00
23997,23997.0,106081.578125,1688.685547,82040.000000,10255.000000,240.571429,0.877435,0.020563,0.020295,25.354027,...,1688.685547,0.875,2658.819531,1.000,3653.358398,1.000,3620.185937,1.000,2880.878320,1.00
23998,23998.0,5154.675781,436.435669,9356.407227,1169.550903,410.208664,0.132992,0.048347,0.046290,1.878267,...,436.435669,0.750,495.234375,1.000,547.170850,1.000,495.582422,1.000,339.674756,1.00


In [442]:
# Display the aggregate metrics dict returned by the Evaluator call.
agg_metrics

{'MSE': 2575691.6912184916,
 'abs_error': 146623365.78692627,
 'abs_target_sum': 1147074070.9031982,
 'abs_target_mean': 5974.344119287491,
 'seasonal_error': 473.4332698179725,
 'MASE': 1.7481263018363098,
 'MAPE': 0.16215688510539863,
 'sMAPE': 0.13327629880156988,
 'MSIS': 19.247667374065244,
 'QuantileLoss[0.1]': 86164699.97987062,
 'Coverage[0.1]': 0.15038020833333332,
 'QuantileLoss[0.2]': 115243276.25249329,
 'Coverage[0.2]': 0.24002083333333332,
 'QuantileLoss[0.3]': 132258687.25056762,
 'Coverage[0.3]': 0.33919791666666665,
 'QuantileLoss[0.4]': 142300527.76174316,
 'Coverage[0.4]': 0.44561979166666665,
 'QuantileLoss[0.5]': 146623365.69945526,
 'Coverage[0.5]': 0.5480260416666667,
 'QuantileLoss[0.6]': 144718106.61466676,
 'Coverage[0.6]': 0.6331354166666666,
 'QuantileLoss[0.7]': 136710986.20660248,
 'Coverage[0.7]': 0.7160104166666666,
 'QuantileLoss[0.8]': 120371745.21779785,
 'Coverage[0.8]': 0.7928385416666667,
 'QuantileLoss[0.9]': 91399760.83436279,
 'Coverage[0.9]': 0

In [None]:
# Read the yearly training file from the current user's Downloads directory.
# expanduser replaces the previously hard-coded /home/<user> prefix so the
# cell is not tied to one machine account; the first column is the index.
train_df = pd.read_csv(
    os.path.expanduser("~/Downloads/Yearly-train.txt"), index_col=0
)

In [None]:
# Read the yearly test file from the current user's Downloads directory.
# expanduser replaces the previously hard-coded /home/<user> prefix so the
# cell is not tied to one machine account; the first column is the index.
test_df = pd.read_csv(
    os.path.expanduser("~/Downloads/Yearly-test.txt"), index_col=0
)

In [None]:
import os

import sktime
from sktime.datasets import load_from_tsfile_to_dataframe, load_from_tsfile
import pandas as pd
import numpy as np

# Directory of the sample datasets inside the installed sktime package.
DATA_PATH = os.path.join(os.path.dirname(sktime.__file__), "datasets/data")

# Locations of the ItalyPowerDemand train / test .ts files.
_italy_train_ts = os.path.join(DATA_PATH, "ItalyPowerDemand/ItalyPowerDemand_TRAIN.ts")
_italy_test_ts = os.path.join(DATA_PATH, "ItalyPowerDemand/ItalyPowerDemand_TEST.ts")

# Each load returns a pair, unpacked here as (x, y).
train_x, train_y = load_from_tsfile_to_dataframe(_italy_train_ts)
test_x, test_y = load_from_tsfile_to_dataframe(_italy_test_ts)

In [None]:
# Show the second value returned by load_from_tsfile_to_dataframe for the
# training file.
train_y

In [None]:
# Preview the first 10 rows of the training frame.
train_x.head(10)

In [None]:
# Display the whole training frame.
train_x

In [None]:
# Inspect the container type holding the first row's values.
type(train_x.iloc[0].values)

In [None]:
# Inspect the type of a single cell: position 2 of column "dim_0".
print(type(train_x["dim_0"].to_numpy()[2]))

In [None]:
# Convert the training frame to a NumPy array and display it.
train_x_np = train_x.to_numpy()
train_x_np

In [None]:
# Display the converted array again (same expression the previous cell ends with).
train_x_np

In [None]:
# Shape of the converted array.
train_x_np.shape

In [None]:
# Shape of the first row of the converted array.
train_x_np[0,:].shape

In [None]:
# Small toy frame (2 rows x 3 columns named dim_0..dim_2) for trying out the
# conversions in the following cells.
df = pd.DataFrame([[2,3,4], [3,4,5]], columns=["dim_0", "dim_1", "dim_2"])

In [None]:
# Display the toy frame.
df

In [None]:
# Show the toy frame as a NumPy array.
df.to_numpy()

In [None]:
# Keep the NumPy form of the toy frame.
df_np = df.to_numpy()

In [None]:
# Transpose of the toy frame: the dim_* column names become the index.
df.T

In [None]:
# Dict form of the transpose: one entry per original row, each mapping
# column name -> value.
df.T.to_dict()

In [None]:
# Default to_dict conversion of the training frame
# ({column -> {row label -> value}}).
dict_x = train_x.to_dict()


In [None]:
from gluonts.dataset.common import ListDataset

def load_from_tsfile_to_listdataset(
    full_file_path_and_name,
    return_separate_X_and_y=True,
    replace_missing_vals_with="NaN",
):
    """Load data from a .ts file into a Pandas DataFrame.

    Parameters
    ----------
    full_file_path_and_name: str
        The full pathname of the .ts file to read.
    return_separate_X_and_y: bool
        true if X and Y values should be returned as separate Data Frames (
        X) and a numpy array (y), false otherwise.
        This is only relevant for data that
    replace_missing_vals_with: str
       The value that missing values in the text file should be replaced
       with prior to parsing.

    Returns
    -------
    DataFrame (default) or ndarray (i
        If return_separate_X_and_y then a tuple containing a DataFrame and a
        numpy array containing the relevant time-series and corresponding
        class values.
    DataFrame
        If not return_separate_X_and_y then a single DataFrame containing
        all time-series and (if relevant) a column "class_vals" the
        associated class values.
    """
    # Initialize flags and variables used when parsing the file
    metadata_started = False
    data_started = False

    has_problem_name_tag = False
    has_timestamps_tag = False
    has_univariate_tag = False
    has_class_labels_tag = False
    has_data_tag = False

    previous_timestamp_was_int = None
    prev_timestamp_was_timestamp = None
    num_dimensions = None
    is_first_case = True
    instance_list = []
    class_val_list = []
    line_num = 0
    # Parse the file
    with open(full_file_path_and_name, "r", encoding="utf-8") as file:
        for line in file:
            # Strip white space from start/end of line and change to
            # lowercase for use below
            line = line.strip().lower()
            # Empty lines are valid at any point in a file
            if line:
                # Check if this line contains metadata
                # Please note that even though metadata is stored in this
                # function it is not currently published externally
                if line.startswith("@problemname"):
                    # Check that the data has not started
                    if data_started:
                        raise IOError("metadata must come before data")
                    # Check that the associated value is valid
                    tokens = line.split(" ")
                    token_len = len(tokens)
                    if token_len == 1:
                        raise IOError("problemname tag requires an associated value")
                    # problem_name = line[len("@problemname") + 1:]
                    has_problem_name_tag = True
                    metadata_started = True
                elif line.startswith("@timestamps"):
                    # Check that the data has not started
                    if data_started:
                        raise IOError("metadata must come before data")
                    # Check that the associated value is valid
                    tokens = line.split(" ")
                    token_len = len(tokens)
                    if token_len != 2:
                        raise IOError(
                            "timestamps tag requires an associated Boolean " "value"
                        )
                    elif tokens[1] == "true":
                        timestamps = True
                    elif tokens[1] == "false":
                        timestamps = False
                    else:
                        raise IOError("invalid timestamps value")
                    has_timestamps_tag = True
                    metadata_started = True
                elif line.startswith("@univariate"):
                    # Check that the data has not started
                    if data_started:
                        raise IOError("metadata must come before data")
                    # Check that the associated value is valid
                    tokens = line.split(" ")
                    token_len = len(tokens)
                    if token_len != 2:
                        raise IOError(
                            "univariate tag requires an associated Boolean  " "value"
                        )
                    elif tokens[1] == "true":
                        # univariate = True
                        pass
                    elif tokens[1] == "false":
                        # univariate = False
                        pass
                    else:
                        raise IOError("invalid univariate value")
                    has_univariate_tag = True
                    metadata_started = True
                elif line.startswith("@classlabel"):
                    # Check that the data has not started
                    if data_started:
                        raise IOError("metadata must come before data")
                    # Check that the associated value is valid
                    tokens = line.split(" ")
                    token_len = len(tokens)
                    if token_len == 1:
                        raise IOError(
                            "classlabel tag requires an associated Boolean  " "value"
                        )
                    if tokens[1] == "true":
                        class_labels = True
                    elif tokens[1] == "false":
                        class_labels = False
                    else:
                        raise IOError("invalid classLabel value")
                    # Check if we have any associated class values
                    if token_len == 2 and class_labels:
                        raise IOError(
                            "if the classlabel tag is true then class values "
                            "must be supplied"
                        )
                    has_class_labels_tag = True
                    class_label_list = [token.strip() for token in tokens[2:]]
                    metadata_started = True
                # Check if this line contains the start of data
                elif line.startswith("@data"):
                    if line != "@data":
                        raise IOError("data tag should not have an associated value")
                    if data_started and not metadata_started:
                        raise IOError("metadata must come before data")
                    else:
                        has_data_tag = True
                        data_started = True
                # If the 'data tag has been found then metadata has been
                # parsed and data can be loaded
                elif data_started:
                    # Check that a full set of metadata has been provided
                    if (
                        not has_problem_name_tag
                        or not has_timestamps_tag
                        or not has_univariate_tag
                        or not has_class_labels_tag
                        or not has_data_tag
                    ):
                        raise IOError(
                            "a full set of metadata has not been provided "
                            "before the data"
                        )
                    # Replace any missing values with the value specified
                    line = line.replace("?", replace_missing_vals_with)
                    # Check if we dealing with data that has timestamps
                    if timestamps:
                        # We're dealing with timestamps so cannot just split
                        # line on ':' as timestamps may contain one
                        has_another_value = False
                        has_another_dimension = False
                        timestamp_for_dim = []
                        values_for_dimension = []
                        this_line_num_dim = 0
                        line_len = len(line)
                        char_num = 0
                        while char_num < line_len:
                            # Move through any spaces
                            while char_num < line_len and str.isspace(line[char_num]):
                                char_num += 1
                            # See if there is any more data to read in or if
                            # we should validate that read thus far
                            if char_num < line_len:
                                # See if we have an empty dimension (i.e. no
                                # values)
                                if line[char_num] == ":":
                                    if len(instance_list) < (this_line_num_dim + 1):
                                        instance_list.append([])
                                    instance_list[this_line_num_dim].append(
                                        pd.Series(dtype="object")
                                    )
                                    this_line_num_dim += 1
                                    has_another_value = False
                                    has_another_dimension = True
                                    timestamp_for_dim = []
                                    values_for_dimension = []
                                    char_num += 1
                                else:
                                    # Check if we have reached a class label
                                    if line[char_num] != "(" and class_labels:
                                        class_val = line[char_num:].strip()
                                        if class_val not in class_label_list:
                                            raise IOError(
                                                "the class value '"
                                                + class_val
                                                + "' on line "
                                                + str(line_num + 1)
                                                + " is not "
                                                "valid"
                                            )
                                        class_val_list.append(class_val)
                                        char_num = line_len
                                        has_another_value = False
                                        has_another_dimension = False
                                        timestamp_for_dim = []
                                        values_for_dimension = []
                                    else:
                                        # Read in the data contained within
                                        # the next tuple
                                        if line[char_num] != "(" and not class_labels:
                                            raise IOError(
                                                "dimension "
                                                + str(this_line_num_dim + 1)
                                                + " on line "
                                                + str(line_num + 1)
                                                + " does "
                                                "not "
                                                "start "
                                                "with a "
                                                "'('"
                                            )
                                        char_num += 1
                                        tuple_data = ""
                                        while (
                                            char_num < line_len
                                            and line[char_num] != ")"
                                        ):
                                            tuple_data += line[char_num]
                                            char_num += 1
                                        if (
                                            char_num >= line_len
                                            or line[char_num] != ")"
                                        ):
                                            raise IOError(
                                                "dimension "
                                                + str(this_line_num_dim + 1)
                                                + " on line "
                                                + str(line_num + 1)
                                                + " does "
                                                "not end"
                                                " with a "
                                                "')'"
                                            )
                                        # Read in any spaces immediately
                                        # after the current tuple
                                        char_num += 1
                                        while char_num < line_len and str.isspace(
                                            line[char_num]
                                        ):
                                            char_num += 1

                                        # Check if there is another value or
                                        # dimension to process after this tuple
                                        if char_num >= line_len:
                                            has_another_value = False
                                            has_another_dimension = False
                                        elif line[char_num] == ",":
                                            has_another_value = True
                                            has_another_dimension = False
                                        elif line[char_num] == ":":
                                            has_another_value = False
                                            has_another_dimension = True
                                        char_num += 1
                                        # Get the numeric value for the
                                        # tuple by reading from the end of
                                        # the tuple data backwards to the
                                        # last comma
                                        last_comma_index = tuple_data.rfind(",")
                                        if last_comma_index == -1:
                                            raise IOError(
                                                "dimension "
                                                + str(this_line_num_dim + 1)
                                                + " on line "
                                                + str(line_num + 1)
                                                + " contains a tuple that has "
                                                "no comma inside of it"
                                            )
                                        try:
                                            value = tuple_data[last_comma_index + 1 :]
                                            value = float(value)
                                        except ValueError:
                                            raise IOError(
                                                "dimension "
                                                + str(this_line_num_dim + 1)
                                                + " on line "
                                                + str(line_num + 1)
                                                + " contains a tuple that does "
                                                "not have a valid numeric "
                                                "value"
                                            )
                                        # Check the type of timestamp that
                                        # we have
                                        timestamp = tuple_data[0:last_comma_index]
                                        try:
                                            timestamp = int(timestamp)
                                            timestamp_is_int = True
                                            timestamp_is_timestamp = False
                                        except ValueError:
                                            timestamp_is_int = False
                                        if not timestamp_is_int:
                                            try:
                                                timestamp = timestamp.strip()
                                                timestamp_is_timestamp = True
                                            except ValueError:
                                                timestamp_is_timestamp = False
                                        # Make sure that the timestamps in
                                        # the file (not just this dimension
                                        # or case) are consistent
                                        if (
                                            not timestamp_is_timestamp
                                            and not timestamp_is_int
                                        ):
                                            raise IOError(
                                                "dimension "
                                                + str(this_line_num_dim + 1)
                                                + " on line "
                                                + str(line_num + 1)
                                                + " contains a tuple that "
                                                "has an invalid timestamp '"
                                                + timestamp
                                                + "'"
                                            )
                                        if (
                                            previous_timestamp_was_int is not None
                                            and previous_timestamp_was_int
                                            and not timestamp_is_int
                                        ):
                                            raise IOError(
                                                "dimension "
                                                + str(this_line_num_dim + 1)
                                                + " on line "
                                                + str(line_num + 1)
                                                + " contains tuples where the "
                                                "timestamp format is "
                                                "inconsistent"
                                            )
                                        if (
                                            prev_timestamp_was_timestamp is not None
                                            and prev_timestamp_was_timestamp
                                            and not timestamp_is_timestamp
                                        ):
                                            raise IOError(
                                                "dimension "
                                                + str(this_line_num_dim + 1)
                                                + " on line "
                                                + str(line_num + 1)
                                                + " contains tuples where the "
                                                "timestamp format is "
                                                "inconsistent"
                                            )
                                        # Store the values
                                        timestamp_for_dim += [timestamp]
                                        values_for_dimension += [value]
                                        #  If this was our first tuple then
                                        #  we store the type of timestamp we
                                        #  had
                                        if (
                                            prev_timestamp_was_timestamp is None
                                            and timestamp_is_timestamp
                                        ):
                                            prev_timestamp_was_timestamp = True
                                            previous_timestamp_was_int = False

                                        if (
                                            previous_timestamp_was_int is None
                                            and timestamp_is_int
                                        ):
                                            prev_timestamp_was_timestamp = False
                                            previous_timestamp_was_int = True
                                        # See if we should add the data for
                                        # this dimension
                                        if not has_another_value:
                                            if len(instance_list) < (
                                                this_line_num_dim + 1
                                            ):
                                                instance_list.append([])

                                            if timestamp_is_timestamp:
                                                timestamp_for_dim = pd.DatetimeIndex(
                                                    timestamp_for_dim
                                                )

                                            instance_list[this_line_num_dim].append(
                                                pd.Series(
                                                    index=timestamp_for_dim,
                                                    data=values_for_dimension,
                                                )
                                            )
                                            this_line_num_dim += 1
                                            timestamp_for_dim = []
                                            values_for_dimension = []
                            elif has_another_value:
                                raise IOError(
                                    "dimension " + str(this_line_num_dim + 1) + " on "
                                    "line "
                                    + str(line_num + 1)
                                    + " ends with a ',' that "
                                    "is not followed by "
                                    "another tuple"
                                )
                            elif has_another_dimension and class_labels:
                                raise IOError(
                                    "dimension " + str(this_line_num_dim + 1) + " on "
                                    "line "
                                    + str(line_num + 1)
                                    + " ends with a ':' while "
                                    "it should list a class "
                                    "value"
                                )
                            elif has_another_dimension and not class_labels:
                                if len(instance_list) < (this_line_num_dim + 1):
                                    instance_list.append([])
                                instance_list[this_line_num_dim].append(
                                    pd.Series(dtype=np.float32)
                                )
                                this_line_num_dim += 1
                                num_dimensions = this_line_num_dim
                            # If this is the 1st line of data we have seen
                            # then note the dimensions
                            if not has_another_value and not has_another_dimension:
                                if num_dimensions is None:
                                    num_dimensions = this_line_num_dim
                                if num_dimensions != this_line_num_dim:
                                    raise IOError(
                                        "line "
                                        + str(line_num + 1)
                                        + " does not have the "
                                        "same number of "
                                        "dimensions as the "
                                        "previous line of "
                                        "data"
                                    )
                        # Check that we are not expecting some more data,
                        # and if not, store that processed above
                        if has_another_value:
                            raise IOError(
                                "dimension "
                                + str(this_line_num_dim + 1)
                                + " on line "
                                + str(line_num + 1)
                                + " ends with a ',' that is "
                                "not followed by another "
                                "tuple"
                            )
                        elif has_another_dimension and class_labels:
                            raise IOError(
                                "dimension "
                                + str(this_line_num_dim + 1)
                                + " on line "
                                + str(line_num + 1)
                                + " ends with a ':' while it "
                                "should list a class value"
                            )
                        elif has_another_dimension and not class_labels:
                            if len(instance_list) < (this_line_num_dim + 1):
                                instance_list.append([])
                            instance_list[this_line_num_dim].append(
                                pd.Series(dtype="object")
                            )
                            this_line_num_dim += 1
                            num_dimensions = this_line_num_dim
                        # If this is the 1st line of data we have seen then
                        # note the dimensions
                        if (
                            not has_another_value
                            and num_dimensions != this_line_num_dim
                        ):
                            raise IOError(
                                "line " + str(line_num + 1) + " does not have the same "
                                "number of dimensions as the "
                                "previous line of data"
                            )
                        # Check if we should have class values, and if so
                        # that they are contained in those listed in the
                        # metadata
                        if class_labels and len(class_val_list) == 0:
                            raise IOError("the cases have no associated class values")
                    else:
                        dimensions = line.split(":")
                        # If first row then note the number of dimensions (
                        # that must be the same for all cases)
                        if is_first_case:
                            num_dimensions = len(dimensions)
                            if class_labels:
                                num_dimensions -= 1
                            for _dim in range(0, num_dimensions):
                                instance_list.append([])
                            is_first_case = False
                        # See how many dimensions that the case whose data
                        # in represented in this line has
                        this_line_num_dim = len(dimensions)
                        if class_labels:
                            this_line_num_dim -= 1
                        # All dimensions should be included for all series,
                        # even if they are empty
                        if this_line_num_dim != num_dimensions:
                            raise IOError(
                                "inconsistent number of dimensions. "
                                "Expecting "
                                + str(num_dimensions)
                                + " but have read "
                                + str(this_line_num_dim)
                            )
                        # Process the data for each dimension
                        for dim in range(0, num_dimensions):
                            dimension = dimensions[dim].strip()

                            if dimension:
                                data_series = dimension.split(",")
                                data_series = [float(i) for i in data_series]
                                instance_list[dim].append(pd.Series(data_series))
                            else:
                                instance_list[dim].append(pd.Series(dtype="object"))
                        if class_labels:
                            class_val_list.append(dimensions[num_dimensions].strip())
            line_num += 1
    # Check that the file was not empty
    if line_num:
        # Check that the file contained both metadata and data
        if metadata_started and not (
            has_problem_name_tag
            and has_timestamps_tag
            and has_univariate_tag
            and has_class_labels_tag
            and has_data_tag
        ):
            raise IOError("metadata incomplete")

        elif metadata_started and not data_started:
            raise IOError("file contained metadata but no data")

        elif metadata_started and data_started and len(instance_list) == 0:
            raise IOError("file contained metadata but no data")
        # Create a ListDataset from the data parsed above
        
        data = pd.DataFrame(dtype=np.float32)
        for dim in range(0, num_dimensions):
            data["dim_" + str(dim)] = instance_list[dim]

        # the original dataset did not include time stamps, so we use the earliest
        # point available in pandas as the start date for each time series.
        dummy_start = [
        pd.Timestamp("04-18-2022", freq='1H')  # Arbitrary default date
        for _ in range(len(data))]    

        nested_target, feat_static_cat = create_dataset(data, class_val_list)

        train_ds = ListDataset(
    [
        {
            "target": target,
            "start": start,
            "fea_static_cat": [fsc]
        }
        for (target, start, fsc) in zip(
            nested_target,
            dummy_start,
            feat_static_cat
        )
    ],
    freq='1H'
)

        # Check if we should return any associated class labels separately
        if class_labels:
            if return_separate_X_and_y:
                return train_ds, np.asarray(class_val_list)
        else:
            return train_ds
    else:
        raise IOError("empty file")
        

In [None]:
import numpy as np

def create_dataset(data, label):
    """Flatten a nested case table into a 2-D target array.

    Every row of ``data`` is one case and every cell holds the sequence of
    values for one dimension of that case. The cells of a row are
    concatenated in column order, so a case whose dimensions have lengths
    ``a`` and ``b`` becomes a single row of length ``a + b``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose cells are iterables of numbers (the loader in this
        notebook stores one ``pd.Series`` per cell).
    label : sequence
        Per-case class values. Returned unchanged so the caller can use
        them as static categorical features.

    Returns
    -------
    nested_target : numpy.ndarray
        Float array of shape ``(len(data), flattened_length)``; shape
        ``(0, 0)`` when ``data`` has no rows.
    feat_static_cat : sequence
        The ``label`` argument, untouched.

    Raises
    ------
    ValueError
        If the cases do not all flatten to the same length.
    """
    # Rows are read by position so the result lines up with ``label`` even
    # when the frame does not carry a default 0..n-1 index.
    flat_rows = [
        [value for cell in data.iloc[pos] for value in cell]
        for pos in range(len(data))
    ]

    if not flat_rows:
        # No cases at all: hand back an empty 2-D array instead of failing
        # on a lookup of the (non-existent) first row.
        nested_target = np.empty((0, 0))
    else:
        expected_len = len(flat_rows[0])
        for pos, row in enumerate(flat_rows):
            if len(row) != expected_len:
                raise ValueError(
                    "case " + str(pos) + " flattens to " + str(len(row))
                    + " values but " + str(expected_len) + " were expected"
                )
        # Build the array once; growing it with np.append inside the loop
        # would copy the whole array on every iteration.
        nested_target = np.array(flat_rows, dtype=np.float64)

    # create categorical static feats: pass in train_y
    feat_static_cat = label

    return nested_target, feat_static_cat

In [None]:
def to_dict(target_values, start):
    """Bundle a series and its start marker into a single dict entry.

    Parameters
    ----------
    target_values
        Values of the series; stored under the ``"target"`` key.
    start
        Start of the series; stored under the ``"start"`` key.

    Returns
    -------
    dict
        ``{"start": start, "target": target_values}``, keys in that order.
    """
    return dict(start=start, target=target_values)

In [None]:
# Load the ItalyPowerDemand training split from DATA_PATH.
# The result is unpacked into two names, so the loader has to hand back a
# pair here (its `return train_ds, np.asarray(class_val_list)` path); a bare
# dataset or None would make this assignment fail.
train_x, train_y = load_from_tsfile_to_listdataset(
    os.path.join(DATA_PATH, "ItalyPowerDemand/ItalyPowerDemand_TRAIN.ts")
)

In [None]:
# Take the first entry of the loaded dataset and display its "target" field
# to eyeball what the loader produced.
train_entry = next(iter(train_x))
ListDataset_check = train_entry["target"]
ListDataset_check

In [None]:
# Scratch values for the indexing experiments in the cells below.
a = []
# b has to be a NumPy array: the later cells read `b.shape` and slice it with
# `b[0,:]`, neither of which a plain nested list supports.
b = np.array([[3, 4], [5, 6], [7, 8]])


In [None]:
# Display the current value of b.
b

In [None]:
# Show the dimensions of b. `.shape` is a NumPy array attribute, so b must be
# an ndarray here; a plain Python list raises AttributeError.
b.shape

In [None]:
# Number of top-level elements (rows) in b.
len(b)

In [None]:
# Select the first row of b. The `[0,:]` tuple index is NumPy-style indexing,
# so b must be an ndarray here; a plain nested list raises TypeError.
b[0,:]