In [75]:
from darts import TimeSeries
import pandas as pd
from darts.models import NaiveSeasonal
from darts.models import NaiveMean
from darts import TimeSeries
from sklearn.metrics import mean_absolute_percentage_error
from typing import Dict


In [76]:
df = pd.read_excel('../../data/raw/Количество исследований по неделям (для реализации).xlsx')
df = df.rename(columns={"Год": "year", "Номер недели": "week"})
df.fillna(0, inplace=True)
ds_index = pd.date_range("2021-12-24", "2024-1-28", freq="W", name="ds").to_frame().reset_index(drop=True)
ds_index["year"] = ds_index["ds"].dt.year
ds_index["week"] = ds_index["ds"].dt.isocalendar().week
ds_index['week'][0] = 52
ds_index['week'][1] = 1
ds_index['week'][2:54] += 1
data = ds_index.merge(df, how="left", on=["year", "week"])
data.fillna(0, inplace=True)
series = TimeSeries.from_dataframe(
    data,
    time_col='ds',
    value_cols=[column for column in df.columns if column not in ['year', 'week']]
)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  ds_index['week'][0] = 52
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, becaus

In [77]:
def cross_val(
        models: Dict[str, object],
        series: TimeSeries,
        remains_rows_to_val: int = 24,
        n_rows_to_forecast: int = 4,
        step: int = 1,
) -> Dict[str, Dict[str, pd.Series]]:
    """
    Perform cross-validation on a given set of models and a time series.

    Args:
        models (Dict[str, object]): A dictionary of models to be used for cross-validation.
        series (TimeSeries): The time series data to be used for cross-validation.
        remains_rows_to_val (int, optional): The number of rows to be used for validation. Defaults to 24.
        n_rows_to_forecast (int, optional): The number of rows to be forecasted. Defaults to 4.
        step (int, optional): The step size for iterating over the training data. Defaults to 1.

    Returns:
        Dict[str, Dict[str, pd.Series]]: A dictionary containing the cross-validation results for each combination of training and forecasting
        rows. The keys are strings representing the combination of rows, and the values are dictionaries containing
        the MAPE (mean absolute percentage error) for each model by each column.
      Example: []
    """
    remains_rows_to_val = 24
    n_rows_to_forecast = 4
    step = 1
    if remains_rows_to_val < n_rows_to_forecast:
        raise ValueError("remains_rows_to_val count must be higher then n_rows_to_forecast")

    cross_val_results = {}
    # 109 - 24          85 + 4, 86 + 4, 87 + 4 ...........
    for n_rows_to_train in range(len(series) - remains_rows_to_val, len(series) - n_rows_to_forecast + 1, step):
        print(f"n_rows_to_train: {n_rows_to_train}", f"n_rows_for_forecast: {n_rows_to_train + n_rows_to_forecast}")
        train = series[:n_rows_to_train]
        val = series[n_rows_to_train:n_rows_to_train + n_rows_to_forecast]

        iteration_results = {}
        for name, model in models.items():
            model.fit(train)
            preds = model.predict(n_rows_to_forecast)
            iteration_results[name] = calc_mape_for_each_column(val, preds)

        cross_val_results[f'{n_rows_to_train} + {n_rows_to_forecast}'] = iteration_results

    return (cross_val_results)


def represent_cross_validation_results(
    models: Dict[str, object],
    cross_val_results: Dict[str, Dict[str, pd.Series]]
) -> None:
    """
    Represent the cross-validation results for each model.

    Args:
        models (Dict[str, object]): A dictionary of models.
        cross_val_results (Dict[str, Dict[str, float]]): A dictionary containing the cross-validation results for each combination of training and forecasting rows.

    Returns:
        None
    """
    for model in models:
        represent_model_dict = {}
        for validation_step in cross_val_results:
            represent_model_dict[validation_step] = cross_val_results[validation_step][model]

        represent_model_df = pd.DataFrame(represent_model_dict).T  # transpose DataFrame
        model_df_with_stats = pd.concat([represent_model_df, represent_model_df.describe()])

        display(model_df_with_stats.style.set_caption(f'{model}'))

    return


def calc_mape_for_each_column(
        val: TimeSeries, 
        preds: TimeSeries,
) -> pd.Series:
    """
    Calculate the Mean Absolute Percentage Error (MAPE) for each column between the actual values (val) and predicted values (preds).

    Args:
        val (TimeSeries): The actual values TimeSeries.
        preds (TimeSeries): The predicted values TimeSeries.

    Returns:
        pd.Series: A Series containing the MAPE for each column.
    """
    mape_dict = {}
    for column in preds.columns:
        mape_dict[column] = mean_absolute_percentage_error(val[column].values(), preds[column].values())

    # mape_series = pd.Series(mape_dict)
    # mape_stats: pd.Series = mape_series.describe()
    # mape_and_mape_stats = pd.concat([mape_series, mape_stats])

    return pd.Series(mape_dict)


models = {
    'Naive_mean': NaiveMean() , 
    'Naive_seasonal_K52': NaiveSeasonal(K=52), 
}
ress = cross_val(models, series)
represent_cross_validation_results(models, ress)



n_rows_to_train: 86 n_rows_for_forecast: 90
n_rows_to_train: 87 n_rows_for_forecast: 91
n_rows_to_train: 88 n_rows_for_forecast: 92
n_rows_to_train: 89 n_rows_for_forecast: 93
n_rows_to_train: 90 n_rows_for_forecast: 94
n_rows_to_train: 91 n_rows_for_forecast: 95
n_rows_to_train: 92 n_rows_for_forecast: 96
n_rows_to_train: 93 n_rows_for_forecast: 97
n_rows_to_train: 94 n_rows_for_forecast: 98
n_rows_to_train: 95 n_rows_for_forecast: 99
n_rows_to_train: 96 n_rows_for_forecast: 100
n_rows_to_train: 97 n_rows_for_forecast: 101
n_rows_to_train: 98 n_rows_for_forecast: 102
n_rows_to_train: 99 n_rows_for_forecast: 103
n_rows_to_train: 100 n_rows_for_forecast: 104
n_rows_to_train: 101 n_rows_for_forecast: 105
n_rows_to_train: 102 n_rows_for_forecast: 106
n_rows_to_train: 103 n_rows_for_forecast: 107
n_rows_to_train: 104 n_rows_for_forecast: 108
n_rows_to_train: 105 n_rows_for_forecast: 109
n_rows_to_train: 106 n_rows_for_forecast: 110


Unnamed: 0,Денситометр,КТ,КТ с КУ 1 зона,КТ с КУ 2 и более зон,ММГ,МРТ,МРТ с КУ 1 зона,МРТ с КУ 2 и более зон,РГ,Флюорограф
86 + 4,0.162582,0.497558,0.097157,0.053458,0.215588,0.136897,0.024637,0.494952,0.176475,0.192401
87 + 4,0.166987,0.393668,0.059673,0.081503,0.225985,0.149127,0.018794,0.395126,0.208346,0.192053
88 + 4,0.140742,0.26036,0.053079,0.122962,0.175356,0.141396,0.020265,0.392852,0.187931,0.294155
89 + 4,0.158545,0.186597,0.046658,0.1566,0.181461,0.120798,0.011711,0.325271,0.214371,0.304572
90 + 4,0.176376,0.129114,0.082091,0.192807,0.202168,0.120071,0.040365,0.284729,0.237229,0.310596
91 + 4,0.2061,0.075555,0.122028,0.242954,0.218469,0.112198,0.068728,0.389679,0.249379,0.325742
92 + 4,0.265931,0.051764,0.166473,0.262283,0.29634,0.124794,0.09428,0.374,0.314063,0.213941
93 + 4,0.283827,0.02411,0.19471,0.295441,0.310315,0.150882,0.14254,0.447627,0.323913,0.21383
94 + 4,0.301198,0.013998,0.206347,0.309823,0.319779,0.155306,0.152265,0.536327,0.326384,0.211099
95 + 4,0.320253,0.014524,0.163008,0.290788,0.307876,0.164116,0.140716,0.566959,0.318423,0.249285


Unnamed: 0,Денситометр,КТ,КТ с КУ 1 зона,КТ с КУ 2 и более зон,ММГ,МРТ,МРТ с КУ 1 зона,МРТ с КУ 2 и более зон,РГ,Флюорограф
86 + 4,0.327963,0.288459,0.118373,0.231617,0.249118,0.071531,0.032136,0.612919,0.254914,0.123227
87 + 4,0.305965,0.22794,0.098441,0.24576,0.246084,0.10229,0.056594,0.599527,0.270518,0.131407
88 + 4,0.237109,0.156365,0.081414,0.248949,0.204561,0.106449,0.076025,0.591254,0.232204,0.234248
89 + 4,0.212558,0.094957,0.052345,0.262656,0.195899,0.123449,0.08069,0.557921,0.227005,0.268344
90 + 4,0.222431,0.050275,0.081945,0.275371,0.199098,0.110632,0.10198,0.431658,0.217447,0.311357
91 + 4,0.265999,0.031813,0.144952,0.288885,0.205376,0.074806,0.097892,0.471242,0.21267,0.336795
92 + 4,0.320324,0.042536,0.167857,0.27935,0.245626,0.074272,0.088378,0.479514,0.264335,0.264859
93 + 4,0.380619,0.081845,0.214335,0.325081,0.277765,0.083197,0.128845,0.476573,0.301397,0.257769
94 + 4,0.3816,0.088128,0.176857,0.320683,0.274443,0.081957,0.118996,0.546017,0.304425,0.303745
95 + 4,0.370064,0.091145,0.139654,0.298676,0.248874,0.083965,0.105226,0.555541,0.287329,0.420246


In [74]:
def represent_cross_validation_results(models, cross_val_results):

    for model in models:
        represent_model_dict = {}
        for validation_step in cross_val_results:
            represent_model_dict[validation_step] = cross_val_results[validation_step][model]


        represent_model_df = pd.DataFrame(represent_model_dict).T  # transpose DataFrame   
        model_df_with_stats = pd.concat([represent_model_df,  represent_model_df.describe()])
        
        display(model_df_with_stats.style.set_caption(f'{model}'))

    return



represent_cross_validation_results(models,ress)

Unnamed: 0,Денситометр,КТ,КТ с КУ 1 зона,КТ с КУ 2 и более зон,ММГ,МРТ,МРТ с КУ 1 зона,МРТ с КУ 2 и более зон,РГ,Флюорограф
86 + 4,0.162582,0.497558,0.097157,0.053458,0.215588,0.136897,0.024637,0.494952,0.176475,0.192401
87 + 4,0.166987,0.393668,0.059673,0.081503,0.225985,0.149127,0.018794,0.395126,0.208346,0.192053
88 + 4,0.140742,0.26036,0.053079,0.122962,0.175356,0.141396,0.020265,0.392852,0.187931,0.294155
89 + 4,0.158545,0.186597,0.046658,0.1566,0.181461,0.120798,0.011711,0.325271,0.214371,0.304572
90 + 4,0.176376,0.129114,0.082091,0.192807,0.202168,0.120071,0.040365,0.284729,0.237229,0.310596
91 + 4,0.2061,0.075555,0.122028,0.242954,0.218469,0.112198,0.068728,0.389679,0.249379,0.325742
92 + 4,0.265931,0.051764,0.166473,0.262283,0.29634,0.124794,0.09428,0.374,0.314063,0.213941
93 + 4,0.283827,0.02411,0.19471,0.295441,0.310315,0.150882,0.14254,0.447627,0.323913,0.21383
94 + 4,0.301198,0.013998,0.206347,0.309823,0.319779,0.155306,0.152265,0.536327,0.326384,0.211099
95 + 4,0.320253,0.014524,0.163008,0.290788,0.307876,0.164116,0.140716,0.566959,0.318423,0.249285


Unnamed: 0,Денситометр,КТ,КТ с КУ 1 зона,КТ с КУ 2 и более зон,ММГ,МРТ,МРТ с КУ 1 зона,МРТ с КУ 2 и более зон,РГ,Флюорограф
86 + 4,0.327963,0.288459,0.118373,0.231617,0.249118,0.071531,0.032136,0.612919,0.254914,0.123227
87 + 4,0.305965,0.22794,0.098441,0.24576,0.246084,0.10229,0.056594,0.599527,0.270518,0.131407
88 + 4,0.237109,0.156365,0.081414,0.248949,0.204561,0.106449,0.076025,0.591254,0.232204,0.234248
89 + 4,0.212558,0.094957,0.052345,0.262656,0.195899,0.123449,0.08069,0.557921,0.227005,0.268344
90 + 4,0.222431,0.050275,0.081945,0.275371,0.199098,0.110632,0.10198,0.431658,0.217447,0.311357
91 + 4,0.265999,0.031813,0.144952,0.288885,0.205376,0.074806,0.097892,0.471242,0.21267,0.336795
92 + 4,0.320324,0.042536,0.167857,0.27935,0.245626,0.074272,0.088378,0.479514,0.264335,0.264859
93 + 4,0.380619,0.081845,0.214335,0.325081,0.277765,0.083197,0.128845,0.476573,0.301397,0.257769
94 + 4,0.3816,0.088128,0.176857,0.320683,0.274443,0.081957,0.118996,0.546017,0.304425,0.303745
95 + 4,0.370064,0.091145,0.139654,0.298676,0.248874,0.083965,0.105226,0.555541,0.287329,0.420246


In [65]:
ress

{'86 + 4': {'Naive_mean': Денситометр               0.162582
  КТ                        0.497558
  КТ с КУ 1 зона            0.097157
  КТ с КУ 2 и более зон     0.053458
  ММГ                       0.215588
  МРТ                       0.136897
  МРТ с КУ 1 зона           0.024637
  МРТ с КУ 2 и более зон    0.494952
  РГ                        0.176475
  Флюорограф                0.192401
  dtype: float64,
  'Naive_seasonal_K52': Денситометр               0.327963
  КТ                        0.288459
  КТ с КУ 1 зона            0.118373
  КТ с КУ 2 и более зон     0.231617
  ММГ                       0.249118
  МРТ                       0.071531
  МРТ с КУ 1 зона           0.032136
  МРТ с КУ 2 и более зон    0.612919
  РГ                        0.254914
  Флюорограф                0.123227
  dtype: float64},
 '87 + 4': {'Naive_mean': Денситометр               0.166987
  КТ                        0.393668
  КТ с КУ 1 зона            0.059673
  КТ с КУ 2 и более зон     0.081503
  ММГ

In [50]:
df.describe()

Unnamed: 0,year,week,Денситометр,КТ,КТ с КУ 1 зона,КТ с КУ 2 и более зон,ММГ,МРТ,МРТ с КУ 1 зона,МРТ с КУ 2 и более зон,РГ,Флюорограф
count,109.0,109.0,109.0,109.0,109.0,109.0,109.0,109.0,109.0,109.0,109.0,109.0
mean,2022.541284,25.853211,1329.201835,4882.201835,544.834862,612.715596,14002.100917,1957.623853,792.770642,8.59633,57966.495413,18959.376147
std,0.585822,15.616839,431.677751,1836.936418,147.709645,165.476646,4357.585561,398.824146,171.028459,4.641039,16708.372701,6391.717477
min,2021.0,1.0,0.0,887.0,0.0,0.0,22.0,8.0,0.0,0.0,2459.0,0.0
25%,2022.0,12.0,1050.0,4125.0,478.0,514.0,12199.0,1820.0,738.0,6.0,48811.0,16525.0
50%,2023.0,26.0,1418.0,4671.0,567.0,621.0,14039.0,2049.0,820.0,8.0,57958.0,19070.0
75%,2023.0,39.0,1671.0,5014.0,650.0,743.0,17448.0,2163.0,905.0,11.0,67630.0,22797.0
max,2024.0,52.0,2102.0,13235.0,757.0,914.0,21204.0,2549.0,995.0,22.0,99858.0,31419.0


In [39]:
series[108:110].values()

array([[1.8160e+03, 3.9390e+03, 5.6300e+02, 5.1800e+02, 1.7769e+04,
        1.7120e+03, 8.0900e+02, 1.7000e+01, 9.8705e+04, 6.5800e+03],
       [2.1020e+03, 4.5330e+03, 5.3300e+02, 5.6600e+02, 1.8770e+04,
        1.8780e+03, 7.5500e+02, 1.7000e+01, 9.9858e+04, 7.5910e+03]])

In [51]:
df = pd.read_excel('../../data/raw/Количество исследований по неделям (для реализации).xlsx')
df = df.rename(columns={"Год": "year", "Номер недели": "week"})
df.fillna(0, inplace=True)
ds_index = pd.date_range("2021-12-24", "2024-1-28", freq="W", name="ds").to_frame().reset_index(drop=True)
ds_index["year"] = ds_index["ds"].dt.year
ds_index["week"] = ds_index["ds"].dt.isocalendar().week
ds_index['week'][0] = 52
ds_index['week'][1] = 1
ds_index['week'][2:54] += 1
data = ds_index.merge(df, how="left", on=["year", "week"])
data.fillna(0, inplace=True)
series = TimeSeries.from_dataframe(
    data,
    time_col='ds',
    value_cols=[column for column in df.columns if column not in ['year', 'week']]
)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  ds_index['week'][0] = 52
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, becaus

In [12]:
df = pd.read_parquet("../../data/processed/dataset.parquet")
df.dtypes

Денситометр                                    float64
КТ                                               int64
КТ с КУ 1 зона                                 float64
КТ с КУ 2 и более зон                          float64
ММГ                                              int64
МРТ                                              int64
МРТ с КУ 1 зона                                float64
МРТ с КУ 2 и более зон                         float64
РГ                                               int64
Флюорограф                                     float64
ds                                      datetime64[ns]
feat_КТ с КУ 1 зона_lag-4_КТ                   float64
feat_КТ_lag-1_КТ с КУ 1 зона                   float64
feat_МРТ_lag-6_КТ с КУ 2 и более зон           float64
feat_МРТ с КУ 1 зона_lag-1_МРТ                 float64
feat_МРТ_lag-1_МРТ с КУ 1 зона                 float64
feat_КТ с КУ 1 зона_lag-1_ММГ                  float64
dtype: object

In [41]:
df = pd.read_parquet("../../data/processed/dataset.parquet")
df = df[[column for column in df.columns if 'feat' not in column]]
series = TimeSeries.from_dataframe(df, time_col='ds', value_cols=[column for column in df.columns if 'feat' not in column and column != 'ds'])

In [42]:
series