<a href="https://colab.research.google.com/github/Rohith616/Client-Project-1/blob/main/Example_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install common evalml

Collecting common
  Downloading common-0.1.2.tar.gz (3.5 kB)
Collecting evalml
  Downloading evalml-0.43.0-py3-none-any.whl (6.4 MB)
[K     |████████████████████████████████| 6.4 MB 4.4 MB/s 
Collecting featuretools>=1.2.0
  Downloading featuretools-1.6.0-py3-none-any.whl (356 kB)
[K     |████████████████████████████████| 356 kB 41.2 MB/s 
[?25hCollecting scipy>=1.5.0
  Downloading scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (38.1 MB)
[K     |████████████████████████████████| 38.1 MB 1.7 MB/s 
[?25hCollecting sktime>=0.7.0
  Downloading sktime-0.10.1-py3-none-any.whl (6.5 MB)
[K     |████████████████████████████████| 6.5 MB 24.6 MB/s 
Collecting colorama>=0.4.4
  Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)
Collecting statsmodels>=0.12.2
  Downloading statsmodels-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.8 MB)
[K     |████████████████████████████████| 9.8 MB 10.6 MB/s 
[?25hCollecting vowpalwabbit>=8.11.0
  Download

In [None]:
import pandas as pd
from fbprophet import Prophet
from sklearn.model_selection import train_test_split
import re

from . import common


def train_model(
    data: pd.DataFrame, future_data: pd.DataFrame, parameters: dict
) -> [Prophet, pd.DataFrame, pd.DataFrame]:
    """
    Fit one Prophet model per ``target_id`` and predict validation/future data.

    1. Coerce the input frames to the dtypes the model expects.
    2. Fit a Prophet model for every ``target_id`` group.
    3. Produce predictions for the validation split and for the future data.

    Args:
        data (pandas.DataFrame): Training data. Must contain the columns
            ``ds``, ``target_id``, ``y`` and ``holiday``; an ``events`` column
            is used when present and not entirely empty.
        future_data (pandas.DataFrame): Future rows to predict. Must contain
            ``ds``, ``target_id`` and ``holiday`` (and ``events`` when it is
            used for ``data``).
        parameters (dict): Configuration. The keys read here are
            ``parameters["common"]["validation_data_ratio"]`` (test size passed
            to ``train_test_split``),
            ``parameters["common"]["train_test_split"]`` (truthy: fit on the
            training split only; falsy: fit on all of ``data``) and
            ``parameters["models_prophet"]`` (keyword arguments for
            ``Prophet``).

    Returns:
        list: ``[models, pred, future_pred]`` where ``models`` is a dict
        mapping each ``target_id`` to its fitted Prophet model, ``pred`` is
        the validation prediction and ``future_pred`` the future prediction,
        both with the columns ``ds``, ``target_id`` and ``y``.
    """
    # Derive the process name from this module's file name. A notebook kernel
    # has no ``__file__``, so fall back to a fixed name instead of raising.
    module_path = globals().get("__file__", "")
    name_parts = re.split("[/_.]", module_path)
    process_name = name_parts[-2] if len(name_parts) >= 2 else "train_model"
    common.print_process_start(process_name)

    # Type coercion. ``assign`` returns new frames, so the caller's
    # DataFrames are not modified.
    data = data.assign(
        ds=pd.to_datetime(data["ds"]),
        target_id=data["target_id"].astype(str),
        y=data["y"].astype(float),
    )
    future_data = future_data.assign(
        ds=pd.to_datetime(future_data["ds"]),
        target_id=future_data["target_id"].astype(str),
    )

    # Columns that hold at least one non-null value.
    use_cols = data.dropna(axis=1, how="all").columns

    # Numeric regressors: every int/float column except the target.
    reg_numerics = [
        k for k, v in data.dtypes.items() if v in [int, float] and k not in ["y"]
    ]
    # Categorical regressors: every object column except identifiers/holiday.
    reg_categories = [
        k
        for k, v in data.dtypes.items()
        if v == object and k not in ["target_id", "target_address", "holiday"]
    ]

    def sub_func(data, future_data):
        """Fit Prophet on one target's rows and predict validation/future."""
        # Regressors handed to the model (dummy columns are appended below).
        regressors = list(reg_numerics)

        # Holidays: every date whose ``holiday`` value is not null, taken from
        # both the training and the future rows.
        holiday_dates = pd.concat(
            [
                data.loc[data["holiday"].notna(), "ds"],
                future_data.loc[future_data["holiday"].notna(), "ds"],
            ]
        )
        holidays = pd.DataFrame(
            {
                "holiday": "holiday",
                "ds": holiday_dates,
                "lower_window": 0,
                "upper_window": 1,
            }
        )

        # Events are fed to Prophet through the same ``holidays`` frame.
        if "events" in use_cols:
            event_rows = pd.concat(
                [
                    data.loc[data["events"].notna(), ["ds", "events"]],
                    future_data.loc[future_data["events"].notna(), ["ds", "events"]],
                ]
            )
            events = pd.DataFrame(
                {
                    "holiday": "events",
                    "ds": event_rows["ds"],
                    "lower_window": 0,
                    "upper_window": 1,
                }
            )
            holidays = pd.concat([holidays, events])

        # One-hot encode the categorical columns on train+future together so
        # both frames end up with the same dummy columns.
        n_train = len(data)
        combined = pd.concat([data, future_data])
        columns_before = set(combined.columns)
        all_data = pd.get_dummies(combined, columns=reg_categories)
        data = all_data.iloc[:n_train]
        # Slice from n_train rather than ``-len(future_data)``: a negative
        # zero offset would select the whole frame when there are no future rows.
        future_data = all_data.iloc[n_train:]
        # Only columns actually created by get_dummies are new regressors.
        # (Matching names against a joined regex selects *every* column when
        # there are no categorical columns, because the pattern is empty.)
        regressors.extend(c for c in all_data.columns if c not in columns_before)

        # Train / validation split (chronological: no shuffling).
        tmp_train_data, validation_data = train_test_split(
            data, test_size=parameters["common"]["validation_data_ratio"], shuffle=False
        )
        if parameters["common"]["train_test_split"]:
            # Keep the validation rows out of the fit.
            train_data = tmp_train_data
        else:
            # Fit on everything, validation rows included.
            train_data = data

        # Fit. Copy the kwargs so the caller's parameters dict is not mutated.
        prophet_kwargs = dict(parameters["models_prophet"])
        prophet_kwargs["holidays"] = holidays
        model = Prophet(**prophet_kwargs)
        for reg in regressors:
            model.add_regressor(reg)
        model.fit(train_data[["ds", "y"] + regressors])

        # Predict the validation rows.
        validation_data = validation_data.reset_index(drop=True)
        pred = model.predict(validation_data[["ds"] + regressors])
        pred = pred[["ds", "yhat"]].rename(columns={"yhat": "y"})

        # Predict the future rows.
        future_pred = model.predict(future_data[["ds"] + regressors])
        future_pred = future_pred[["ds", "yhat"]].rename(columns={"yhat": "y"})

        return model, pred[["ds", "y"]], future_pred[["ds", "y"]]

    # Run one fit per target_id and collect the results.
    results = {"model": {}, "pred": [], "future_pred": []}
    for target_id, sub_data in data.groupby("target_id"):
        sub_future_data = future_data[future_data["target_id"] == target_id]
        model, pred, future_pred = sub_func(sub_data, sub_future_data)
        results["model"][target_id] = model
        results["pred"].append(pred.assign(target_id=target_id))
        results["future_pred"].append(future_pred.assign(target_id=target_id))
    model = results["model"]
    pred = pd.concat(results["pred"], axis=0)
    future_pred = pd.concat(results["future_pred"], axis=0)

    common.print_process_end(process_name)

    return [
        model,
        pred[["ds", "target_id", "y"]],
        future_pred[["ds", "target_id", "y"]],
    ]

In [None]:
from pandas.core.algorithms import rank
if __name__ == "__main__":
    # The CSV lives on Google Drive, so the drive must already be mounted at
    # /content/drive when this cell runs.
    data = pd.read_csv("/content/drive/MyDrive/Datasets/preprocessed_data.csv")
    # Stand-in "future" rows: the first 1000 training rows without the target.
    future_data = data.drop("y", axis=1)
    future_data = future_data.iloc[:1000]
    parameters = {
        # train_model reads parameters["common"] and
        # parameters["models_prophet"]; omitting them raises KeyError.
        # Values mirror the project's parameters YAML.
        "common": {
            "validation_data_ratio": 0.2,
            "train_test_split": True,
        },
        "models_prophet": {
            "growth": "linear",
            "changepoints": None,
            "n_changepoints": 25,
            "changepoint_range": 0.8,
            "yearly_seasonality": "auto",
            "weekly_seasonality": "auto",
            "daily_seasonality": "auto",
            "seasonality_mode": "additive",
            "seasonality_prior_scale": 10.0,
            "holidays_prior_scale": 10.0,
            "changepoint_prior_scale": 0.05,
            "interval_width": 0.8,
        },
        # Kept from the original cell; not read by the train_model shown in
        # this notebook.
        "model_evalML": {
            "gap": 0,
            "max_delay": 7,
            "forecast_horizon": 7,
            "time_index": "ds",
        },
    }
    # train_model returns [models, validation_pred, future_pred].
    automl = train_model(data, future_data, parameters)

Time series support in evalml is still in beta, which means we are still actively building its core features. Please be mindful of that when running search().
Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.766e+04, tolerance: 4.867e+00
Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.725e+04, tolerance: 1.009e+01
Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 5.985e+04, tolerance: 1.540e+01
stepwise model cannot be fit in parallel (n_jobs=1). Falling back to stepwise parameter search.
stepwise model cannot be fit in parallel (n_jobs=1). Falling back to stepwise parameter search.
stepwise model cannot be fit in parallel (n_jobs=1). Falli

KeyError: ignored

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Parameters for the example pipeline. Feel free to delete these once you
# remove the example pipeline from hooks.py and the example nodes in
# `src/pipelines/`
# Documentation for this file format can be found in "Parameters"
# Link: https://kedro.readthedocs.io/en/stable/04_kedro_project_setup/02_configuration.html#parameters

# common
common:
  validation_data_ratio: 0.2
  train_test_split: True

# data collection
collection_holiday: null
collection_weather: null

# data typing
data_typing: 
  data_types: {}
  data_properties:
    Meta: 
      - target_id
      - target_address
    DateTime: []
    Numeric: []
    Category: []

# data preparation
data_preparation:
  freq_day: 1

# data preprocessing
data_preprocessing:
  interpolate:
    methods: {}
  transform:
    methods: {}

# feature engineering
feature_engineering: null

# prophet
models_prophet:
  growth: 'linear'
  changepoints: null
  n_changepoints: 25
  changepoint_range: 0.8
  yearly_seasonality: 'auto'
  weekly_seasonality: 'auto'
  daily_seasonality: 'auto'
  seasonality_mode: 'additive'
  seasonality_prior_scale: 10.0
  holidays_prior_scale: 10.0
  changepoint_prior_scale: 0.05
  interval_width: 0.8

# sktime_AutoARIMA
models_sktime_AutoARIMA:
  start_p: 2
  d: null
  start_q: 2
  max_p: 5
  max_d: 2
  max_q: 5
  start_P: 1
  D: null
  start_Q: 1
  max_P: 2
  max_D: 1
  max_Q: 2
  max_order: 5
  sp: 1
  seasonal: False
  # simple_differencing: True  (NOTE: supposedly speeds up fitting, but it was still slow, so it is left disabled)

# sktime_AutoETS
models_sktime_AutoETS:
  auto: True
  sp: 1

# sktime_ThetaForecaster
models_sktime_ThetaForecaster:
  sp: 1

# evaluations
evaluation_prophet: null
evaluation_sktime_AutoARIMA: null
evaluation_sktime_AutoETS: null
evaluation_sktime_ThetaForecaster: null

In [None]:
# Display the pandas version installed in this runtime.
import pandas
pandas.__version__ 

'1.3.5'

In [None]:
!pip install --upgrade pandas

