In [2]:
from typing import Dict, Tuple, List, Any
import datetime as dt
import json
from pathlib import Path
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import xgboost as xgb
import mlflow
from kedro.framework.session import KedroSession
from kedro.framework.startup import bootstrap_project

In [6]:
# Bootstrap the Kedro project rooted two directories above the current working
# directory, then open a session with no explicit environment (env=None; the
# original note says this uses the base config) and load the project context.
# NOTE(review): `context` is used by later cells after the `with` block has
# closed the session -- confirm this is supported by the Kedro version in use.
metadata = bootstrap_project(Path.cwd().parent.parent)
with KedroSession.create(metadata.package_name,
        project_path=metadata.project_path,
        # save_on_close=True,
        env=None,
        # extra_params=extra_params
    ) as session: 
    context = session.load_context()


2021-12-25 07:07:33,158 - kedro.framework.session.store - INFO - `save()` not implemented for `BaseSessionStore`. Skipping the step.


In [3]:
# Display the project parameters later passed to split_data.
context.params

{'test_size': 0.2,
 'random_state': 3,
 'features': ['engines',
  'passenger_capacity',
  'crew',
  'd_check_complete',
  'moon_clearance_complete',
  'iata_approved',
  'company_rating',
  'review_scores_rating']}

In [4]:
def split_data(data: pd.DataFrame, parameters: Dict) -> Tuple:
    """Split the input table into train/test features and targets.

    Args:
        data: Table holding the feature columns and a ``"price"`` column,
            which is used as the target.
        parameters: Mapping that provides ``"features"`` (the columns used as
            model inputs) plus ``"test_size"`` and ``"random_state"``, both
            forwarded unchanged to ``train_test_split``.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.
    """
    feature_frame = data[parameters["features"]]
    price = data["price"]
    split_options = {
        "test_size": parameters["test_size"],
        "random_state": parameters["random_state"],
    }
    train_X, test_X, train_y, test_y = train_test_split(
        feature_frame, price, **split_options
    )
    return train_X, test_X, train_y, test_y

In [5]:
def train_model(X_train: pd.DataFrame, y_train: pd.Series) -> LinearRegression:
    """Fit a scikit-learn ``LinearRegression`` with default settings.

    Args:
        X_train: Training feature matrix.
        y_train: Training target values.

    Returns:
        The fitted ``LinearRegression`` instance.
    """
    # ``fit`` returns the estimator itself, so construct-and-fit is one expression.
    return LinearRegression().fit(X_train, y_train)

In [6]:
def evaluate_model(
    regressor: LinearRegression, X_test: pd.DataFrame, y_test: pd.Series
):
    """Score the regressor on the evaluation split and log the result to MLflow.

    The R^2 of the model's predictions for ``X_test`` against ``y_test`` is
    computed first; a new MLflow run is then started in which the score is
    logged as the metric ``"r2"`` and the current local timestamp as the
    parameter ``"time of prediction"``.

    Args:
        regressor: Fitted model used to predict from ``X_test``.
        X_test: Feature matrix of the evaluation split.
        y_test: True target values of the evaluation split.

    Returns:
        None. The score is only logged, not returned.
    """
    r2 = r2_score(y_test, regressor.predict(X_test))
    with mlflow.start_run():
        mlflow.log_metric("r2", r2)
        logged_at = str(dt.datetime.now())
        mlflow.log_param("time of prediction", logged_at)
    

In [7]:
# Split the model input table using the project parameters, then persist each
# split under its own catalog entry (same order as before: X_train, X_test,
# y_train, y_test).
X_train, X_test, y_train, y_test = split_data(
    context.catalog.load("model_input_table"),
    context.params,
)
for _entry_name, _split in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    context.catalog.save(_entry_name, _split)

No files found in ['/kedro-sample/own_examples/conf/base', '/kedro-sample/own_examples/conf/local'] matching the glob pattern(s): ['credentials*', 'credentials*/**', '**/credentials*']
  warn(f"Credentials not found in your Kedro project config.\n{str(exc)}")


2021-12-25 06:43:51,698 - kedro.io.data_catalog - INFO - Loading data from `model_input_table` (CSVDataSet)...
2021-12-25 06:43:53,956 - kedro.io.data_catalog - INFO - Saving data to `X_train` (PickleDataSet)...
2021-12-25 06:43:54,058 - kedro.io.data_catalog - INFO - Saving data to `X_test` (PickleDataSet)...
2021-12-25 06:43:54,096 - kedro.io.data_catalog - INFO - Saving data to `y_train` (PickleDataSet)...
2021-12-25 06:43:54,144 - kedro.io.data_catalog - INFO - Saving data to `y_test` (PickleDataSet)...


In [8]:
# Fit the model on the training split read back from the catalog, then store
# the fitted regressor in the catalog as well.
regressor = train_model(
    context.catalog.load("X_train"),
    context.catalog.load("y_train"),
)
context.catalog.save("regressor", regressor)

2021-12-25 06:45:16,259 - kedro.io.data_catalog - INFO - Loading data from `X_train` (PickleDataSet)...
2021-12-25 06:45:16,335 - kedro.io.data_catalog - INFO - Loading data from `y_train` (PickleDataSet)...
2021-12-25 06:45:17,156 - kedro.io.data_catalog - INFO - Saving data to `regressor` (PickleDataSet)...


No files found in ['/kedro-sample/own_examples/conf/base', '/kedro-sample/own_examples/conf/local'] matching the glob pattern(s): ['credentials*', 'credentials*/**', '**/credentials*']
  warn(f"Credentials not found in your Kedro project config.\n{str(exc)}")


In [9]:
# Evaluate the stored regressor on the stored test split; the score is logged
# to MLflow inside evaluate_model, so this cell produces no value.
evaluate_model(
    context.catalog.load("regressor"),
    context.catalog.load("X_test"),
    context.catalog.load("y_test"),
)

2021-12-25 06:47:09,873 - kedro.io.data_catalog - INFO - Loading data from `regressor` (PickleDataSet)...
2021-12-25 06:47:09,927 - kedro.io.data_catalog - INFO - Loading data from `X_test` (PickleDataSet)...
2021-12-25 06:47:09,968 - kedro.io.data_catalog - INFO - Loading data from `y_test` (PickleDataSet)...


No files found in ['/kedro-sample/own_examples/conf/base', '/kedro-sample/own_examples/conf/local'] matching the glob pattern(s): ['credentials*', 'credentials*/**', '**/credentials*']
  warn(f"Credentials not found in your Kedro project config.\n{str(exc)}")


In [4]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  int(TensorProto.STRING): np.dtype(np.object)


In [13]:
# Inspect the shape of the stored test features (rows, feature columns).
context.catalog.load("X_test").shape

2021-12-25 07:00:41,622 - kedro.io.data_catalog - INFO - Loading data from `X_test` (PickleDataSet)...


(151922, 8)

In [7]:
# Declare the ONNX graph input as a float tensor with a dynamic batch dimension
# and one column per configured feature. The width is derived from the project
# parameters instead of being hard-coded, so it stays in sync with the feature
# list that split_data uses to build the training matrix.
# NOTE(review): assumes the stored regressor was trained with the current
# `features` parameter -- retrain before converting if the list has changed.
n_features = len(context.params["features"])
initial_type = [('float_input', FloatTensorType([None, n_features]))]
onx = convert_sklearn(context.catalog.load("regressor"), initial_types=initial_type)
# Persist the serialized ONNX protobuf bytes under the "onx" catalog entry.
context.catalog.save("onx", onx.SerializeToString())

No files found in ['/kedro-sample/own_examples/conf/base', '/kedro-sample/own_examples/conf/local'] matching the glob pattern(s): ['credentials*', 'credentials*/**', '**/credentials*']
  warn(f"Credentials not found in your Kedro project config.\n{str(exc)}")


2021-12-25 07:07:40,016 - kedro.io.data_catalog - INFO - Loading data from `regressor` (PickleDataSet)...
2021-12-25 07:07:40,056 - kedro.io.data_catalog - INFO - Saving data to `onx` (PickleDataSet)...


In [13]:
# Also write the serialized ONNX model to a standalone .onnx file; the path is
# relative to the notebook's working directory (two levels up, under data/).
with open("../../data/06_models/shuttles.onnx", "wb") as f:
    f.write(onx.SerializeToString())

In [10]:
import onnxruntime as rt
import numpy

# Build an inference session from the serialized model bytes kept in the catalog
# and look up the names of its first input and first output.
sess = rt.InferenceSession(context.catalog.load("onx"))
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

# The feed is cast to float32 before scoring the stored test features.
X_test_float32 = context.catalog.load("X_test").astype(numpy.float32).values
pred_onx = sess.run([label_name], {input_name: X_test_float32})[0]

2021-12-25 07:08:59,873 - kedro.io.data_catalog - INFO - Loading data from `onx` (PickleDataSet)...
2021-12-25 07:08:59,907 - kedro.io.data_catalog - INFO - Loading data from `X_test` (PickleDataSet)...


No files found in ['/kedro-sample/own_examples/conf/base', '/kedro-sample/own_examples/conf/local'] matching the glob pattern(s): ['credentials*', 'credentials*/**', '**/credentials*']
  warn(f"Credentials not found in your Kedro project config.\n{str(exc)}")


In [11]:
# Display the predictions returned by the ONNX Runtime session.
pred_onx

array([[5788.9775],
       [3973.5654],
       [3594.4954],
       ...,
       [3748.6594],
       [5454.851 ],
       [4444.2515]], dtype=float32)