# Notebook to test the pipeline locally

In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell runs, so edits to the project code are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
## Run the notebook from the repository root
import os

# Set before GitPython is imported: the variable is read at import time.
# NOTE(review): "quiet" presumably silences the git-executable check - confirm.
os.environ["GIT_PYTHON_REFRESH"] = "quiet"
import git

# Search upwards from the current directory for the enclosing repository.
start_dir = os.getcwd()
repo = git.Repo(start_dir, search_parent_directories=True)

# Make the repository's working tree the current directory.
repo_root = repo.working_tree_dir
os.chdir(repo_root)

# Show where we ended up.
print(os.getcwd())


## Pipeline Parameters

In [None]:
# Name of the target column; passed as `target_feat` to DataSplitter.
target_feat_param = 'count'

# DataSplitter params
test_size_param = 0.25
random_state_param = 1
# Misspelled legacy name that no cell in this notebook reads. Kept as an alias
# of `target_feat_param` (instead of a second copy of the literal) so the two
# values cannot drift apart and any external reference keeps working.
targer_feat = target_feat_param

# PipelineTrainer params
method_missing_value = 'mean'
categorical_class_minimum_occurrences_param = 500
model_params_param = {"n_estimators": 100}

## Import the data


In [None]:
from Components.DataPreparation.DataFetcher.DataFetcher import DataFetcher

# Fetch version '1' of the 'BikeSharingPredictionsHours' dataset.
df = DataFetcher.execute(
    input_dataset_name='BikeSharingPredictionsHours',
    input_dataset_version='1',
)

# Preview the first rows of the fetched data.
df.head()

_________________________________________

### Data Transformer

In [None]:
from Components.DataPreparation.DataTransformer.DataTransformer import DataTransformer

# Hand the transformer a copy so the fetched `df` is left untouched.
df_input = df.copy()
df_transformed = DataTransformer.execute(df_input)

# Preview the first rows of the transformed data.
df_transformed.head()

### Data Splitter

In [None]:
from Components.Training.DataSplitter.DataSplitter import DataSplitter

# Split the transformed data into train/test features and targets, using the
# values defined in the "Pipeline Parameters" cell.
X_train, X_test, y_train, y_test = DataSplitter.execute(
    df=df_transformed,
    target_feat=target_feat_param,
    test_size=test_size_param,
    random_state=random_state_param,
)

### PipelineTrainer

In [None]:
from Components.Training.PipelineTrainer.PipelineTrainer import PipelineTrainer
from pathlib import Path

# Train the pipeline on the training split. Every tunable value comes from the
# "Pipeline Parameters" cell; `method_missing_value` used to be hard-coded to
# 'mean' here, which silently ignored the parameter of the same name.
# `output_dir` is the folder the PipelineEvaluator section loads the model from.
pipeline = PipelineTrainer.execute(X_train=X_train,
                                   y_train=y_train,
                                   method_missing_value=method_missing_value,
                                   categorical_class_minimum_occurrences=categorical_class_minimum_occurrences_param,
                                   model_params=model_params_param,
                                   output_dir=Path('output_folder'),
                                   )


### PipelineEvaluator

In [None]:
import mlflow

# Load the model saved under 'output_folder' back through mlflow's pyfunc API.
mlflow_pipeline = mlflow.pyfunc.load_model('output_folder')

# Pull out the wrapped pipeline object, rebinding `pipeline` to the reloaded one.
# NOTE(review): this goes through the private `_model_impl` attribute, so it
# may break on an mlflow upgrade.
python_model = mlflow_pipeline._model_impl.python_model
pipeline = python_model.model
pipeline

In [None]:
import pandas as pd

from Components.Training.PipelineEvaluator.PipelineEvaluator import PipelineEvaluator

# Evaluate the pipeline on the held-out test split.
metrics_test, df_results_test = PipelineEvaluator.execute(
    pipeline=pipeline,
    X_test=X_test,
    y_test=y_test,
)

# Show the metrics as one column (one row per metric), rounded to 3 decimals.
metrics_table = pd.DataFrame([metrics_test]).T
metrics_table.round(3)