In [7]:
import sys
import pathlib
src_path = pathlib.Path().absolute().parent / "src"
sys.path.append(str(src_path))
import data_functions
import custom_transformers as ct
import pandas as pd
import numpy as np

import pickle

from sklearn.compose import  make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

from sklearn.model_selection import cross_val_score

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [2]:
x_train, x_test, y_train = data_functions.get_dataframes()

In [18]:
simple_impute_strict = make_column_transformer((SimpleImputer(), data_functions.get_numeric_features(data_functions.get_strict_features())), remainder = 'passthrough')
ss_strict = make_column_transformer((StandardScaler(), data_functions.get_numeric_features(data_functions.get_strict_features())), remainder = 'passthrough')
ohe_strict = make_column_transformer((OneHotEncoder(handle_unknown = 'ignore', sparse = False), data_functions.get_categorical_features(data_functions.get_strict_features())), remainder = 'passthrough')

simple_impute_cy = make_column_transformer((SimpleImputer(missing_values = 0.0), ['construction_year']), remainder = 'passthrough')
transform_construction = make_column_transformer((ct.TransformConstructionYear(), ['construction_year']), remainder = 'passthrough')

simple_impute_loose = make_column_transformer((SimpleImputer(), data_functions.get_numeric_features(data_functions.get_loose_features())), remainder = 'passthrough')
ss_loose = make_column_transformer((StandardScaler(), data_functions.get_numeric_features(data_functions.get_loose_features())), remainder = 'passthrough')
ohe_loose = make_column_transformer((OneHotEncoder(handle_unknown = 'ignore', sparse = False), data_functions.get_categorical_features(data_functions.get_loose_features())), remainder = 'passthrough')

strict_preprocessing = make_pipeline(ct.ChooseStrictFeatures(),
                                    ct.BinInstaller(),
                                    simple_impute_cy,
                                    simple_impute_strict,
                                    transform_construction,
                                    ss_strict,
                                    ohe_strict)
                                    

loose_preprocessing = make_pipeline(ct.ChooseLooseFeatures(),
                                    ct.BinInstaller(),
                                    simple_impute_cy,
                                    transform_construction,
                                    simple_impute_loose,
                                    ss_loose,
                                    ohe_loose)

In [19]:
strict_model = make_pipeline(strict_preprocessing, DecisionTreeClassifier())

In [20]:
cross_val_score(strict_model, x_train, y_train, cv = 5)



ValueError: Specifying the columns using strings is only supported for pandas DataFrames

In [8]:
strict_pickle_path = src_path / 'strict_pre_pipeline.pkl'
loose_pickle_path = src_path / 'loose_pre_pipeline.pkl'
file = open(strict_pickle_path, 'wb')
pickle.dump(strict_preprocessing, file)
file.close()
file = open(loose_pickle_path, 'wb')
pickle.dump(loose_preprocessing, file)
file.close()