In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder, StandardScaler

from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import cross_val_score

In [None]:
import os

# Print the full path of every file found under the Kaggle input directory,
# so the dataset locations used by the following cells can be confirmed.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [None]:
# Load the two competition CSV files into DataFrames. `train` is the frame the
# target column is taken from below; `test` is the frame scored for submission.
test = pd.read_csv(filepath_or_buffer='/kaggle/input/DontGetKicked/test.csv')
train = pd.read_csv(filepath_or_buffer='/kaggle/input/DontGetKicked/training.csv')

In [None]:
# Separate the predictors from the target column.
# 'RefId' is dropped from the features as well (presumably just a row
# identifier -- it is only used later to label the submission rows).
X = train.drop(columns=['IsBadBuy', 'RefId'])
y = train['IsBadBuy']

In [None]:
# Partition the feature columns by dtype kind: signed-integer ('i') and
# floating-point ('f') columns are treated as numerical, every other dtype
# kind (e.g. object/string columns) as categorical.
numeric_kinds = ('i', 'f')
numerical_features = [name for name, dt in X.dtypes.items() if dt.kind in numeric_kinds]
categorical_features = [name for name, dt in X.dtypes.items() if dt.kind not in numeric_kinds]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the labelled rows. The split is stratified on the target so
# both parts keep the same class proportions, and seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=0,
)

In [None]:
# Numerical columns: fill missing values with the column median, standardise,
# then discretise each feature into 3 bins.
numeric_pipeline = make_pipeline(
    SimpleImputer(strategy='median'),
    StandardScaler(),
    KBinsDiscretizer(n_bins=3),
)

# Categorical columns: replace missing values with the literal 'missing', then
# one-hot encode. Categories not seen during fit are ignored at transform time
# instead of raising.
categorical_pipeline = make_pipeline(
    SimpleImputer(strategy='constant', fill_value='missing'),
    OneHotEncoder(categories='auto', handle_unknown='ignore'),
)

# Route each column group through its own pipeline. Columns that appear in
# neither list are not passed to either pipeline.
preprocessor = make_column_transformer(
    (numeric_pipeline, numerical_features),
    (categorical_pipeline, categorical_features),
)

In [None]:
# End-to-end model: the column preprocessor followed by a 100-tree random forest.
# random_state is pinned (matching the seed used for the train/test split) so
# that a fresh "Restart & Run All" fits the same forest and therefore writes
# the same predictions; without it every run produces a different model.
Model = make_pipeline(
    preprocessor,
    RandomForestClassifier(n_estimators=100, random_state=0),
)

In [None]:
# Fit the preprocessing steps and the classifier on the training portion only.
# NOTE(review): X_test / y_test are not used in the visible cells -- consider
# scoring the fitted pipeline on them before building the submission.
Model.fit(X=X_train, y=y_train)

In [None]:
# Predict on the competition test file. 'RefId' is removed from the inputs so
# the columns match what the pipeline was fitted on.
sub_test = test.drop(columns=['RefId'])
sub_test_pred = Model.predict(sub_test).astype(int)

# Assemble the submission: one row per test record, labelled by its RefId,
# with the predicted class cast to int.
submission_columns = {'RefId': test['RefId'], 'IsBadBuy': sub_test_pred}
AllSub = pd.DataFrame(submission_columns)

# Write without the DataFrame index so the file holds only the two columns.
AllSub.to_csv("DGK_Pipeline_RF.csv", index=False)