# Data Training Functions

In [None]:
def build_pipeline():
    """Create the (unfitted) numeric preprocessing pipeline.

    The pipeline chains three scikit-learn steps, in this order:

    1. ``imputer`` -- ``SimpleImputer`` using the median strategy.
    2. ``quantile_transformer`` -- ``QuantileTransformer`` with a normal
       output distribution.
    3. ``std_scaler`` -- ``StandardScaler``.

    Returns:
        A new ``sklearn.pipeline.Pipeline`` instance; nothing is fitted here.
    """
    # scikit-learn is imported lazily so it is only required when a pipeline
    # is actually built.
    from sklearn.impute import SimpleImputer
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import QuantileTransformer, StandardScaler

    steps = [
        ('imputer', SimpleImputer(strategy="median")),
        ('quantile_transformer', QuantileTransformer(output_distribution='normal')),
        ('std_scaler', StandardScaler()),
    ]
    return Pipeline(steps)

In [None]:
def train_test_split(data, target, test_size=0.2, random_state=None):
    """Split ``data`` into stratified train/test sets and print a summary.

    The share of rows whose ``target`` value is ``0`` ("No Frauds") and ``1``
    ("Frauds") is printed first, followed by the shapes of the resulting
    splits.

    Args:
        data: DataFrame holding both the feature columns and the target column.
        target: Label of the target column in ``data``. It is removed from the
            features and used both as the label vector and as the
            stratification key.
        test_size: Passed to scikit-learn's ``train_test_split``; defaults to
            ``0.2`` as before.
        random_state: Passed to scikit-learn's ``train_test_split``. The
            default ``None`` keeps the previous non-deterministic shuffling;
            pass an int for a reproducible split.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test)``.
    """
    # Aliased so the scikit-learn splitter is not confused with this wrapper,
    # which deliberately carries the same name.
    from sklearn.model_selection import train_test_split as _sk_train_test_split

    targets = data[target]
    features = data.drop(target, axis=1)

    # Report the class balance of the column that is actually being split on
    # (previously hard-coded to 'isFraud'). ``.get(..., 0)`` reports 0 % for a
    # class that does not occur instead of raising KeyError.
    class_counts = targets.value_counts()
    total_rows = len(data)
    for label, class_value in (('No Frauds', 0), ('Frauds', 1)):
        share = round(class_counts.get(class_value, 0) / total_rows * 100, 2)
        print(label, share, '% of the dataset')

    x_train, x_test, y_train, y_test = _sk_train_test_split(
        features,
        targets,
        test_size=test_size,
        shuffle=True,
        stratify=targets,
        random_state=random_state,
    )

    print("Number transactions x_train dataset: ", x_train.shape)
    print("Number transactions y_train dataset: ", y_train.shape)
    print("*" * 100)
    print("Number transactions x_test dataset: ", x_test.shape)
    print("Number transactions y_test dataset: ", y_test.shape)

    return x_train, x_test, y_train, y_test

In [None]:
def scale_pipeline(data, columns):
    """Fit the preprocessing pipeline on ``columns`` and overwrite them.

    A fresh pipeline from ``build_pipeline()`` is fitted on the raw values of
    ``data[columns]``; the transformed values are then written back into the
    same columns.

    Args:
        data: DataFrame containing the columns to transform. It is modified
            in place.
        columns: Column labels selecting the features to transform.

    Returns:
        The same ``data`` object that was passed in, with ``columns`` replaced
        by their transformed values.
    """
    raw_values = data[columns].values

    # The pipeline is fitted on exactly the rows passed in, then applied to them.
    data[columns] = build_pipeline().fit_transform(raw_values)

    return data