***GENERATED CODE FOR whitespaceapp PIPELINE***
***DON'T EDIT THIS CODE***

**CONNECTOR FUNCTIONS TO READ DATA FROM DATABRICKS FILESYSTEM**

In [None]:
import datetime
import logging
import warnings
warnings.filterwarnings('ignore')
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)


class DBFSConnector:
    """CSV reader/writer used by the pipeline for Databricks file system paths.

    The methods take no ``self`` and are meant to be called on the class,
    e.g. ``DBFSConnector.fetch(spark, config)``. They are declared as static
    methods so that calling them through an instance works as well.

    ``config`` is either a mapping or the string form of a dict literal with
    the keys ``url``, ``delimiter`` and ``is_header`` (the header row is used
    only when ``is_header`` equals ``"Use Header Line"``).
    """

    @staticmethod
    def _parseConfig(config):
        """Return the connector settings as a mapping.

        String configs are parsed with ``ast.literal_eval`` instead of
        ``eval`` so that only plain literals are accepted and no code embedded
        in the config string can be executed. Non-string configs are assumed
        to be mappings already and are returned untouched.

        Raises:
            ValueError / SyntaxError: if the string is not a Python literal.
        """
        import ast  # local import keeps this block self-contained

        if isinstance(config, str):
            return ast.literal_eval(config)
        return config

    @staticmethod
    def fetch(spark, config):
        """Read the CSV at ``config['url']`` and return it as a DataFrame.

        The schema is inferred by Spark. A two-row preview is rendered with
        the notebook's ``display`` helper before the frame is returned.
        """
        settings = DBFSConnector._parseConfig(config)
        useHeader = settings["is_header"] == "Use Header Line"
        df = spark.read.options(
            header='true' if useHeader else 'false',
            inferschema='true',
            delimiter=settings["delimiter"],
        ).csv(settings['url'])
        display(df.limit(2).toPandas())
        return df

    @staticmethod
    def put(df, spark, config):
        """Write ``df`` as CSV and return whatever ``save`` returns.

        ``spark`` is accepted for signature symmetry with ``fetch`` and is
        not used.
        """
        settings = DBFSConnector._parseConfig(config)
        useHeader = settings["is_header"] == "Use Header Line"
        writer = df.write.format('csv').options(
            header='true' if useHeader else 'false',
            delimiter=settings["delimiter"],
        )
        # NOTE(review): the target is "<timestamp>_ <url>" -- the timestamp is
        # placed *before* the url and separated from it by a space. This looks
        # unintended for absolute urls but is kept unchanged so existing
        # output locations do not move; confirm the desired layout.
        stamp = datetime.datetime.now().strftime("%Y-%m-%d %H.%M.%S") + "_"
        return writer.save("%s %s" % (stamp, settings['url']))


**TRANSFORMATION FUNCTIONS THAT WILL BE APPLIED TO THE DATA**

In [None]:
from pyspark.sql.functions import dayofmonth, month, year, col
import json
from pyspark.ml.feature import Binarizer
from pyspark.sql.functions import round
from pyspark.sql.types import IntegerType
from pyspark.ml.feature import StringIndexer
from pyspark.sql.functions import col, when
from pyspark.sql.types import IntegerType
from pyspark.sql.functions import mean, stddev, min, max, col


class CleanseData:
    """Replace null and blank (``" "``) values in Spark DataFrame columns.

    Every ``replaceBy*`` method receives a schema field (any object exposing
    ``.name``) plus a DataFrame and returns a new DataFrame. Spark DataFrames
    are immutable: ``fillna`` / ``withColumn`` return new frames, so each
    result is rebound here (previously the ``fillna`` results were discarded,
    which left nulls untouched).

    ``mean``, ``stddev``, ``min`` and ``max`` below are the
    ``pyspark.sql.functions`` aggregates imported at module level, not the
    Python builtins.
    """

    @staticmethod
    def _aggregate(df, columnName, aggFn):
        """Apply the aggregate ``aggFn`` to one column and return the scalar."""
        row = df.select(aggFn(col(columnName)).alias("value")).collect()[0]
        return row["value"]

    @staticmethod
    def _impute(df, columnName, blankValue, nullValue, keepExpr):
        """Fill nulls with ``nullValue`` and ``" "`` cells with ``blankValue``.

        ``keepExpr`` is the column expression used for every other cell.
        """
        df = df.fillna(nullValue, subset=[columnName])
        return df.withColumn(
            columnName,
            when(col(columnName) == " ", blankValue).otherwise(keepExpr))

    def replaceByMean(self, feature, df, mean_=-1):
        """Impute ``feature`` with the column mean. ``mean_`` is unused."""
        meanValue = CleanseData._aggregate(df, feature.name, mean)
        if meanValue is None:
            # No mean available (e.g. nothing numeric to average): nothing
            # to impute with, and fillna() rejects None.
            return df
        # Cast to double rather than Integer: the mean is generally
        # fractional and an integer cast would truncate existing real values.
        return CleanseData._impute(df, feature.name, meanValue, meanValue,
                                   col(feature.name).cast("double"))

    def replaceByMax(self, feature, df, max_=-1):
        """Impute ``feature`` with the column maximum. ``max_`` is unused."""
        maxValue = CleanseData._aggregate(df, feature.name, max)
        if maxValue is None:
            return df
        return CleanseData._impute(df, feature.name, maxValue, maxValue,
                                   col(feature.name))

    def replaceByMin(self, feature, df, min_=-1):
        """Impute ``feature`` with the column minimum. ``min_`` is unused."""
        minValue = CleanseData._aggregate(df, feature.name, min)
        if minValue is None:
            return df
        return CleanseData._impute(df, feature.name, minValue, minValue,
                                   col(feature.name))

    def replaceByStandardDeviation(self, feature, df, stddev_=-1):
        """Impute ``feature`` with the column standard deviation.

        ``stddev_`` is unused.
        """
        stddevValue = CleanseData._aggregate(df, feature.name, stddev)
        if stddevValue is None:
            return df
        return CleanseData._impute(df, feature.name, stddevValue, stddevValue,
                                   col(feature.name))

    def replaceDateRandomly(self, feature, df):
        """Impute ``feature`` with the value of its first non-null row.

        Despite the name, no random sampling happens: the replacement is
        whatever ``head(1)`` returns for the non-null rows. Nulls are filled
        with the string form of that value, blanks with the value itself.
        """
        sample = df.where(col(feature.name).isNotNull()).head(1)
        if not sample:
            # Column holds no non-null value at all: nothing to copy from.
            return df
        fillValue = sample[0][feature.name]
        return CleanseData._impute(df, feature.name, fillValue, str(fillValue),
                                   col(feature.name))

    def replaceNullValues(self, fList, df):
        """Run the configured replacement for every feature in ``fList``.

        Each entry of ``fList`` is a mapping with at least ``"feature"`` and
        ``"replaceby"``. The first strategy keyword contained in
        ``"replaceby"`` (checked in the order below) selects the method.

        NOTE(review): fields are matched by substring
        (``featureObj["feature"] in field.name``), so a feature name that is
        a prefix/substring of another column also triggers on that column.
        Kept as-is; confirm whether exact matching is intended.
        """
        strategies = (
            ("mean", self.replaceByMean),
            ("max", self.replaceByMax),
            ("min", self.replaceByMin),
            ("stddev", self.replaceByStandardDeviation),
            ("random", self.replaceDateRandomly),
        )
        # Snapshot of the fields before any replacement is applied.
        schemaFields = df.schema.fields
        for featureObj in fList:
            for field in schemaFields:
                if featureObj["feature"] not in field.name:
                    continue
                for keyword, replacer in strategies:
                    if keyword in featureObj["replaceby"]:
                        df = replacer(field, df)
                        break
        return df


def StringIndexerTransform(df, params, transformationData={}):
    """Add a ``<feature>_stringindexer`` column with the label index of ``feature``.

    Args:
        df: input Spark DataFrame.
        params: mapping; only ``params["feature"]`` (the column name) is read.
        transformationData: accepted for signature parity with the other
            transforms; not read and never mutated here.

    Returns:
        A DataFrame with the extra index column. Nulls in ``feature`` are
        replaced by ``''`` before fitting. When the index column holds at
        most 4 distinct values it is cast to ``IntegerType``; otherwise it
        is left as produced by ``StringIndexer``.
    """
    feature = params["feature"]
    outcol = feature + "_stringindexer"

    filled = df.fillna({feature: ''})
    indexer = StringIndexer(
        inputCol=feature, outputCol=outcol, handleInvalid="skip")
    indexed = indexer.fit(filled).transform(filled)

    # Only the number of distinct index values matters, so count them in
    # Spark instead of collecting every value to the driver.
    distinctCount = indexed.select(outcol).distinct().count()
    if distinctCount <= 4:
        return indexed.withColumn(outcol, indexed[outcol].cast(IntegerType()))
    return indexed


def ExtractDateTransform(df, params, transformationData={}):
    """Derive day-of-month, month and year columns from a date-like column.

    Args:
        df: input Spark DataFrame.
        params: mapping; only ``params['feature']`` (the column name) is read.
        transformationData: not used by this transform.

    Returns:
        A DataFrame with three added columns named ``<feature>dayofmonth``,
        ``<feature>month`` and ``<feature>year``. ``fillna`` with ``''`` is
        applied to the source column first.
    """
    feature = params['feature']
    source = col(feature)
    # (column-name suffix, pyspark extractor), in the order they are added.
    dateParts = (
        ('dayofmonth', dayofmonth),
        ('month', month),
        ('year', year),
    )

    result = df.fillna({feature: ''})
    for suffix, extractor in dateParts:
        result = result.withColumn(feature + suffix, extractor(source))
    return result


def BinarizerTransform(df, params, transformationData={}):
    """Add a ``<feature>_binarizer`` column thresholding ``feature``.

    Args:
        df: input Spark DataFrame.
        params: mapping; only ``params['feature']`` (the column name) is read.
        transformationData: mapping that must contain ``'threshold'``; the
            value is converted with ``float``.

    Returns:
        A DataFrame in which ``feature`` has been cast to double, had its
        nulls replaced by 0.0 and been rounded to 2 decimals, plus the
        binarized output column.
    """
    feature = params['feature']
    binarizedCol = feature + "_binarizer"

    # Cast through a temporary column, then swap it in under the original name.
    casted = df.withColumn("feature_cast", df[feature].cast("double"))
    casted = casted.drop(feature)
    casted = casted.withColumnRenamed("feature_cast", feature)
    casted = casted.fillna({feature: 0.0})

    threshold = float(transformationData['threshold'])
    binarizer = Binarizer(
        threshold=threshold, inputCol=feature, outputCol=binarizedCol)
    binarized = binarizer.transform(casted)

    # ``round`` is the pyspark.sql.functions import, not the builtin.
    return binarized.withColumn(feature, round(binarized[feature], 2))


class TransformationMain:
    """Entry point that applies the generated feature-engineering steps."""

    # TODO: change df argument in run with following
    @staticmethod
    def run(transformationDF, config):
        """Cleanse the frame, then apply the generated transform sequence.

        ``run`` takes no ``self``; it is declared static so that it works both
        as ``TransformationMain.run(df, config)`` and through an instance.

        Args:
            transformationDF: input Spark DataFrame.
            config: JSON string whose ``"FE"`` entry is the list of feature
                descriptors handed to ``CleanseData.replaceNullValues``.

        Returns:
            The transformed DataFrame. After each transform the source column
            is dropped, so only the derived column(s) remain. A two-row
            preview is rendered with ``display`` before returning.

        The per-feature dictionaries below are generated literals; the
        transforms defined in this file read only ``'feature'`` from the first
        dictionary and (for the binarizer) ``'threshold'`` from the second.
        """
        configObj = json.loads(config)
        featureData = configObj["FE"]
        # Null/blank replacement runs first, driven by the "FE" descriptors.
        transformationDF = CleanseData().replaceNullValues(featureData, transformationDF)
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'ChargeDetailID0', 'transformation_label': 'String Indexer'}], 'feature': 'ChargeDetailID0', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
                                                  'count': '1008', 'mean': '', 'stddev': '', 'min': '00edf044-8e73-4dbf-8656-465f2d1d46dd', 'max': 'ffb7ca3c-8cc3-49c7-b1e3-baf72a94e530', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'ChargeDetailID0', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('ChargeDetailID0')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'CPTModifier', 'transformation_label': 'String Indexer'}], 'feature': 'CPTModifier', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '25.16', 'stddev': '4.75', 'min': '24', 'max': 'XU', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'CPTModifier', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('CPTModifier')
        transformationDF = ExtractDateTransform(transformationDF, {'transformationsData': [{'feature_label': 'DateOfService', 'transformation_label': 'Extract Date'}], 'feature': 'DateOfService', 'type': 'date', 'selected': 'True', 'replaceby': 'random', 'stats': {
            'count': '', 'mean': '', 'stddev': '', 'min': '', 'max': '', 'missing': '0'}, 'transformation': [{'transformation': 'Extract Date', 'selectedAsDefault': 1}], 'generated': 'True'}, {'feature_label': 'DateOfService', 'transformation_label': 'Extract Date'})
        transformationDF = transformationDF.drop('DateOfService')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'RVUModifier', 'transformation_label': 'String Indexer'}], 'feature': 'RVUModifier', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '26.0', 'stddev': '0.0', 'min': '26', 'max': 'TC', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'RVUModifier', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('RVUModifier')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Gender', 'transformation_label': 'String Indexer'}], 'feature': 'Gender', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': 'F', 'max': 'M', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'Gender', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Gender')
        transformationDF = ExtractDateTransform(transformationDF, {'transformationsData': [{'feature_label': 'DateOfBirth', 'transformation_label': 'Extract Date'}], 'feature': 'DateOfBirth', 'type': 'date', 'selected': 'True', 'replaceby': 'random', 'stats': {
            'count': '', 'mean': '', 'stddev': '', 'min': '', 'max': '', 'missing': '0'}, 'transformation': [{'transformation': 'Extract Date', 'selectedAsDefault': 1}], 'generated': 'True'}, {'feature_label': 'DateOfBirth', 'transformation_label': 'Extract Date'})
        transformationDF = transformationDF.drop('DateOfBirth')
        transformationDF = ExtractDateTransform(transformationDF, {'transformationsData': [{'feature_label': 'LoadDate16', 'transformation_label': 'Extract Date'}], 'feature': 'LoadDate16', 'type': 'date', 'selected': 'True', 'replaceby': 'random', 'stats': {
            'count': '', 'mean': '', 'stddev': '', 'min': '', 'max': '', 'missing': '0'}, 'transformation': [{'transformation': 'Extract Date', 'selectedAsDefault': 1}], 'generated': 'True'}, {'feature_label': 'LoadDate16', 'transformation_label': 'Extract Date'})
        transformationDF = transformationDF.drop('LoadDate16')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'PatientID', 'transformation_label': 'String Indexer'}], 'feature': 'PatientID', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': '001c9b3b-f212-47a5-b233-a5586ca1f709', 'max': 'fff9493b-ea16-4596-9968-4840dbc60f65', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'PatientID', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('PatientID')
        transformationDF = BinarizerTransform(transformationDF, {'transformationsData': [{'feature_label': 'PatientNumber', 'threshold': 212407.8998015873, 'transformation_label': 'Binarizer'}], 'feature': 'PatientNumber', 'type': 'real', 'selected': 'True', 'replaceby': 'mean', 'stats': {
            'count': '1008', 'mean': '212407.9', 'stddev': '150350.07', 'min': '45.0', 'max': '472447.0', 'missing': '0'}, 'transformation': [{'transformation': 'Binarizer', 'selectedAsDefault': 1}]}, {'feature_label': 'PatientNumber', 'threshold': 212407.8998015873, 'transformation_label': 'Binarizer'})
        transformationDF = transformationDF.drop('PatientNumber')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'PayerName', 'transformation_label': 'String Indexer'}], 'feature': 'PayerName', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': 'Aetna', 'max': 'WorkersComp', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'PayerName', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('PayerName')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'PayerCategory', 'transformation_label': 'String Indexer'}], 'feature': 'PayerCategory', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': 'Aetna', 'max': 'WorkComp', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'PayerCategory', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('PayerCategory')
        transformationDF = ExtractDateTransform(transformationDF, {'transformationsData': [{'feature_label': 'LoadDate27', 'transformation_label': 'Extract Date'}], 'feature': 'LoadDate27', 'type': 'date', 'selected': 'True', 'replaceby': 'random', 'stats': {
            'count': '', 'mean': '', 'stddev': '', 'min': '', 'max': '', 'missing': '0'}, 'transformation': [{'transformation': 'Extract Date', 'selectedAsDefault': 1}], 'generated': 'True'}, {'feature_label': 'LoadDate27', 'transformation_label': 'Extract Date'})
        transformationDF = transformationDF.drop('LoadDate27')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'PayerID30', 'transformation_label': 'String Indexer'}], 'feature': 'PayerID30', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': '0147e0b3-7731-485b-85be-38eaa94ceb8f', 'max': 'ff35917b-58d1-495c-90b6-a8eb2a1ad197', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'PayerID30', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('PayerID30')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'PayerGroup', 'transformation_label': 'String Indexer'}], 'feature': 'PayerGroup', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': 'Aetna', 'max': 'OtherGovernment', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'PayerGroup', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('PayerGroup')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'PayerSubGroup1', 'transformation_label': 'String Indexer'}], 'feature': 'PayerSubGroup1', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': 'Aetna', 'max': 'WorkerComp', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'PayerSubGroup1', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('PayerSubGroup1')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'PayerSubGroup2', 'transformation_label': 'String Indexer'}], 'feature': 'PayerSubGroup2', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': 'Aetna', 'max': 'WorkerComp', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'PayerSubGroup2', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('PayerSubGroup2')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Firstname38', 'transformation_label': 'String Indexer'}], 'feature': 'Firstname38', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': 'Agnieszka', 'max': 'Yu', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'Firstname38', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Firstname38')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Lastname40', 'transformation_label': 'String Indexer'}], 'feature': 'Lastname40', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': 'Albert', 'max': 'Zuniga', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'Lastname40', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Lastname40')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'ProviderCategory', 'transformation_label': 'String Indexer'}], 'feature': 'ProviderCategory', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': 'Albert,Audra', 'max': 'Zuniga,Dorcas', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'ProviderCategory', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('ProviderCategory')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'ProviderName', 'transformation_label': 'String Indexer'}], 'feature': 'ProviderName', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': 'Albert,Audra-8F576202-A3BD-4943-9BA1-AA523E353890', 'max': 'Zuniga,Dorcas-E217C7D0-130E-47B6-AC58-59C99E52E0F6', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'ProviderName', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('ProviderName')
        transformationDF = ExtractDateTransform(transformationDF, {'transformationsData': [{'feature_label': 'LoadDate46', 'transformation_label': 'Extract Date'}], 'feature': 'LoadDate46', 'type': 'date', 'selected': 'True', 'replaceby': 'random', 'stats': {
            'count': '', 'mean': '', 'stddev': '', 'min': '', 'max': '', 'missing': '0'}, 'transformation': [{'transformation': 'Extract Date', 'selectedAsDefault': 1}], 'generated': 'True'}, {'feature_label': 'LoadDate46', 'transformation_label': 'Extract Date'})
        transformationDF = transformationDF.drop('LoadDate46')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'ProviderID50', 'transformation_label': 'String Indexer'}], 'feature': 'ProviderID50', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': '001a87b0-d3ae-430a-aa10-66030913021d', 'max': 'fd58dece-1057-46a6-827d-43b0eb577036', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'ProviderID50', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('ProviderID50')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'ProviderGroup1', 'transformation_label': 'String Indexer'}], 'feature': 'ProviderGroup1', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': 'DenbighFamilyMedicine', 'max': 'Unspecified', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'ProviderGroup1', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('ProviderGroup1')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'ProviderGroup2', 'transformation_label': 'String Indexer'}], 'feature': 'ProviderGroup2', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': 'Unspecified', 'max': 'Unspecified', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'ProviderGroup2', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('ProviderGroup2')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'TransactionID', 'transformation_label': 'String Indexer'}], 'feature': 'TransactionID', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': '00a9b297-cf24-447c-b4e1-b82507ebad33', 'max': 'fead2770-918c-422c-99bd-097edd39fde8', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'TransactionID', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('TransactionID')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'ChargeDetailID56', 'transformation_label': 'String Indexer'}], 'feature': 'ChargeDetailID56', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': '00edf044-8e73-4dbf-8656-465f2d1d46dd', 'max': 'ffb7ca3c-8cc3-49c7-b1e3-baf72a94e530', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'ChargeDetailID56', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('ChargeDetailID56')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'SourceKey58', 'transformation_label': 'String Indexer'}], 'feature': 'SourceKey58', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': 'CO109', 'max': 'PR33', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'SourceKey58', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('SourceKey58')
        transformationDF = ExtractDateTransform(transformationDF, {'transformationsData': [{'feature_label': 'ReasonDate', 'transformation_label': 'Extract Date'}], 'feature': 'ReasonDate', 'type': 'date', 'selected': 'True', 'replaceby': 'random', 'stats': {
            'count': '', 'mean': '', 'stddev': '', 'min': '', 'max': '', 'missing': '0'}, 'transformation': [{'transformation': 'Extract Date', 'selectedAsDefault': 1}], 'generated': 'True'}, {'feature_label': 'ReasonDate', 'transformation_label': 'Extract Date'})
        transformationDF = transformationDF.drop('ReasonDate')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'ProviderID62', 'transformation_label': 'String Indexer'}], 'feature': 'ProviderID62', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': '001a87b0-d3ae-430a-aa10-66030913021d', 'max': 'fd58dece-1057-46a6-827d-43b0eb577036', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'ProviderID62', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('ProviderID62')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'FacilityID63', 'transformation_label': 'String Indexer'}], 'feature': 'FacilityID63', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': '087613d0-c7f3-4bcd-900a-abbc6ee3bdad', 'max': 'fde71f17-372e-4cde-95a7-d8b9b45a015a', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'FacilityID63', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('FacilityID63')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'PayerID64', 'transformation_label': 'String Indexer'}], 'feature': 'PayerID64', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': '0147e0b3-7731-485b-85be-38eaa94ceb8f', 'max': 'ff35917b-58d1-495c-90b6-a8eb2a1ad197', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'PayerID64', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('PayerID64')
        transformationDF = ExtractDateTransform(transformationDF, {'transformationsData': [{'feature_label': 'LoadDate65', 'transformation_label': 'Extract Date'}], 'feature': 'LoadDate65', 'type': 'date', 'selected': 'True', 'replaceby': 'random', 'stats': {
            'count': '', 'mean': '', 'stddev': '', 'min': '', 'max': '', 'missing': '0'}, 'transformation': [{'transformation': 'Extract Date', 'selectedAsDefault': 1}], 'generated': 'True'}, {'feature_label': 'LoadDate65', 'transformation_label': 'Extract Date'})
        transformationDF = transformationDF.drop('LoadDate65')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'BillID', 'transformation_label': 'String Indexer'}], 'feature': 'BillID', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': '0072ed45-ac4a-478c-8a82-9c805eb91de1', 'max': 'fff7f4bf-6bf9-462c-bcc1-177e2e37f1b4', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'BillID', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('BillID')
        transformationDF = ExtractDateTransform(transformationDF, {'transformationsData': [{'feature_label': 'BillDate', 'transformation_label': 'Extract Date'}], 'feature': 'BillDate', 'type': 'date', 'selected': 'True', 'replaceby': 'random', 'stats': {
            'count': '', 'mean': '', 'stddev': '', 'min': '', 'max': '', 'missing': '0'}, 'transformation': [{'transformation': 'Extract Date', 'selectedAsDefault': 1}], 'generated': 'True'}, {'feature_label': 'BillDate', 'transformation_label': 'Extract Date'})
        transformationDF = transformationDF.drop('BillDate')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'DepartmentID', 'transformation_label': 'String Indexer'}], 'feature': 'DepartmentID', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': '0a414089-7b3a-4378-ba12-23a2d7272bcf', 'max': 'fb68b47d-bad4-42c2-9ad0-9816864ea30a', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'DepartmentID', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('DepartmentID')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'ReasonCode', 'transformation_label': 'String Indexer'}], 'feature': 'ReasonCode', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '60.37', 'stddev': '79.08', 'min': '1', 'max': 'N807', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'ReasonCode', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('ReasonCode')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'ReasonCategory', 'transformation_label': 'String Indexer'}], 'feature': 'ReasonCategory', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': 'Authorization', 'max': 'TimelyFiling', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'ReasonCategory', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('ReasonCategory')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'ReasonDescription', 'transformation_label': 'String Indexer'}], 'feature': 'ReasonDescription', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {'count': '1008', 'mean': '', 'stddev': '', 'min': 'Alert:Consultourcontractualagreementforrestrictions/billing/paymentinformationrelatedtothesecharges.',
                                                                                                                                                                                                                                                                                    'max': 'Yourclaimcontainsincompleteand/orinvalidinformation,andnoappealrightsareaffordedbecausetheclaimisunprocessable.Pleasesubmitanewclaimwiththecomplete/correctinformation.', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'ReasonDescription', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('ReasonDescription')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'ReasonType', 'transformation_label': 'String Indexer'}], 'feature': 'ReasonType', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '1008', 'mean': '', 'stddev': '', 'min': 'Co-Ins/Deduct', 'max': 'SPAY-PatientResponsibility', 'missing': '0'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}]}, {'feature_label': 'ReasonType', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('ReasonType')
        transformationDF = ExtractDateTransform(transformationDF, {'transformationsData': [{'feature_label': 'LoadDate77', 'transformation_label': 'Extract Date'}], 'feature': 'LoadDate77', 'type': 'date', 'selected': 'True', 'replaceby': 'random', 'stats': {
            'count': '', 'mean': '', 'stddev': '', 'min': '', 'max': '', 'missing': '0'}, 'transformation': [{'transformation': 'Extract Date', 'selectedAsDefault': 1}], 'generated': 'True'}, {'feature_label': 'LoadDate77', 'transformation_label': 'Extract Date'})
        transformationDF = transformationDF.drop('LoadDate77')
        # Render a two-row preview of the final frame in the notebook.
        display(transformationDF.limit(2).toPandas())
        return transformationDF


**AUTOML FUNCTIONS**

In [None]:
from tpot import TPOTClassifier
from sklearn.model_selection import train_test_split
import pyspark


def functionClassification(sparkDF, listOfFeatures, label):
    """Fit a TPOT classifier on a Spark DataFrame and show hold-out accuracy.

    The DataFrame is marked for MEMORY_AND_DISK persistence, collected into
    pandas, and split with ``test_size=0.1`` / ``random_state=1``. Every
    column other than ``label`` is fed to the model as a feature.

    Args:
        sparkDF: Spark DataFrame containing the feature columns and the label.
        listOfFeatures: Not read by this function.
        label: Name of the target column.

    Returns:
        dict with the fitted ``model``, the held-out ``X_test`` / ``y_test``
        arrays, the ``label`` name, and ``columnNames`` (all columns of
        ``sparkDF``, the label column included).
    """
    sparkDF.persist(pyspark.StorageLevel.MEMORY_AND_DISK)
    pandas_frame = sparkDF.toPandas()

    # Separate the feature matrix from the target vector.
    feature_matrix = pandas_frame.drop(label, axis=1).values
    target_vector = pandas_frame[label].values
    partitions = train_test_split(
        feature_matrix, target_vector, test_size=0.1, random_state=1)
    X_train, X_test, y_train, y_test = partitions

    # TPOT search settings collected in one place for readability.
    search_settings = {
        'generations': 10,
        'population_size': 15,
        'max_time_mins': 5,
        'n_jobs': -1,
        'verbosity': 3,
    }
    tpotModel = TPOTClassifier(**search_settings)
    tpotModel.fit(X_train, y_train)

    holdout_score = tpotModel.score(X_test, y_test)
    display(" Accuracy of Model : %s" % holdout_score)

    return {
        'model': tpotModel,
        'X_test': X_test,
        'y_test': y_test,
        'label': label,
        'columnNames': sparkDF.columns,
    }


**READING DATAFRAME**

In [None]:
from pyspark.sql import SparkSession
# Obtain the SparkSession for this notebook (getOrCreate returns the
# existing session when there is one, otherwise builds a new one).
spark = SparkSession.builder.getOrCreate()

# IPython magic: executes the hooks notebook in this namespace.
# NOTE(review): presumably this is where sourcePreExecutionHook /
# sourcePostExecutionHook are defined -- confirm against that notebook.
%run whitespaceappHooks.ipynb
try:
	# Hook invoked before the source is read.
	sourcePreExecutionHook()

	# The connector config is passed as a *string* holding a Python dict
	# literal; DBFSConnector.fetch evaluates it to get the url, delimiter
	# and header settings, then reads the CSV into a Spark DataFrame.
	whitespaceappdbfs = DBFSConnector.fetch(spark, "{'url': '/Whitespace/whitespace.csv', 'file_type': 'Delimeted', 'delimiter': ',', 'is_header': 'Use Header Line'}")

	# Hook invoked with the freshly loaded DataFrame.
	sourcePostExecutionHook(whitespaceappdbfs)

# Any failure above is only logged, not re-raised. If the fetch itself
# failed, whitespaceappdbfs is not assigned by this cell, so later cells
# that reference it may fail in turn.
except Exception as ex: 
	logging.error(ex)


**TRANSFORMING DATAFRAME**

In [None]:
%run whitespaceappHooks.ipynb
try:
	transformationPreExecutionHook()

	whitespaceappautofe = TransformationMain.run(whitespaceappdbfs,json.dumps( {"FE": [{"transformationsData": [{"feature_label": "ChargeDetailID0", "transformation_label": "String Indexer"}], "feature": "ChargeDetailID0", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "00edf044-8e73-4dbf-8656-465f2d1d46dd", "max": "ffb7ca3c-8cc3-49c7-b1e3-baf72a94e530", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Amount1", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "1008", "mean": "251.84", "stddev": "421.28", "min": "4.0", "max": "5154.0", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}]}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Amount2", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "1008", "mean": "109.93", "stddev": "323.94", "min": "-566.3", "max": "5154.0", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}]}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "CPTCodeID3", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "1008", "mean": "1806.87", "stddev": "1195.58", "min": "1", "max": "4009", "missing": "0"}}, {"transformationsData": [{"feature_label": "CPTModifier", "transformation_label": "String Indexer"}], "feature": "CPTModifier", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "25.16", "stddev": "4.75", "min": "24", "max": "XU", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "DiagnosisCodeID2", 
"transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "1008", "mean": "29632.87", "stddev": "22507.73", "min": "13501", "max": "94911", "missing": "0"}}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "DiagnosisCodeID1", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "1008", "mean": "38259.46", "stddev": "20787.49", "min": "13573", "max": "94813", "missing": "0"}}, {"transformationsData": [{"feature_label": "DateOfService", "transformation_label": "Extract Date"}], "feature": "DateOfService", "type": "date", "selected": "True", "replaceby": "random", "stats": {"count": "", "mean": "", "stddev": "", "min": "", "max": "", "missing": "0"}, "transformation": [{"transformation": "Extract Date", "selectedAsDefault": 1}], "generated": "True"}, {"transformationsData": [{"feature_label": "RVUModifier", "transformation_label": "String Indexer"}], "feature": "RVUModifier", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "26.0", "stddev": "0.0", "min": "26", "max": "TC", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "Gender", "transformation_label": "String Indexer"}], "feature": "Gender", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "F", "max": "M", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "DateOfBirth", "transformation_label": "Extract Date"}], "feature": "DateOfBirth", "type": "date", "selected": "True", "replaceby": "random", "stats": {"count": "", "mean": "", "stddev": "", "min": "", "max": "", "missing": "0"}, "transformation": 
[{"transformation": "Extract Date", "selectedAsDefault": 1}], "generated": "True"}, {"transformationsData": [{"feature_label": "LoadDate16", "transformation_label": "Extract Date"}], "feature": "LoadDate16", "type": "date", "selected": "True", "replaceby": "random", "stats": {"count": "", "mean": "", "stddev": "", "min": "", "max": "", "missing": "0"}, "transformation": [{"transformation": "Extract Date", "selectedAsDefault": 1}], "generated": "True"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "EnterpriseID18", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "1008", "mean": "1.01", "stddev": "0.08", "min": "1", "max": "2", "missing": "0"}}, {"transformationsData": [{"feature_label": "PatientID", "transformation_label": "String Indexer"}], "feature": "PatientID", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "001c9b3b-f212-47a5-b233-a5586ca1f709", "max": "fff9493b-ea16-4596-9968-4840dbc60f65", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "PatientNumber", "threshold": 212407.8998015873, "transformation_label": "Binarizer"}], "feature": "PatientNumber", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "1008", "mean": "212407.9", "stddev": "150350.07", "min": "45.0", "max": "472447.0", "missing": "0"}, "transformation": [{"transformation": "Binarizer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "PayerName", "transformation_label": "String Indexer"}], "feature": "PayerName", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "Aetna", "max": "WorkersComp", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 
1}]}, {"transformationsData": [{"feature_label": "PayerCategory", "transformation_label": "String Indexer"}], "feature": "PayerCategory", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "Aetna", "max": "WorkComp", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "IsActive26", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "1008", "mean": "1.0", "stddev": "0.0", "min": "1", "max": "1", "missing": "0"}}, {"transformationsData": [{"feature_label": "LoadDate27", "transformation_label": "Extract Date"}], "feature": "LoadDate27", "type": "date", "selected": "True", "replaceby": "random", "stats": {"count": "", "mean": "", "stddev": "", "min": "", "max": "", "missing": "0"}, "transformation": [{"transformation": "Extract Date", "selectedAsDefault": 1}], "generated": "True"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "SourceSystemID28", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "1008", "mean": "1.0", "stddev": "0.0", "min": "1", "max": "1", "missing": "0"}}, {"transformationsData": [{"feature_label": "PayerID30", "transformation_label": "String Indexer"}], "feature": "PayerID30", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "0147e0b3-7731-485b-85be-38eaa94ceb8f", "max": "ff35917b-58d1-495c-90b6-a8eb2a1ad197", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "PayerGroup", "transformation_label": "String Indexer"}], "feature": "PayerGroup", "type": "string", "selected": 
"True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "Aetna", "max": "OtherGovernment", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "PayerSubGroup1", "transformation_label": "String Indexer"}], "feature": "PayerSubGroup1", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "Aetna", "max": "WorkerComp", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "PayerSubGroup2", "transformation_label": "String Indexer"}], "feature": "PayerSubGroup2", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "Aetna", "max": "WorkerComp", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "Firstname38", "transformation_label": "String Indexer"}], "feature": "Firstname38", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "Agnieszka", "max": "Yu", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "Lastname40", "transformation_label": "String Indexer"}], "feature": "Lastname40", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "Albert", "max": "Zuniga", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "SpecialityID", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "1008", "mean": "35.4", 
"stddev": "29.79", "min": "-1", "max": "120", "missing": "0"}}, {"transformationsData": [{"feature_label": "ProviderCategory", "transformation_label": "String Indexer"}], "feature": "ProviderCategory", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "Albert,Audra", "max": "Zuniga,Dorcas", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "IsActive44", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "1008", "mean": "1.0", "stddev": "0.0", "min": "1", "max": "1", "missing": "0"}}, {"transformationsData": [{"feature_label": "ProviderName", "transformation_label": "String Indexer"}], "feature": "ProviderName", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "Albert,Audra-8F576202-A3BD-4943-9BA1-AA523E353890", "max": "Zuniga,Dorcas-E217C7D0-130E-47B6-AC58-59C99E52E0F6", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "LoadDate46", "transformation_label": "Extract Date"}], "feature": "LoadDate46", "type": "date", "selected": "True", "replaceby": "random", "stats": {"count": "", "mean": "", "stddev": "", "min": "", "max": "", "missing": "0"}, "transformation": [{"transformation": "Extract Date", "selectedAsDefault": 1}], "generated": "True"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "SourceSystemID47", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "1008", "mean": "1.0", "stddev": "0.0", "min": "1", "max": "1", "missing": "0"}}, {"transformationsData": [{"feature_label": 
"ProviderID50", "transformation_label": "String Indexer"}], "feature": "ProviderID50", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "001a87b0-d3ae-430a-aa10-66030913021d", "max": "fd58dece-1057-46a6-827d-43b0eb577036", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "TaxonomyID", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "1008", "mean": "360.52", "stddev": "173.8", "min": "-1", "max": "770", "missing": "0"}}, {"transformationsData": [{"feature_label": "ProviderGroup1", "transformation_label": "String Indexer"}], "feature": "ProviderGroup1", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "DenbighFamilyMedicine", "max": "Unspecified", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "ProviderGroup2", "transformation_label": "String Indexer"}], "feature": "ProviderGroup2", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "Unspecified", "max": "Unspecified", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "TransactionID", "transformation_label": "String Indexer"}], "feature": "TransactionID", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "00a9b297-cf24-447c-b4e1-b82507ebad33", "max": "fead2770-918c-422c-99bd-097edd39fde8", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": 
"ChargeDetailID56", "transformation_label": "String Indexer"}], "feature": "ChargeDetailID56", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "00edf044-8e73-4dbf-8656-465f2d1d46dd", "max": "ffb7ca3c-8cc3-49c7-b1e3-baf72a94e530", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "TransactionReasonCodeID57", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "1008", "mean": "1127.96", "stddev": "551.89", "min": "1", "max": "3204", "missing": "0"}}, {"transformationsData": [{"feature_label": "SourceKey58", "transformation_label": "String Indexer"}], "feature": "SourceKey58", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "CO109", "max": "PR33", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Amount59", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "1008", "mean": "251.84", "stddev": "421.28", "min": "4.0", "max": "5154.0", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "ReasonDate", "transformation_label": "Extract Date"}], "feature": "ReasonDate", "type": "date", "selected": "True", "replaceby": "random", "stats": {"count": "", "mean": "", "stddev": "", "min": "", "max": "", "missing": "0"}, "transformation": [{"transformation": "Extract Date", "selectedAsDefault": 1}], "generated": "True"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "PracticeID", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": 
"numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "1008", "mean": "1.01", "stddev": "0.08", "min": "1", "max": "2", "missing": "0"}}, {"transformationsData": [{"feature_label": "ProviderID62", "transformation_label": "String Indexer"}], "feature": "ProviderID62", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "001a87b0-d3ae-430a-aa10-66030913021d", "max": "fd58dece-1057-46a6-827d-43b0eb577036", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "FacilityID63", "transformation_label": "String Indexer"}], "feature": "FacilityID63", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "087613d0-c7f3-4bcd-900a-abbc6ee3bdad", "max": "fde71f17-372e-4cde-95a7-d8b9b45a015a", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "PayerID64", "transformation_label": "String Indexer"}], "feature": "PayerID64", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "0147e0b3-7731-485b-85be-38eaa94ceb8f", "max": "ff35917b-58d1-495c-90b6-a8eb2a1ad197", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "LoadDate65", "transformation_label": "Extract Date"}], "feature": "LoadDate65", "type": "date", "selected": "True", "replaceby": "random", "stats": {"count": "", "mean": "", "stddev": "", "min": "", "max": "", "missing": "0"}, "transformation": [{"transformation": "Extract Date", "selectedAsDefault": 1}], "generated": "True"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "CPTCodeID66", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": 
"numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "1008", "mean": "1806.87", "stddev": "1195.58", "min": "1", "max": "4009", "missing": "0"}}, {"transformationsData": [{"feature_label": "BillID", "transformation_label": "String Indexer"}], "feature": "BillID", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "0072ed45-ac4a-478c-8a82-9c805eb91de1", "max": "fff7f4bf-6bf9-462c-bcc1-177e2e37f1b4", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "BillDate", "transformation_label": "Extract Date"}], "feature": "BillDate", "type": "date", "selected": "True", "replaceby": "random", "stats": {"count": "", "mean": "", "stddev": "", "min": "", "max": "", "missing": "0"}, "transformation": [{"transformation": "Extract Date", "selectedAsDefault": 1}], "generated": "True"}, {"transformationsData": [{"feature_label": "DepartmentID", "transformation_label": "String Indexer"}], "feature": "DepartmentID", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "0a414089-7b3a-4378-ba12-23a2d7272bcf", "max": "fb68b47d-bad4-42c2-9ad0-9816864ea30a", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "TransactionReasonCodeID70", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "1008", "mean": "1127.96", "stddev": "551.89", "min": "1", "max": "3204", "missing": "0"}}, {"transformationsData": [{"feature_label": "ReasonCode", "transformation_label": "String Indexer"}], "feature": "ReasonCode", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "60.37", "stddev": "79.08", "min": "1", 
"max": "N807", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "ReasonCategory", "transformation_label": "String Indexer"}], "feature": "ReasonCategory", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "Authorization", "max": "TimelyFiling", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "ReasonDescription", "transformation_label": "String Indexer"}], "feature": "ReasonDescription", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "Alert:Consultourcontractualagreementforrestrictions/billing/paymentinformationrelatedtothesecharges.", "max": "Yourclaimcontainsincompleteand/orinvalidinformation,andnoappealrightsareaffordedbecausetheclaimisunprocessable.Pleasesubmitanewclaimwiththecomplete/correctinformation.", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "ReasonType", "transformation_label": "String Indexer"}], "feature": "ReasonType", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "1008", "mean": "", "stddev": "", "min": "Co-Ins/Deduct", "max": "SPAY-PatientResponsibility", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "isActive76", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "1008", "mean": "0.99", "stddev": "0.1", "min": "0", "max": "1", "missing": "0"}}, {"transformationsData": [{"feature_label": "LoadDate77", "transformation_label": "Extract Date"}], "feature": "LoadDate77", 
"type": "date", "selected": "True", "replaceby": "random", "stats": {"count": "", "mean": "", "stddev": "", "min": "", "max": "", "missing": "0"}, "transformation": [{"transformation": "Extract Date", "selectedAsDefault": 1}], "generated": "True"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "status", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "1008", "mean": "0.52", "stddev": "0.5", "min": "0", "max": "1", "missing": "0"}}, {"feature": "ChargeDetailID0_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "345.78", "stddev": "252.58", "min": "0.0", "max": "819.0", "missing": "0"}}, {"feature": "CPTModifier_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "1.39", "stddev": "2.73", "min": "0.0", "max": "23.0", "missing": "0"}}, {"feature": "DateOfService_dayofmonth", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "15.39", "stddev": "12.45", "min": "1", "max": "31", "missing": "0"}}, {"feature": "DateOfService_month", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "7.15", "stddev": "5.41", "min": "1", "max": "12", "missing": "0"}}, {"feature": "DateOfService_year", "transformation": [{"transformation": 
"novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "2019.43", "stddev": "0.5", "min": "2018", "max": "2020", "missing": "0"}}, {"feature": "RVUModifier_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "selected": "True", "stats": {"count": "1008", "mean": "0.0", "stddev": "0.03", "min": "0", "max": "1", "missing": "0"}}, {"feature": "Gender_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "selected": "True", "stats": {"count": "1008", "mean": "0.39", "stddev": "0.49", "min": "0", "max": "1", "missing": "0"}}, {"feature": "DateOfBirth_dayofmonth", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "15.55", "stddev": "8.64", "min": "1", "max": "31", "missing": "0"}}, {"feature": "DateOfBirth_month", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "6.87", "stddev": "3.37", "min": "1", "max": "12", "missing": "0"}}, {"feature": "DateOfBirth_year", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "1955.74", "stddev": "18.34", "min": "1922", "max": "2019", "missing": "0"}}, {"feature": "LoadDate16_dayofmonth", "transformation": 
[{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "3.04", "stddev": "0.44", "min": "3", "max": "8", "missing": "0"}}, {"feature": "LoadDate16_month", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "1.0", "stddev": "0.0", "min": "1", "max": "1", "missing": "0"}}, {"feature": "LoadDate16_year", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "2020.0", "stddev": "0.0", "min": "2020", "max": "2020", "missing": "0"}}, {"feature": "PatientID_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "253.21", "stddev": "210.04", "min": "0.0", "max": "683.0", "missing": "0"}}, {"feature": "PatientNumber_binarizer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "0.5", "stddev": "0.5", "min": "0.0", "max": "1.0", "missing": "0"}}, {"feature": "PayerName_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "4.17", "stddev": "7.76", "min": "0.0", "max": "50.0", "missing": "0"}}, {"feature": "PayerCategory_stringindexer_transform", 
"transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "2.26", "stddev": "3.3", "min": "0.0", "max": "18.0", "missing": "0"}}, {"feature": "LoadDate27_dayofmonth", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "3.01", "stddev": "0.27", "min": "3", "max": "8", "missing": "0"}}, {"feature": "LoadDate27_month", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "1.0", "stddev": "0.0", "min": "1", "max": "1", "missing": "0"}}, {"feature": "LoadDate27_year", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "2020.0", "stddev": "0.0", "min": "2020", "max": "2020", "missing": "0"}}, {"feature": "PayerID30_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "4.29", "stddev": "8.08", "min": "0.0", "max": "52.0", "missing": "0"}}, {"feature": "PayerGroup_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "0.94", "stddev": "1.72", "min": "0.0", "max": "7.0", "missing": "0"}}, {"feature": "PayerSubGroup1_stringindexer_transform", 
"transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "1.24", "stddev": "1.78", "min": "0.0", "max": "10.0", "missing": "0"}}, {"feature": "PayerSubGroup2_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "1.49", "stddev": "2.26", "min": "0.0", "max": "13.0", "missing": "0"}}, {"feature": "Firstname38_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "32.04", "stddev": "30.57", "min": "0.0", "max": "124.0", "missing": "0"}}, {"feature": "Lastname40_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "38.61", "stddev": "36.52", "min": "0.0", "max": "151.0", "missing": "0"}}, {"feature": "ProviderCategory_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "40.38", "stddev": "38.08", "min": "0.0", "max": "156.0", "missing": "0"}}, {"feature": "ProviderName_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "40.38", "stddev": "38.08", "min": "0.0", "max": "156.0", "missing": "0"}}, {"feature": "LoadDate46_dayofmonth", 
"transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "3.0", "stddev": "0.0", "min": "3", "max": "3", "missing": "0"}}, {"feature": "LoadDate46_month", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "1.0", "stddev": "0.0", "min": "1", "max": "1", "missing": "0"}}, {"feature": "LoadDate46_year", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "2020.0", "stddev": "0.0", "min": "2020", "max": "2020", "missing": "0"}}, {"feature": "ProviderID50_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "40.38", "stddev": "38.08", "min": "0.0", "max": "156.0", "missing": "0"}}, {"feature": "ProviderGroup1_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "selected": "True", "stats": {"count": "1008", "mean": "0.02", "stddev": "0.14", "min": "0", "max": "1", "missing": "0"}}, {"feature": "ProviderGroup2_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "selected": "True", "stats": {"count": "1008", "mean": "0.0", "stddev": "0.0", "min": "0", "max": "0", "missing": "0"}}, {"feature": 
"TransactionID_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "374.46", "stddev": "265.49", "min": "0.0", "max": "859.0", "missing": "0"}}, {"feature": "ChargeDetailID56_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "345.78", "stddev": "252.58", "min": "0.0", "max": "819.0", "missing": "0"}}, {"feature": "SourceKey58_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "7.1", "stddev": "11.74", "min": "0.0", "max": "69.0", "missing": "0"}}, {"feature": "ReasonDate_dayofmonth", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "17.91", "stddev": "7.65", "min": "1", "max": "31", "missing": "0"}}, {"feature": "ReasonDate_month", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "1.43", "stddev": "1.07", "min": "1", "max": "6", "missing": "0"}}, {"feature": "ReasonDate_year", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "2020.0", "stddev": "0.0", "min": "2020", "max": "2020", 
"missing": "0"}}, {"feature": "ProviderID62_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "40.38", "stddev": "38.08", "min": "0.0", "max": "156.0", "missing": "0"}}, {"feature": "FacilityID63_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "18.3", "stddev": "19.64", "min": "0.0", "max": "84.0", "missing": "0"}}, {"feature": "PayerID64_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "6.6", "stddev": "10.44", "min": "0.0", "max": "63.0", "missing": "0"}}, {"feature": "LoadDate65_dayofmonth", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "24.72", "stddev": "7.83", "min": "1", "max": "31", "missing": "0"}}, {"feature": "LoadDate65_month", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "2.68", "stddev": "1.05", "min": "1", "max": "6", "missing": "0"}}, {"feature": "LoadDate65_year", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "2020.0", "stddev": "0.0", "min": "2020", 
"max": "2020", "missing": "0"}}, {"feature": "BillID_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "276.4", "stddev": "220.88", "min": "0.0", "max": "718.0", "missing": "0"}}, {"feature": "BillDate_dayofmonth", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "3.05", "stddev": "3.08", "min": "1", "max": "31", "missing": "0"}}, {"feature": "BillDate_month", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "1.24", "stddev": "1.54", "min": "1", "max": "12", "missing": "0"}}, {"feature": "BillDate_year", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "2019.98", "stddev": "0.16", "min": "2019", "max": "2020", "missing": "0"}}, {"feature": "DepartmentID_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "2.07", "stddev": "3.36", "min": "0.0", "max": "23.0", "missing": "0"}}, {"feature": "ReasonCode_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "6.82", "stddev": "10.94", "min": 
"0.0", "max": "64.0", "missing": "0"}}, {"feature": "ReasonCategory_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "1.84", "stddev": "2.85", "min": "0.0", "max": "17.0", "missing": "0"}}, {"feature": "ReasonDescription_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "6.82", "stddev": "10.94", "min": "0.0", "max": "64.0", "missing": "0"}}, {"feature": "ReasonType_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "1008", "mean": "1.19", "stddev": "1.14", "min": "0.0", "max": "4.0", "missing": "0"}}, {"feature": "LoadDate77_dayofmonth", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "10.0", "stddev": "0.0", "min": "10", "max": "10", "missing": "0"}}, {"feature": "LoadDate77_month", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "1.0", "stddev": "0.0", "min": "1", "max": "1", "missing": "0"}}, {"feature": "LoadDate77_year", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "generated": "True", "selected": "True", "stats": {"count": "1008", "mean": "2020.0", "stddev": 
"0.0", "min": "2020", "max": "2020", "missing": "0"}}]}))

	transformationPostExecutionHook(whitespaceappautofe)

except Exception as ex: 
	logging.error(ex)


**TRAIN MODEL**

In [None]:
%run whitespaceappHooks.ipynb
try:
    # Train the classifier on the engineered feature frame and keep the result
    # in `dataAutoML`; the prediction cell reads 'model', 'X_test', 'y_test',
    # 'label' and 'columnNames' from it.
    # NOTE(review): mlPreExecutionHook / mlPostExecutionHook are not defined in
    # this cell -- presumably supplied by whitespaceappHooks.ipynb; confirm.
    mlPreExecutionHook()

    dataAutoML = functionClassification(
        whitespaceappautofe,
        [
            # Columns passed through as-is.
            "Amount1", "Amount2", "CPTCodeID3", "DiagnosisCodeID2",
            "DiagnosisCodeID1", "EnterpriseID18", "IsActive26",
            "SourceSystemID28", "SpecialityID", "IsActive44",
            "SourceSystemID47", "TaxonomyID", "TransactionReasonCodeID57",
            "Amount59", "PracticeID", "CPTCodeID66",
            "TransactionReasonCodeID70", "isActive76",
            # Columns named after the transformation applied to them.
            "ChargeDetailID0_stringindexer", "CPTModifier_stringindexer",
            "DateOfService_extractdate", "RVUModifier_stringindexer",
            "Gender_stringindexer", "DateOfBirth_extractdate",
            "LoadDate16_extractdate", "PatientID_stringindexer",
            "PatientNumber_binarizer", "PayerName_stringindexer",
            "PayerCategory_stringindexer", "LoadDate27_extractdate",
            "PayerID30_stringindexer", "PayerGroup_stringindexer",
            "PayerSubGroup1_stringindexer", "PayerSubGroup2_stringindexer",
            "Firstname38_stringindexer", "Lastname40_stringindexer",
            "ProviderCategory_stringindexer", "ProviderName_stringindexer",
            "LoadDate46_extractdate", "ProviderID50_stringindexer",
            "ProviderGroup1_stringindexer", "ProviderGroup2_stringindexer",
            "TransactionID_stringindexer", "ChargeDetailID56_stringindexer",
            "SourceKey58_stringindexer", "ReasonDate_extractdate",
            "ProviderID62_stringindexer", "FacilityID63_stringindexer",
            "PayerID64_stringindexer", "LoadDate65_extractdate",
            "BillID_stringindexer", "BillDate_extractdate",
            "DepartmentID_stringindexer", "ReasonCode_stringindexer",
            "ReasonCategory_stringindexer", "ReasonDescription_stringindexer",
            "ReasonType_stringindexer", "LoadDate77_extractdate",
        ],
        "status",  # label column to predict
    )

    mlPostExecutionHook(dataAutoML)

except Exception as ex:
    # Keep the cell best-effort (no re-raise), but log the full traceback:
    # if training fails, `dataAutoML` is never bound and the prediction cell
    # will fail on it, so the root cause must be visible here.
    logging.exception(ex)


**PREDICT ON TRAINED MODEL**

In [None]:
import pandas as pd
import numpy as np
import sklearn.metrics

try:
    # Score the trained model on the held-out split stored in `dataAutoML`
    # and show the weighted classification metrics plus a preview of the
    # test rows with actual and predicted labels side by side.
    model = dataAutoML['model']
    X_test = dataAutoML['X_test']
    y_test = dataAutoML['y_test']
    label = dataAutoML['label']

    # Copy the stored column list before editing it. Mutating
    # dataAutoML['columnNames'] in place made this cell non-re-runnable: the
    # second run would see the "<label>_predicted" entry left by the first
    # run and build the frame with the wrong set of columns.
    feature_columns = list(dataAutoML['columnNames'])
    if label in feature_columns:
        feature_columns.remove(label)

    predicted = label + "_predicted"
    y_predicted = model.predict(X_test)

    df = pd.DataFrame(X_test, columns=feature_columns)
    df[label] = y_test
    df[predicted] = y_predicted

    # Same final value as before (label, prediction, then the features),
    # now held in a list that is private to this cell.
    columnNames = [label, predicted] + feature_columns

    # Metrics are reported as percentages rounded to one decimal place.
    # F1 / precision / recall use weighted averaging over the classes.
    Accuracy = np.round(
        100 * sklearn.metrics.accuracy_score(y_true=y_test, y_pred=y_predicted), 1)
    F1 = np.round(
        100 * sklearn.metrics.f1_score(
            y_true=y_test, y_pred=y_predicted, average="weighted"), 1)
    Precision = np.round(
        100 * sklearn.metrics.precision_score(
            y_true=y_test, y_pred=y_predicted, average="weighted"), 1)
    Recall = np.round(
        100 * sklearn.metrics.recall_score(
            y_true=y_test, y_pred=y_predicted, average="weighted"), 1)

    # NOTE(review): `display` is not defined in this cell -- presumably the
    # notebook-provided built-in; confirm when running outside a notebook.
    display(" Accuracy of Prediction on test data    : %s" % Accuracy)
    display(" F1 score of Prediction on test data    : %s" % F1)
    display(" Precision of Prediction on test data   : %s" % Precision)
    display(" Recall of Prediction on test data      : %s" % Recall)
    display(df.head())
except Exception as ex:
    # Best-effort cell: do not abort the notebook, but record the traceback
    # so a missing `dataAutoML` or a shape mismatch can be diagnosed.
    logging.exception(ex)

