***GENERATED CODE FOR autofedataprep PIPELINE.***

***DON'T EDIT THIS CODE.***

***CONNECTOR FUNCTIONS TO READ DATA.***

In [None]:
import logging
import warnings
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')
# Root logger: INFO and above, rendered as "LEVEL:message".
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)


class DBFSConnector:
    """Connector that reads delimited files into Spark and writes frames out.

    Both methods are invoked on the class itself
    (``DBFSConnector.fetch(spark, config)``), so they are static methods.
    """

    @staticmethod
    def fetch(spark, config):
        """Load a delimited file into a Spark DataFrame and preview two rows.

        Args:
            spark: Session object whose ``read`` attribute performs the load.
            config: Connector settings, either a dict or the string form of
                a Python dict literal. The keys read are ``is_header``,
                ``delimiter`` and ``url``.

        Returns:
            The DataFrame returned by ``spark.read.options(...).csv(url)``.

        Raises:
            ValueError, SyntaxError: If ``config`` is a string that is not a
                valid Python literal.
            KeyError: If one of the required keys is missing.
        """
        # Local import keeps the block self-contained. literal_eval parses
        # the dict literal without executing arbitrary code, unlike eval().
        import ast

        if isinstance(config, dict):
            settings = config
        else:
            settings = ast.literal_eval(config)

        use_header = settings["is_header"] == "Use Header Line"
        reader = spark.read.options(
            header='true' if use_header else 'false',
            inferschema='true',
            delimiter=settings["delimiter"])
        df = reader.csv(settings['url'])

        # NOTE(review): `display` is not defined in this file; presumably
        # the notebook runtime provides it -- confirm.
        display(df.limit(2).toPandas())
        return df

    @staticmethod
    def put(df, path):
        """Write ``df`` as CSV under the ``/dbfs`` prefix.

        Args:
            df: Frame exposing ``to_csv(path, header=..., index=...)``.
            path: Destination path; ``"/dbfs"`` is prepended to it.

        Returns:
            True once ``to_csv`` has returned.
        """
        # The previous code read an undefined `config` here and ignored
        # `path`, so every call raised NameError.
        df.to_csv("/dbfs" + path, header=True, index=False)
        return True


***TRANSFORMATION FUNCTIONS THAT WILL BE APPLIED TO THE DATA***

In [None]:
import json
from pyspark.sql.types import IntegerType
from pyspark.ml.feature import StringIndexer
from pyspark.sql.functions import col, when
from pyspark.sql.types import IntegerType
from pyspark.sql.functions import mean, stddev, min, max, col


class CleanseData:
    """Missing-value replacement strategies for DataFrame columns.

    Each ``replaceBy*`` method computes one replacement value for a column
    and returns a new frame in which that column's nulls and single-space
    cells (``" "``) hold the value. The results of ``fillna`` and
    ``withColumn`` are always re-bound; the earlier code discarded the
    ``fillna`` result, so nulls were never actually replaced.
    """

    def _replaceMissing(self, feature, df, fillValue, blankValue):
        """Replace nulls with ``fillValue`` and ``" "`` cells with ``blankValue``.

        Args:
            feature: Schema field; only ``feature.name`` is read.
            df: Frame to transform.
            fillValue: Value handed to ``fillna`` for the column.
            blankValue: Value substituted where the column equals ``" "``.

        Returns:
            The transformed frame, or ``df`` unchanged when no replacement
            value could be computed (``blankValue`` is None).
        """
        if blankValue is None:
            # E.g. an aggregate over a column holding only nulls; there is
            # nothing sensible to substitute.
            return df
        df = df.fillna(fillValue, subset=[feature.name])
        return df.withColumn(
            feature.name,
            when(col(feature.name) == " ", blankValue).otherwise(col(feature.name)))

    def replaceByMean(self, feature, df, mean_=-1):
        """Replace missing values of ``feature`` with the column mean.

        ``mean_`` is unused and kept only for interface compatibility.
        The non-blank branch keeps the column as-is, like the sibling
        methods. The earlier expression cast it to Integer, but its result
        was discarded; applying that cast would truncate real-valued columns.
        """
        meanValue = df.select(mean(col(feature.name)).alias(
            'mean')).collect()[0]["mean"]
        return self._replaceMissing(feature, df, meanValue, meanValue)

    def replaceByMax(self, feature, df, max_=-1):
        """Replace missing values of ``feature`` with the column maximum.

        ``max_`` is unused and kept only for interface compatibility.
        """
        maxValue = df.select(max(col(feature.name)).alias('max')).collect()[
            0]["max"]
        return self._replaceMissing(feature, df, maxValue, maxValue)

    def replaceByMin(self, feature, df, min_=-1):
        """Replace missing values of ``feature`` with the column minimum.

        ``min_`` is unused and kept only for interface compatibility.
        """
        minValue = df.select(min(col(feature.name)).alias('min')).collect()[
            0]["min"]
        return self._replaceMissing(feature, df, minValue, minValue)

    def replaceByStandardDeviation(self, feature, df, stddev_=-1):
        """Replace missing values of ``feature`` with the column stddev.

        ``stddev_`` is unused and kept only for interface compatibility.
        """
        stddevValue = df.select(stddev(col(feature.name)).alias(
            'stddev')).collect()[0]["stddev"]
        return self._replaceMissing(feature, df, stddevValue, stddevValue)

    def replaceDateRandomly(self, feature, df):
        """Replace missing values with the first non-null value found.

        Despite the name, the value is not random: it is taken from the
        first row returned for the non-null filter. Nulls receive its
        ``str()`` form; ``" "`` cells receive the value itself.
        """
        rows = df.where(col(feature.name).isNotNull()).head(1)
        if not rows:
            # No non-null value exists to copy from.
            return df
        fillValue = rows[0][feature.name]
        return self._replaceMissing(feature, df, str(fillValue), fillValue)

    def replaceNullValues(self, fList, df):
        """Apply each configured replacement strategy to its matching columns.

        Args:
            fList: Iterable of dicts. ``"feature"`` selects columns and
                ``"replaceby"`` names the strategy. Entries without a
                ``"replaceby"`` value are skipped instead of raising KeyError.
            df: Frame whose ``schema.fields`` supplies the candidate columns.

        Returns:
            The frame after all matching replacements.
        """
        # Order matters: the first keyword contained in "replaceby" wins.
        strategies = (
            ("mean", self.replaceByMean),
            ("max", self.replaceByMax),
            ("min", self.replaceByMin),
            ("stddev", self.replaceByStandardDeviation),
            ("random", self.replaceDateRandomly),
        )
        featuresList = df.schema.fields
        for featureObj in fList:
            replaceBy = featureObj.get("replaceby") or ""
            for feat in featuresList:
                # NOTE(review): this is a substring match, so "State" also
                # selects a column named "State_x" -- confirm this is wanted.
                if featureObj["feature"] not in feat.name:
                    continue
                for keyword, strategy in strategies:
                    if keyword in replaceBy:
                        df = strategy(feat, df)
                        break
        return df


def StringIndexerTransform(df, params, transformationData=None):
    """Add a ``<feature>_stringindexer`` column holding label indices.

    Args:
        df: Input DataFrame.
        params: Dict; only ``params["feature"]`` (the column to index) is read.
        transformationData: Unused; kept for interface compatibility. The
            default is ``None`` rather than a shared mutable dict.

    Returns:
        A DataFrame with the extra index column. When the column has at
        most four distinct indices it is cast to ``IntegerType``; otherwise
        it is left as produced by ``StringIndexer``.
    """
    feature = params["feature"]
    outcol = feature + "_stringindexer"

    # Nulls become the empty-string label so they are indexed like any
    # other value.
    filled = df.fillna({feature: ''})
    indexer = StringIndexer(
        inputCol=feature, outputCol=outcol, handleInvalid="skip")
    indexed = indexer.fit(filled).transform(filled)

    # Only the number of distinct indices matters, so count them instead of
    # collecting every value to the driver.
    distinctCount = indexed.select(outcol).distinct().count()
    if distinctCount <= 4:
        return indexed.withColumn(outcol, indexed[outcol].cast(IntegerType()))
    return indexed


class TransformationMain:
    """Entry point of the transformation stage.

    ``run`` is invoked on the class (``TransformationMain.run(df, config)``),
    so it is a static method.
    """

    # (feature, min, max) for every column that is string-indexed and then
    # dropped, in execution order.
    _STRING_INDEXED_FEATURES = (
        ('State', 'AK', 'WY'),
        ('Phone', '327-8732', '422-8344'),
        ('Intl_Plan', 'no', 'yes'),
        ('VMail_Plan', 'no', 'yes'),
        ('cluster_labels', 'day_callers', 'vmailers'),
    )

    @staticmethod
    def _stringIndexerArgs(feature, minValue, maxValue):
        """Build the (params, transformationData) pair for one feature."""
        params = {
            'transformationsData': [
                {'feature_label': feature, 'transformation_label': 'String Indexer'}],
            'feature': feature,
            'type': 'string',
            'selected': 'True',
            'replaceby': 'max',
            'stats': {'count': '181', 'mean': '', 'stddev': '',
                      'min': minValue, 'max': maxValue, 'missing': '0'},
            'transformation': [
                {'transformation': 'String Indexer', 'selectedAsDefault': 1}],
            'updatedLabel': feature,
        }
        transformationData = {'feature_label': feature,
                              'transformation_label': 'String Indexer'}
        return params, transformationData

    # TODO: change df argument in run with following
    @staticmethod
    def run(transformationDF, config):
        """Replace missing values, then string-index and drop each listed column.

        Args:
            transformationDF: Input DataFrame.
            config: JSON string; its ``"FE"`` entry is the feature list
                passed to ``CleanseData.replaceNullValues``.

        Returns:
            The transformed DataFrame. A two-row preview is shown with
            ``display`` before returning.
        """
        configObj = json.loads(config)
        featureData = configObj["FE"]
        transformationDF = CleanseData().replaceNullValues(featureData, transformationDF)

        for feature, minValue, maxValue in TransformationMain._STRING_INDEXED_FEATURES:
            params, transformationData = TransformationMain._stringIndexerArgs(
                feature, minValue, maxValue)
            transformationDF = StringIndexerTransform(
                transformationDF, params, transformationData)
            # The indexed copy replaces the original string column.
            transformationDF = transformationDF.drop(feature)

        display(transformationDF.limit(2).toPandas())
        return transformationDF


***OPERATION FUNCTIONS***

In [None]:
import pyspark
from dask.dataframe import from_pandas
import json


def calculateFormula(df, functionsData, applyOn):
    """Evaluate a sequence of binary column operations on ``df``.

    Each step combines two columns with ``calculateEquation``. A step with
    a truthy ``useNext`` stores its result in the temporary column
    ``dataPrepColumn_Inprocess``, which becomes the left operand of the
    following step. A step with a falsy ``useNext`` writes the result to
    its output column and removes the temporary column if present.

    Args:
        df: Frame supporting column item access/assignment and
            ``drop(label, axis=1)``. Result columns are assigned on this
            object.
        functionsData: Iterable of dicts with keys ``useNext``,
            ``operator``, ``operand1``, ``operand2`` and, for final steps,
            ``outputColumn``. The value ``"newColumn"`` names the result
            ``New_<operand1>_<operator>_<operand2>``.
        applyOn: Unused; part of the shared handler signature.

    Returns:
        The frame holding the computed columns.
    """
    inProcess = 'dataPrepColumn_Inprocess'
    dfcp = df
    for functionData in functionsData:
        useNext = functionData['useNext']
        chained = inProcess in list(dfcp.columns)
        if chained:
            # The pending intermediate result is the left operand. A config
            # entry of the same name, if present, still takes precedence
            # (this was the only lookup the previous code performed).
            leftColumn = functionData.get(inProcess, inProcess)
        else:
            leftColumn = functionData['operand1']
        result = calculateEquation(
            dfcp, functionData['operator'], leftColumn, functionData['operand2'])

        if useNext:
            dfcp[inProcess] = result
            continue

        if functionData['outputColumn'] == "newColumn":
            target = "New_" + \
                functionData['operand1'] + "_" + \
                functionData['operator'] + "_" + functionData['operand2']
        else:
            target = functionData['outputColumn']
        dfcp[target] = result
        if chained:
            # drop() returns a new frame; re-bind so the temporary column
            # really disappears from the result.
            dfcp = dfcp.drop(inProcess, axis=1)
    return dfcp


def calculateEquation(df, operator, columnA, columnB):
    """Combine two columns of ``df`` with the named arithmetic method.

    Args:
        df: Frame whose columns expose ``add``/``sub``/``mul``/``div``/``mod``.
        operator: One of ``"add"``, ``"sub"``, ``"mul"``, ``"div"``, ``"mod"``.
        columnA: Name of the left operand column.
        columnB: Name of the right operand column.

    Returns:
        ``df[columnA].<operator>(df[columnB])``.

    Raises:
        ValueError: If ``operator`` is not supported. Previously ``None``
            was returned silently.
    """
    if operator not in ("add", "sub", "mul", "div", "mod"):
        raise ValueError("Unsupported operator: %r" % (operator,))
    return getattr(df[columnA], operator)(df[columnB])


def renameColumns(df, functionsData, applyOn):
    """Rename columns one step at a time and return the resulting frame.

    Each entry of ``functionsData`` supplies ``oldName`` and ``newName``.
    Renames are applied sequentially, so a later entry may refer to a name
    introduced by an earlier one. ``applyOn`` is not used.
    """
    renamed = df
    for spec in functionsData:
        previous, replacement = spec['oldName'], spec['newName']
        renamed = renamed.rename(columns={previous: replacement})
    return renamed


def runDataCleansing(sparkDf, spark, config):
    """Run the configured data-preparation steps and return a Spark frame.

    The Spark frame is persisted, converted to pandas, wrapped in a
    5-partition dask frame, passed through every handler listed under
    ``functionsApplied`` in ``config`` (a JSON string), and converted back
    with ``spark.createDataFrame``. A two-row preview is shown with
    ``display`` before returning.
    """
    settings = json.loads(config)
    sparkDf.persist(pyspark.StorageLevel.MEMORY_AND_DISK)
    pandasFrame = sparkDf.toPandas()
    frame = from_pandas(pandasFrame, npartitions=5)
    appliedSteps = settings['functionsApplied']

    # NOTE(review): replaceValues, aggregation and changeToString are not
    # defined in the visible part of this file -- confirm they exist
    # before this dict is built.
    handlers = {
        "replaceBy": replaceValues,
        "formula": calculateFormula,
        "aggregate": aggregation,
        "converttostringtype": changeToString,
        "editname": renameColumns,
    }

    for step in appliedSteps:
        handler = handlers[step['functionName']]
        frame = handler(frame, step['functionsData'], step['applyOn'])

    cleaned = spark.createDataFrame(frame.compute())
    display(cleaned.limit(2).toPandas())
    return cleaned


***AUTOML FUNCTIONS***

In [None]:
from tpot import TPOTClassifier
from sklearn.model_selection import train_test_split
import pyspark


def functionClassification(sparkDF, listOfFeatures, label):
    """Fit a TPOT classifier on the frame and return it with its hold-out set.

    The Spark frame is persisted and converted to pandas. The columns in
    ``listOfFeatures`` (selected after ``label`` is dropped) form the
    inputs and ``label`` is the target. 10% of the rows are held out with
    ``random_state=1``, and the score on them is shown with ``display``.

    Returns:
        Dict with keys ``model``, ``X_test``, ``y_test``, ``label`` and
        ``columnNames``.
    """
    sparkDF.persist(pyspark.StorageLevel.MEMORY_AND_DISK)
    frame = sparkDF.toPandas()

    withoutLabel = frame.drop(label, axis=1)
    features = withoutLabel[listOfFeatures].values
    target = frame[label].values

    parts = train_test_split(features, target, random_state=1, test_size=0.1)
    X_train, X_test, y_train, y_test = parts

    tpotModel = TPOTClassifier(
        verbosity=3,
        n_jobs=-1,
        generations=10,
        max_time_mins=5,
        population_size=15,
    )
    tpotModel.fit(X_train, y_train)

    accuracy = tpotModel.score(X_test, y_test)
    display(" Accuracy of Model : %s" % accuracy)

    return {
        'model': tpotModel,
        'X_test': X_test,
        'y_test': y_test,
        'label': label,
        'columnNames': listOfFeatures,
    }


***READING DATAFRAME***

In [None]:
from pyspark.sql import SparkSession
spark = SparkSession.builder.getOrCreate()

#%run autofedataprepHooks.ipynb
# Load the training CSV through the connector. The stage stays best-effort:
# a failure is logged and execution continues, but `azuredbfs` is then left
# undefined for the cells that follow.
try:
    #sourcePreExecutionHook()

    azuredbfs = DBFSConnector.fetch(spark, "{'url': '/Demo/PredictiveChurnTrain.csv', 'delimiter': ',', 'file_type': 'Delimeted', 'is_header': 'Use Header Line', 'server_url': '/numtraPlatform/NumtraPlatformV2/uploads/platform/'}")
    #sourcePostExecutionHook(azuredbfs)

except Exception as ex:
    # logging.exception records the traceback as well as the message, so
    # the failing call can be located from the log alone.
    logging.exception(ex)


***TRANSFORMING DATAFRAME***

In [None]:
#%run autofedataprepHooks.ipynb
try:
	#transformationPreExecutionHook()

	autofe = TransformationMain.run(azuredbfs,json.dumps( {"FE": [{"transformationsData": [{"feature_label": "State", "transformation_label": "String Indexer"}], "feature": "State", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "181", "mean": "", "stddev": "", "min": "AK", "max": "WY", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "State"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Account_Length", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "181", "mean": "104.73", "stddev": "40.02", "min": "3", "max": "205", "missing": "0"}, "updatedLabel": "Account_Length"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Area_Code", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "181", "mean": "438.61", "stddev": "43.1", "min": "408", "max": "510", "missing": "0"}, "updatedLabel": "Area_Code"}, {"transformationsData": [{"feature_label": "Phone", "transformation_label": "String Indexer"}], "feature": "Phone", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "181", "mean": "", "stddev": "", "min": "327-8732", "max": "422-8344", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Phone"}, {"transformationsData": [{"feature_label": "Intl_Plan", "transformation_label": "String Indexer"}], "feature": "Intl_Plan", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "181", "mean": "", "stddev": "", "min": "no", "max": "yes", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intl_Plan"}, {"transformationsData": [{"feature_label": 
"VMail_Plan", "transformation_label": "String Indexer"}], "feature": "VMail_Plan", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "181", "mean": "", "stddev": "", "min": "no", "max": "yes", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "VMail_Plan"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "VMail_Message", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "181", "mean": "7.17", "stddev": "12.92", "min": "0", "max": "46", "missing": "0"}, "updatedLabel": "VMail_Message"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Day_Mins", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "181", "mean": "174.4", "stddev": "54.72", "min": "19.5", "max": "335.5", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "Day_Mins"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Day_Calls", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "181", "mean": "99.62", "stddev": "21.59", "min": "45", "max": "150", "missing": "0"}, "updatedLabel": "Day_Calls"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Day_Charge", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "181", "mean": "29.65", "stddev": "9.3", "min": "3.32", "max": "57.04", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "Day_Charge"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Eve_Mins", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "181", "mean": "203.15", "stddev": "54.47", "min": 
"42.5", "max": "354.2", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "Eve_Mins"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Eve_Calls", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "181", "mean": "98.64", "stddev": "20.41", "min": "48", "max": "164", "missing": "0"}, "updatedLabel": "Eve_Calls"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Eve_Charge", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "181", "mean": "17.27", "stddev": "4.63", "min": "3.61", "max": "30.11", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "Eve_Charge"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Night_Mins", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "181", "mean": "193.69", "stddev": "46.61", "min": "57.5", "max": "317.8", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "Night_Mins"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Night_Calls", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "181", "mean": "101.52", "stddev": "19.55", "min": "61", "max": "155", "missing": "0"}, "updatedLabel": "Night_Calls"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Night_Charge", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "181", "mean": "8.72", "stddev": "2.1", "min": "2.59", "max": "14.3", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "Night_Charge"}, {"transformationsData": [{"transformation_label": 
"novalue"}], "feature": "Intl_Mins", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "181", "mean": "10.3", "stddev": "3.1", "min": "0.0", "max": "18.2", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "Intl_Mins"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "total_Mins", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "181", "mean": "581.54", "stddev": "92.72", "min": "301.5", "max": "842.3", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "total_Mins"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Intl_Calls", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "181", "mean": "4.45", "stddev": "2.64", "min": "0", "max": "13", "missing": "0"}, "updatedLabel": "Intl_Calls"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Intl_Charge", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "181", "mean": "2.78", "stddev": "0.84", "min": "0.0", "max": "4.91", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "Intl_Charge"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Total_Charge", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "181", "mean": "58.42", "stddev": "10.83", "min": "25.52", "max": "90.46", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "Total_Charge"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "CustServ_Calls", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": 
"181", "mean": "1.62", "stddev": "1.38", "min": "0", "max": "7", "missing": "0"}, "updatedLabel": "CustServ_Calls"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Churn", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "181", "mean": "0.1", "stddev": "0.3", "min": "0", "max": "1", "missing": "0"}, "updatedLabel": "Churn"}, {"transformationsData": [{"feature_label": "cluster_labels", "transformation_label": "String Indexer"}], "feature": "cluster_labels", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "181", "mean": "", "stddev": "", "min": "day_callers", "max": "vmailers", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "cluster_labels"}, {"feature": "State_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "181", "mean": "16.96", "stddev": "13.19", "min": "0.0", "max": "49.0", "missing": "0"}, "updatedLabel": "State_stringindexer_trans..."}, {"feature": "Phone_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "181", "mean": "90.0", "stddev": "52.39", "min": "0.0", "max": "180.0", "missing": "0"}, "updatedLabel": "Phone_stringindexer_trans..."}, {"feature": "Intl_Plan_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "selected": "True", "stats": {"count": "181", "mean": "0.09", "stddev": "0.28", "min": "0", "max": "1", "missing": "0"}, "updatedLabel": 
"Intl_Plan_stringindexer_t..."}, {"feature": "VMail_Plan_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "selected": "True", "stats": {"count": "181", "mean": "0.25", "stddev": "0.44", "min": "0", "max": "1", "missing": "0"}, "updatedLabel": "VMail_Plan_stringindexer_..."}, {"feature": "cluster_labels_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "181", "mean": "2.24", "stddev": "1.68", "min": "0.0", "max": "5.0", "missing": "0"}, "updatedLabel": "cluster_labels_stringinde..."}]}))

	#transformationPostExecutionHook(autofe)

except Exception as ex: 
	logging.error(ex)


***PERFORMING OPERATIONS***

In [None]:
#%run autofedataprepHooks.ipynb
try:
	#operationPreExecutionHook()

datapreparation = runDataCleansing(autofe,spark,json.dumps( {"url": "/Demo/PredictiveChurnTrain.csv", "source_attributes": {}, "DataPrepFile": "/Demo/PredictiveChurnTrain.csv", "data_source": "DBFS", "startListenerOnly": 1, "FilePath": "/FileStore/platform/extra/61388c01982088568bd49eb01631119264/0part.csv", "requestRatio": 7.0, "totalRows": 3332, "BasicStats": {"missingValues": 0.0, "numberOfColumns": 26, "numberOfRows": 3332, "duplicateRowCount": 0, "stats": [{"column": "Account_Length", "alias": "Account_Length", "generated": 0, "type": "numeric", "max": 243.0, "min": 1.0, "mean": 101.05672268907563, "missing": 0.0, "stddev": 39.83}, {"column": "Area_Code", "alias": "Area_Code", "generated": 0, "type": "numeric", "max": 510.0, "min": 408.0, "mean": 437.1890756302521, "missing": 0.0, "stddev": 42.38}, {"column": "VMail_Message", "alias": "VMail_Message", "generated": 0, "type": "numeric", "max": 51.0, "min": 0.0, "mean": 8.093937575030012, "missing": 0.0, "stddev": 13.69}, {"column": "Day_Mins", "alias": "Day_Mins", "generated": 0, "type": "real", "max": 350.0, "min": 0.0, "mean": 179.30132052821128, "missing": 0.0, "stddev": 54.45}, {"column": "Day_Calls", "alias": "Day_Calls", "generated": 0, "type": "numeric", "max": 165.0, "min": 0.0, "mean": 100.4327731092437, "missing": 0.0, "stddev": 20.07}, {"column": "Day_Charge", "alias": "Day_Charge", "generated": 0, "type": "real", "max": 59.0, "min": 0.0, "mean": 30.06392557022809, "missing": 0.0, "stddev": 9.26}, {"column": "Eve_Mins", "alias": "Eve_Mins", "generated": 0, "type": "real", "max": 363.0, "min": 0.0, "mean": 200.52671068427372, "missing": 0.0, "stddev": 50.72}, {"column": "Eve_Calls", "alias": "Eve_Calls", "generated": 0, "type": "numeric", "max": 170.0, "min": 0.0, "mean": 100.11464585834334, "missing": 0.0, "stddev": 19.93}, {"column": "Eve_Charge", "alias": "Eve_Charge", "generated": 0, "type": "real", "max": 30.0, "min": 0.0, "mean": 16.585534213685474, "missing": 0.0, "stddev": 4.31}, {"column": 
"Night_Mins", "alias": "Night_Mins", "generated": 0, "type": "real", "max": 395.0, "min": 23.0, "mean": 200.4093637454982, "missing": 0.0, "stddev": 50.57}, {"column": "Night_Calls", "alias": "Night_Calls", "generated": 0, "type": "numeric", "max": 175.0, "min": 33.0, "mean": 100.11044417767107, "missing": 0.0, "stddev": 19.57}, {"column": "Night_Charge", "alias": "Night_Charge", "generated": 0, "type": "real", "max": 17.0, "min": 1.0, "mean": 8.540816326530612, "missing": 0.0, "stddev": 2.29}, {"column": "Intl_Mins", "alias": "Intl_Mins", "generated": 0, "type": "real", "max": 20.0, "min": 0.0, "mean": 9.793217286914766, "missing": 0.0, "stddev": 2.81}, {"column": "total_Mins", "alias": "total_Mins", "generated": 0, "type": "real", "max": 885.0, "min": 284.0, "mean": 591.3835534213686, "missing": 0.0, "stddev": 89.94}, {"column": "Intl_Calls", "alias": "Intl_Calls", "generated": 0, "type": "numeric", "max": 20.0, "min": 0.0, "mean": 4.479891956782713, "missing": 0.0, "stddev": 2.46}, {"column": "Intl_Charge", "alias": "Intl_Charge", "generated": 0, "type": "real", "max": 5.0, "min": 0.0, "mean": 2.283313325330132, "missing": 0.0, "stddev": 0.8}, {"column": "Total_Charge", "alias": "Total_Charge", "generated": 0, "type": "real", "max": 96.0, "min": 22.0, "mean": 58.95738295318127, "missing": 0.0, "stddev": 10.5}, {"column": "CustServ_Calls", "alias": "CustServ_Calls", "generated": 0, "type": "numeric", "max": 9.0, "min": 0.0, "mean": 1.5630252100840336, "missing": 0.0, "stddev": 1.32}, {"column": "Churn", "alias": "Churn", "generated": 0, "type": "numeric", "max": 1.0, "min": 0.0, "mean": 0.14495798319327732, "missing": 0.0, "stddev": 0.35}, {"column": "State_stringindexer", "alias": "State_stringindexer", "generated": 0, "type": "real", "max": 50.0, "min": 0.0, "mean": 22.53811524609844, "missing": 0.0, "stddev": 14.55}, {"column": "Phone_stringindexer", "alias": "Phone_stringindexer", "generated": 0, "type": "real", "max": 3331.0, "min": 0.0, "mean": 1665.5, 
"missing": 0.0, "stddev": 962.01}, {"column": "Intl_Plan_stringindexer", "alias": "Intl_Plan_stringindexer", "generated": 0, "type": "numeric", "max": 1.0, "min": 0.0, "mean": 0.09693877551020408, "missing": 0.0, "stddev": 0.3}, {"column": "VMail_Plan_stringindexer", "alias": "VMail_Plan_stringindexer", "generated": 0, "type": "numeric", "max": 1.0, "min": 0.0, "mean": 0.27641056422569027, "missing": 0.0, "stddev": 0.45}, {"column": "cluster_labels_stringindexer", "alias": "cluster_labels_stringindexer", "generated": 0, "type": "real", "max": 5.0, "min": 0.0, "mean": 2.3895558223289317, "missing": 0.0, "stddev": 1.7}, {"column": "dayscallpluscharge", "alias": "dayscallpluscharge", "generated": 1, "type": "real", "max": 199.0, "min": 0.0, "mean": 130.4966986794718, "missing": 0.0, "stddev": 22.16}, {"column": "vmailMessage", "alias": "vmailMessage", "generated": 1, "type": "numeric", "max": 51.0, "min": 0.0, "mean": 8.093937575030012, "missing": 0.0, "stddev": 13.69}]}, "dateColumnNames": [], "predictionPowerScore": [{"Account_Length": 1.0, "Area_Code": 0.0, "Churn": 0.0, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.0, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.0, "dayscallpluscharge": 0.0, "total_Mins": 0.0, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 1.0, "Churn": 0.0, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, 
"Total_Charge": 0.0, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.0, "dayscallpluscharge": 0.0, "total_Mins": 0.0, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 1.0, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.0, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.0, "dayscallpluscharge": 0.0, "total_Mins": 0.0, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0, "CustServ_Calls": 1.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.0, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.0, "dayscallpluscharge": 0.0, "total_Mins": 0.0, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0, "CustServ_Calls": 0.0, "Day_Calls": 1.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.0, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.0011483426, "dayscallpluscharge": 0.5677730208, "total_Mins": 0.0, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0108314816, 
"CustServ_Calls": 0.0003750523, "Day_Calls": 0.0, "Day_Charge": 1.0, "Day_Mins": 0.9920345577, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0002544535, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.5277038517, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.3360908501, "dayscallpluscharge": 0.0680349988, "total_Mins": 0.1307344398, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0116347363, "CustServ_Calls": 0.0008407092, "Day_Calls": 0.0, "Day_Charge": 0.9656454485, "Day_Mins": 1.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0006935986, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.5280294416, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.3373363512, "dayscallpluscharge": 0.0679392605, "total_Mins": 0.1315444788, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 1.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.0, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.0008036626, "dayscallpluscharge": 0.0, "total_Mins": 0.0, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0005128193, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 1.0, "Eve_Mins": 
0.99002592, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.0740881743, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.0936618743, "dayscallpluscharge": 0.0, "total_Mins": 0.098941742, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0039757508, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.9255525764, "Eve_Mins": 1.0, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.0757086655, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0003470132, "cluster_labels_stringindexer": 0.0960182057, "dayscallpluscharge": 0.0, "total_Mins": 0.0999173194, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 1.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.0, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.0, "dayscallpluscharge": 0.0, "total_Mins": 0.0, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, "Intl_Charge": 1.0, "Intl_Mins": 0.8677247984, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, 
"State_stringindexer": 0.0, "Total_Charge": 0.0, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.05443524, "dayscallpluscharge": 0.0, "total_Mins": 0.0, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0087993776, "Intl_Charge": 0.6061754227, "Intl_Mins": 1.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.0, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.1725232336, "dayscallpluscharge": 0.0, "total_Mins": 0.0, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 1.0, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.0, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.0, "dayscallpluscharge": 0.0, "total_Mins": 0.0, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 1.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.0, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.0002641815, "dayscallpluscharge": 0.0, "total_Mins": 0.0, "vmailMessage": 0.0}, {"Account_Length": 0.0, 
"Area_Code": 0.0, "Churn": 0.0, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 1.0, "Night_Mins": 0.9933744413, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.0136261652, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.1979477637, "dayscallpluscharge": 0.0, "total_Mins": 0.0763039507, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 8.2419e-05, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.8645991257, "Night_Mins": 1.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.0104412137, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.1989706388, "dayscallpluscharge": 0.0, "total_Mins": 0.0758470441, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0017758771, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 1.0, "State_stringindexer": 0.0, "Total_Charge": 0.0, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 1.32285e-05, "cluster_labels_stringindexer": 0.0, "dayscallpluscharge": 0.0, "total_Mins": 0.0, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, 
"Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 1.0, "Total_Charge": 0.0, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.0, "dayscallpluscharge": 0.0, "total_Mins": 0.0, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0117779011, "CustServ_Calls": 0.0010134244, "Day_Calls": 0.0, "Day_Charge": 0.5235252531, "Day_Mins": 0.5042880102, "Eve_Calls": 0.0, "Eve_Charge": 0.0833075068, "Eve_Mins": 0.0307609806, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 1.6226e-05, "Night_Calls": 0.0, "Night_Charge": 0.0197636558, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 1.0, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.4359781436, "dayscallpluscharge": 0.0408710966, "total_Mins": 0.4969889034, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.0, "VMail_Message": 1.0, "VMail_Plan_stringindexer": 0.7933795212, "cluster_labels_stringindexer": 0.2915726724, "dayscallpluscharge": 0.0, "total_Mins": 0.0, "vmailMessage": 0.9996662835}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 
0.0, "State_stringindexer": 0.0, "Total_Charge": 0.0, "VMail_Message": 0.9989142237, "VMail_Plan_stringindexer": 1.0, "cluster_labels_stringindexer": 0.3542245921, "dayscallpluscharge": 0.0, "total_Mins": 0.0, "vmailMessage": 0.9989142237}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0671730265, "Day_Mins": 0.0314667936, "Eve_Calls": 0.0, "Eve_Charge": 0.0542253396, "Eve_Mins": 0.0217682844, "Intl_Calls": 0.0, "Intl_Charge": 0.0762536502, "Intl_Mins": 0.1045571874, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.0264226135, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.1690845133, "VMail_Message": 0.0827473657, "VMail_Plan_stringindexer": 0.093832752, "cluster_labels_stringindexer": 1.0, "dayscallpluscharge": 0.0, "total_Mins": 0.1431359474, "vmailMessage": 0.0827473657}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0041147492, "CustServ_Calls": 0.0, "Day_Calls": 0.5709870876, "Day_Charge": 0.0771384958, "Day_Mins": 0.0271251867, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0002700734, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.0571876613, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.0538523876, "dayscallpluscharge": 1.0, "total_Mins": 0.0, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0101125624, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.1971850902, "Day_Mins": 0.1627144154, "Eve_Calls": 0.0, "Eve_Charge": 0.1728642472, "Eve_Mins": 0.1278775641, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.1615524705, "Night_Mins": 0.1137143795, "Phone_stringindexer": 0.0, 
"State_stringindexer": 0.0, "Total_Charge": 0.5369929693, "VMail_Message": 0.0, "VMail_Plan_stringindexer": 0.0, "cluster_labels_stringindexer": 0.4287401908, "dayscallpluscharge": 0.0066929104, "total_Mins": 1.0, "vmailMessage": 0.0}, {"Account_Length": 0.0, "Area_Code": 0.0, "Churn": 0.0, "CustServ_Calls": 0.0, "Day_Calls": 0.0, "Day_Charge": 0.0, "Day_Mins": 0.0, "Eve_Calls": 0.0, "Eve_Charge": 0.0, "Eve_Mins": 0.0, "Intl_Calls": 0.0, "Intl_Charge": 0.0, "Intl_Mins": 0.0, "Intl_Plan_stringindexer": 0.0, "Night_Calls": 0.0, "Night_Charge": 0.0, "Night_Mins": 0.0, "Phone_stringindexer": 0.0, "State_stringindexer": 0.0, "Total_Charge": 0.0, "VMail_Message": 0.9996662835, "VMail_Plan_stringindexer": 0.7933795212, "cluster_labels_stringindexer": 0.2915726724, "dayscallpluscharge": 0.0, "total_Mins": 0.0, "vmailMessage": 1.0}], "HasBasicStats": 1, "functionsApplied": [{"functionName": "formula", "applyOn": [{"columnName": "Day_Calls", "type": "numeric", "min": "0.0", "max": "165.0", "mean": "20.1"}, {"columnName": "Day_Charge", "type": "real", "min": "0.0", "max": "59.0", "mean": "9.3"}], "functionsData": [{"operand1": "Day_Calls", "operator": "add", "operand2": "Day_Charge", "outputColumn": "newColumn", "useNext": 0, "column": [{"Columndata": {"columnName": "Day_Calls", "type": "numeric", "min": "0.0", "max": "165.0", "mean": "20.1"}}, {"Columndata": {"columnName": "Day_Charge", "type": "real", "min": "0.0", "max": "59.0", "mean": "9.3"}}, {"Columndata": ""}], "newColumnName": "dayscallpluscharge"}]}, {"functionName": "editname", "applyOn": [{"columnName": "VMail_Message", "type": "numeric", "min": 0, "max": 51, "mean": 8.1}], "functionsData": [{"oldName": "VMail_Message", "newName": "vmailMessage", "asNewColumn": 1, "column": {"columnName": "VMail_Message", "type": "numeric", "min": 0, "max": 51, "mean": 8.1}}]}], "functionChanges": [{"columnName": "Day_Calls", "functionName": "Formula", "Type": "numeric", "Parameters": [{"operand1": "Day_Calls", "operator": "add", 
"operand2": "Day_Charge", "outputColumn": "newColumn", "useNext": 0, "column": [{"Columndata": {"columnName": "Day_Calls", "type": "numeric", "min": "0.0", "max": "165.0", "mean": "20.1"}}, {"Columndata": {"columnName": "Day_Charge", "type": "real", "min": "0.0", "max": "59.0", "mean": "9.3"}}, {"Columndata": ""}], "newColumnName": "dayscallpluscharge"}]}, {"columnName": "Day_Charge", "functionName": "Formula", "Type": "real", "Parameters": [{"operand1": "Day_Calls", "operator": "add", "operand2": "Day_Charge", "outputColumn": "newColumn", "useNext": 0, "column": [{"Columndata": {"columnName": "Day_Calls", "type": "numeric", "min": "0.0", "max": "165.0", "mean": "20.1"}}, {"Columndata": {"columnName": "Day_Charge", "type": "real", "min": "0.0", "max": "59.0", "mean": "9.3"}}, {"Columndata": ""}], "newColumnName": "dayscallpluscharge"}]}, {"columnName": "VMail_Message", "functionName": "Edit Column Name", "Type": "numeric", "Parameters": [{"oldName": "VMail_Message", "newName": "vmailMessage", "asNewColumn": 1, "column": {"columnName": "VMail_Message", "type": "numeric", "min": 0, "max": 51, "mean": 8.1}}]}], "fileheader": [{"field": "Account_Length", "alias": "Account_Length", "generated": 0, "position": 1, "type": "numeric"}, {"field": "Area_Code", "alias": "Area_Code", "generated": 0, "position": 2, "type": "numeric"}, {"field": "VMail_Message", "alias": "VMail_Message", "generated": 0, "position": 3, "type": "numeric"}, {"field": "Day_Mins", "alias": "Day_Mins", "generated": 0, "position": 4, "type": "real"}, {"field": "Day_Calls", "alias": "Day_Calls", "generated": 0, "position": 5, "type": "numeric"}, {"field": "Day_Charge", "alias": "Day_Charge", "generated": 0, "position": 6, "type": "real"}, {"field": "Eve_Mins", "alias": "Eve_Mins", "generated": 0, "position": 7, "type": "real"}, {"field": "Eve_Calls", "alias": "Eve_Calls", "generated": 0, "position": 8, "type": "numeric"}, {"field": "Eve_Charge", "alias": "Eve_Charge", "generated": 0, "position": 9, 
"type": "real"}, {"field": "Night_Mins", "alias": "Night_Mins", "generated": 0, "position": 10, "type": "real"}, {"field": "Night_Calls", "alias": "Night_Calls", "generated": 0, "position": 11, "type": "numeric"}, {"field": "Night_Charge", "alias": "Night_Charge", "generated": 0, "position": 12, "type": "real"}, {"field": "Intl_Mins", "alias": "Intl_Mins", "generated": 0, "position": 13, "type": "real"}, {"field": "total_Mins", "alias": "total_Mins", "generated": 0, "position": 14, "type": "real"}, {"field": "Intl_Calls", "alias": "Intl_Calls", "generated": 0, "position": 15, "type": "numeric"}, {"field": "Intl_Charge", "alias": "Intl_Charge", "generated": 0, "position": 16, "type": "real"}, {"field": "Total_Charge", "alias": "Total_Charge", "generated": 0, "position": 17, "type": "real"}, {"field": "CustServ_Calls", "alias": "CustServ_Calls", "generated": 0, "position": 18, "type": "numeric"}, {"field": "Churn", "alias": "Churn", "generated": 0, "position": 19, "type": "numeric"}, {"field": "State_stringindexer", "alias": "State_stringindexer", "generated": 0, "position": 20, "type": "real"}, {"field": "Phone_stringindexer", "alias": "Phone_stringindexer", "generated": 0, "position": 21, "type": "real"}, {"field": "Intl_Plan_stringindexer", "alias": "Intl_Plan_stringindexer", "generated": 0, "position": 22, "type": "numeric"}, {"field": "VMail_Plan_stringindexer", "alias": "VMail_Plan_stringindexer", "generated": 0, "position": 23, "type": "numeric"}, {"field": "cluster_labels_stringindexer", "alias": "cluster_labels_stringindexer", "generated": 0, "position": 24, "type": "real"}, {"field": "dayscallpluscharge", "alias": "dayscallpluscharge", "generated": 1, "position": 25, "type": "real"}, {"field": "vmailMessage", "alias": "vmailMessage", "generated": 1, "position": 26, "type": "numeric"}]}))
	#operationPostExecutionHook(datapreparation)

except Exception as ex: 
	logging.error(ex)


***TRAIN MODEL***

In [None]:
#%run autofedataprepHooks.ipynb
# Train-model cell: calls functionClassification on the prepared data and keeps
# the result in `dataAutoML`, which the prediction cell indexes by 'model',
# 'X_test', 'y_test', 'label' and 'columnNames'.
try:
    #mlPreExecutionHook()

    # Arguments: the prepared dataset, a list of column names and "Churn".
    # NOTE(review): presumably the list is the feature set and "Churn" the
    # target label -- confirm against functionClassification's signature.
    dataAutoML = functionClassification(
        datapreparation,
        [
            "State_stringindexer",
            "Account_Length",
            "Area_Code",
            "Phone_stringindexer",
            "Intl_Plan_stringindexer",
            "VMail_Plan_stringindexer",
            "Day_Mins",
            "Day_Calls",
            "Day_Charge",
            "Eve_Mins",
            "Eve_Calls",
            "Eve_Charge",
            "Night_Mins",
            "Night_Calls",
            "Night_Charge",
            "Intl_Mins",
            "total_Mins",
            "Intl_Calls",
            "Intl_Charge",
            "Total_Charge",
            "CustServ_Calls",
            "cluster_labels_stringindexer",
            "dayscallpluscharge",
            "vmailMessage",
        ],
        "Churn",
    )

    #mlPostExecutionHook(dataAutoML)

except Exception as ex:
    # Top-level notebook boundary: keep the cell from aborting the run, but
    # log the full traceback (not just the message) so the failure can be
    # diagnosed. The first log line is still the exception message.
    logging.exception(ex)


***PREDICT ON TRAINED MODEL***

In [None]:
import pandas as pd
import numpy as np
import sklearn.metrics

# Prediction cell: scores the trained model on the held-out test split,
# builds a pandas frame of features + actual + predicted label, and displays
# weighted accuracy / F1 / precision / recall (as percentages, 1 decimal).
try:
    # Artefacts produced by the training cell.
    model = dataAutoML['model']
    X_test = dataAutoML['X_test']
    y_test = dataAutoML['y_test']
    label = dataAutoML['label']

    # Work on a copy: remove()/insert() below would otherwise mutate the list
    # stored in dataAutoML, and re-running this cell would then leave the
    # "<label>_predicted" entry inside the feature-column list.
    columnNames = list(dataAutoML['columnNames'])
    if label in columnNames:
        columnNames.remove(label)

    predicted = label + "_predicted"
    y_predicted = model.predict(X_test)

    # Feature columns first, then the actual and predicted label columns.
    df = pd.DataFrame(X_test, columns=columnNames)
    # NOTE(review): if y_test is a pandas Series, this assignment aligns on
    # its index rather than by position -- confirm it matches df's index.
    df[label] = y_test
    df[predicted] = y_predicted

    # Final column list: label, predicted label, then the features.
    columnNames.insert(0, predicted)
    columnNames.insert(0, label)

    # Metrics are scaled to percent and rounded to one decimal place.
    Accuracy = np.round(
        100 * sklearn.metrics.accuracy_score(y_true=y_test, y_pred=y_predicted), 1)
    F1 = np.round(
        100 * sklearn.metrics.f1_score(
            y_true=y_test, y_pred=y_predicted, average="weighted"), 1)
    Precision = np.round(
        100 * sklearn.metrics.precision_score(
            y_true=y_test, y_pred=y_predicted, average="weighted"), 1)
    Recall = np.round(
        100 * sklearn.metrics.recall_score(
            y_true=y_test, y_pred=y_predicted, average="weighted"), 1)

    display(" Accuracy of Prediction on test data    : %s"%Accuracy)
    display(" F1 score of Prediction on test data    : %s"%F1)
    display(" Precision of Prediction on test data   : %s"%Precision)
    display(" Recall of Prediction on test data      : %s"%Recall)
    display(df.head())
except Exception as ex:
    # Top-level notebook boundary: log the message together with the full
    # traceback so failures (e.g. a missing dataAutoML) can be diagnosed.
    logging.exception(ex)

