***GENERATED CODE FOR profitablecustomeranalysis PIPELINE***
***DON'T EDIT THIS CODE***

**CONNECTOR FUNCTIONS TO READ DATA FROM DATABRICKS FILESYSTEM**

In [None]:
import datetime
import logging
import warnings
warnings.filterwarnings('ignore')
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)


class DBFSConnector:
    """Read and write delimited text files through Spark.

    Both public methods are called on the class itself, e.g.
    ``DBFSConnector.fetch(spark, config)``.  ``config`` is the *string* form
    of a Python dict literal providing the keys ``url``, ``delimiter`` and
    ``is_header``.
    """

    @staticmethod
    def _parse_config(config):
        """Parse the dict-literal string ``config`` exactly once.

        ``ast.literal_eval`` accepts literals only, so (unlike ``eval``) a
        config string can never execute code.

        Raises:
            ValueError, SyntaxError: if ``config`` is not a Python literal.
        """
        import ast  # local import keeps this block self-contained

        return ast.literal_eval(config)

    @staticmethod
    def _header_option(settings):
        """Map the UI label stored under ``is_header`` to Spark's 'true'/'false'."""
        return 'true' if settings["is_header"] == "Use Header Line" else 'false'

    @staticmethod
    def fetch(spark, config):
        """Load the CSV at ``config['url']`` into a DataFrame and preview it.

        Args:
            spark: session whose ``read`` attribute is used to load the file.
            config: dict-literal string (see class docstring).

        Returns:
            The loaded DataFrame (schema inference is switched on).
        """
        settings = DBFSConnector._parse_config(config)
        reader = spark.read.options(
            header=DBFSConnector._header_option(settings),
            inferschema='true',
            delimiter=settings["delimiter"])
        df = reader.csv(settings['url'])
        # Show the first two rows in the notebook as a quick sanity check.
        display(df.limit(2).toPandas())
        return df

    @staticmethod
    def put(df, spark, config):
        """Write ``df`` as CSV to a timestamp-prefixed target path.

        Args:
            df: DataFrame to write.
            spark: unused; kept so the call signature mirrors ``fetch``.
            config: dict-literal string (see class docstring).

        Returns:
            Whatever the writer's ``save`` call returns.
        """
        settings = DBFSConnector._parse_config(config)
        stamp = datetime.datetime.now().strftime("%Y-%m-%d %H.%M.%S") + "_"
        # NOTE(review): the format string puts a space between the stamp and
        # the url ("<stamp>_ <url>"); kept as-is so existing output locations
        # do not change - confirm this is the intended path layout.
        target = ("%s %s") % (stamp, settings['url'])
        writer = df.write.format('csv').options(
            header=DBFSConnector._header_option(settings),
            delimiter=settings["delimiter"])
        return writer.save(target)


**TRANSFORMATION FUNCTIONS THAT WILL BE APPLIED TO THE DATA**

In [None]:
from pyspark.ml.feature import StandardScaler
from pyspark.ml.feature import OneHotEncoderEstimator
import json
from pyspark.sql.functions import col, round
from pyspark.ml.feature import VectorAssembler
from pyspark.sql.functions import round, col
from pyspark.sql.functions import col, udf
from pyspark.sql.types import IntegerType
from pyspark.ml.feature import StringIndexer
from pyspark.sql.types import *
from pyspark.sql.functions import col, when
from pyspark.sql.types import DoubleType, IntegerType
from pyspark.sql.functions import mean, stddev, min, max, col


class CleanseData:
    """Fill in missing values of DataFrame columns with a per-column value.

    ``feature`` arguments must expose ``.name``; ``replaceNullValues`` passes
    the field objects found in ``df.schema.fields``.  The ``mean_`` / ``max_``
    / ``min_`` / ``stddev_`` parameters are not used and exist only so that
    existing callers keep working.
    """

    def _impute(self, feature, df, value, restoreType=False):
        """Replace nulls and single-blank cells of one column with ``value``.

        Args:
            feature: object whose ``.name`` is the column to patch.
            df: input DataFrame (DataFrames are immutable, so the patched
                frame is returned rather than modified in place).
            value: replacement value; ``None`` means no replacement could be
                computed, in which case ``df`` is returned untouched.
            restoreType: cast the patched column back to ``feature.dataType``
                (when the feature carries one) so the schema is unchanged.
        """
        if value is None:
            # fillna() rejects None; there is nothing to impute with.
            return df
        df = df.fillna(value, subset=[feature.name])
        patched = when(col(feature.name) == " ", value).otherwise(
            col(feature.name))
        columnType = getattr(feature, "dataType", None)
        if restoreType and columnType is not None:
            patched = patched.cast(columnType)
        return df.withColumn(feature.name, patched)

    def replaceByMean(self, feature, df, mean_=-1):
        """Impute the column with its mean, keeping the column's own type.

        The previous hard-coded Integer cast is replaced by a cast to the
        column's declared type, so real-valued columns are not truncated.
        """
        meanValue = df.select(mean(col(feature.name)).alias(
            'mean')).collect()[0]["mean"]
        return self._impute(feature, df, meanValue, restoreType=True)

    def replaceByMax(self, feature, df, max_=-1):
        """Impute the column with its maximum value."""
        maxValue = df.select(max(col(feature.name)).alias('max')).collect()[
            0]["max"]
        return self._impute(feature, df, maxValue)

    def replaceByMin(self, feature, df, min_=-1):
        """Impute the column with its minimum value."""
        minValue = df.select(min(col(feature.name)).alias('min')).collect()[
            0]["min"]
        return self._impute(feature, df, minValue)

    def replaceByStandardDeviation(self, feature, df, stddev_=-1):
        """Impute the column with its standard deviation."""
        stddevValue = df.select(stddev(col(feature.name)).alias(
            'stddev')).collect()[0]["stddev"]
        return self._impute(feature, df, stddevValue)

    def replaceDateRandomly(self, feature, df):
        """Impute the column with its first non-null value.

        Despite the name no randomness is involved: the value of the first
        row returned for the non-null filter is used.
        """
        firstRows = df.where(col(feature.name).isNotNull()).head(1)
        if not firstRows:
            # Column holds no non-null value at all; nothing to copy from.
            return df
        fillValue = firstRows[0][feature.name]
        df = df.fillna(str(fillValue), subset=[feature.name])
        return df.withColumn(
            feature.name,
            when(col(feature.name) == " ", fillValue).otherwise(col(feature.name)))

    def replaceNullValues(self, fList, df):
        """Apply each feature's ``replaceby`` strategy to its column.

        Args:
            fList: iterable of dicts with a ``feature`` (column name) and an
                optional ``replaceby`` entry containing one of the keywords
                mean / max / min / stddev / random.
            df: DataFrame to clean.

        Returns:
            The cleaned DataFrame.  Entries naming no existing column, or
            carrying no known strategy, are skipped.
        """
        # Ordered: the first keyword found in ``replaceby`` wins.
        strategies = (
            ("mean", self.replaceByMean),
            ("max", self.replaceByMax),
            ("min", self.replaceByMin),
            ("stddev", self.replaceByStandardDeviation),
            ("random", self.replaceDateRandomly),
        )
        for featureObj in fList:
            requested = featureObj.get("replaceby") or ""
            for feat in df.schema.fields:
                # Exact match: a substring test would also apply "Policy" to
                # "Policy Type", or "Customer" to "Customer Lifetime Value".
                if feat.name != featureObj["feature"]:
                    continue
                for keyword, handler in strategies:
                    if keyword in requested:
                        df = handler(feat, df)
                        break
        return df


def to_array(col):
    """Return a column of double arrays derived from ``col``.

    Each value of ``col`` has ``toArray().tolist()`` called on it inside a
    UDF whose declared return type is ``ArrayType(DoubleType())``.
    """
    as_double_list = udf(lambda vec: vec.toArray().tolist(),
                         ArrayType(DoubleType()))
    return as_double_list(col)


def OHETransform(df, params, transformationData=None):
    """String-index and one-hot encode ``params["feature"]``.

    Steps: fill nulls with '', index the strings, one-hot encode the index,
    keep only element 0 of the encoded vector as ``<feature>_onehotencoder``
    and finally overwrite ``<feature>`` with that value rounded to 2 decimals.

    Args:
        df: input DataFrame.
        params: dict; only ``params["feature"]`` (the column name) is read.
        transformationData: accepted for call compatibility, not used.

    Returns:
        DataFrame with the extra ``<feature>_onehotencoder`` column.
    """
    feature = params["feature"]
    indexCol = feature + "_toint"
    encodedCol = feature + "_onehotencoder"

    dfReturn = df.fillna({feature: ''})
    indexer = StringIndexer(
        inputCol=feature, outputCol=indexCol, handleInvalid="skip")
    dfReturn = indexer.fit(dfReturn).transform(dfReturn)

    # Count distinct indices in Spark instead of collecting every value to
    # the driver just to take len() of the list.
    if dfReturn.select(indexCol).distinct().count() <= 4:
        dfReturn = dfReturn.withColumn(
            indexCol, dfReturn[indexCol].cast(IntegerType()))

    encoder = OneHotEncoderEstimator(inputCols=[indexCol], outputCols=[
                                     encodedCol], handleInvalid="keep")
    dfReturn = encoder.fit(dfReturn).transform(dfReturn).drop(indexCol)

    # Flatten the encoded vector to an array and keep its first element only;
    # Spark names that selected column "final_col[0]".
    existingColumns = dfReturn.schema.names
    dfReturn = dfReturn.withColumn("final_col", to_array(dfReturn[encodedCol])) \
        .select(existingColumns + [col("final_col")[0]])
    dfReturn = dfReturn.drop(encodedCol).withColumnRenamed(
        "final_col[0]", encodedCol)
    dfReturn = dfReturn.withColumn(feature, round(dfReturn[encodedCol], 2))

    return dfReturn


def StringIndexerTransform(df, params, transformationData=None):
    """Add ``<feature>_stringindexer`` holding the string index of a column.

    Nulls in the feature column are first replaced by '' and rows with
    values the indexer treats as invalid are skipped.  When the index has at
    most four distinct values it is cast to integer.

    Args:
        df: input DataFrame.
        params: dict; only ``params["feature"]`` (the column name) is read.
        transformationData: accepted for call compatibility, not used.

    Returns:
        DataFrame with the additional index column.
    """
    feature = params["feature"]
    outcol = feature + "_stringindexer"

    dfReturn = df.fillna({feature: ''})
    indexer = StringIndexer(
        inputCol=feature, outputCol=outcol, handleInvalid="skip")
    dfReturn = indexer.fit(dfReturn).transform(dfReturn)

    # Count distinct indices in Spark instead of collecting every value to
    # the driver just to take len() of the list.
    if dfReturn.select(outcol).distinct().count() <= 4:
        return dfReturn.withColumn(
            outcol, dfReturn[outcol].cast(IntegerType()))
    return dfReturn


def vectorAssemblerTransform(df, param):
    """Assemble one or several columns into a vector column.

    Args:
        df: input DataFrame.
        param: a column name (output column ``<param>_vector``) or a
            list/tuple of column names (output column ``features``).

    Returns:
        ``df`` with the assembled vector column appended.

    Raises:
        TypeError: if ``param`` is neither a string nor a list/tuple.
            Previously such input silently produced ``None``.
    """
    if isinstance(param, str):
        assembler = VectorAssembler(
            inputCols=[param], outputCol=param + "_vector")
        return assembler.transform(df)

    if isinstance(param, (list, tuple)):
        assembler = VectorAssembler(
            inputCols=list(param), outputCol="features")
        return assembler.transform(df)

    raise TypeError(
        "param must be a column name or a list of column names, got %s"
        % type(param).__name__)


def _scalarFlagIsSet(value):
    """Interpret a flag given either as a bool or as text like 'True'/'False'.

    A plain truthiness test would treat the non-empty string 'False' as set.
    """
    if isinstance(value, str):
        return value.strip().lower() in ("true", "1", "yes")
    return bool(value)


def standardScalarTransform(df, params, transformationData=None):
    """Standard-scale ``params['feature']`` and store the rounded result.

    The feature is assembled into a one-element vector, scaled, flattened
    back to a scalar column ``<feature>_standardscalar`` and ``<feature>``
    itself is overwritten with that value rounded to 2 decimals.

    Args:
        df: input DataFrame.
        params: dict; only ``params['feature']`` (the column name) is read.
        transformationData: dict whose ``"std_scalar"`` entry holds
            ``mean_flag`` and ``std_flag``.  ``mean_flag`` takes priority:
            if set, only centering is applied; otherwise, if ``std_flag`` is
            set, only unit-variance scaling is applied; if neither is set no
            scaling is applied.

    Returns:
        DataFrame with the additional ``<feature>_standardscalar`` column.

    Raises:
        KeyError: if ``transformationData`` has no ``"std_scalar"`` entry.
    """
    if transformationData is None:
        transformationData = {}
    feature = params['feature']
    dfReturn = df.fillna({feature: '0.0'})

    scalarFlags = transformationData["std_scalar"]
    if _scalarFlagIsSet(scalarFlags["mean_flag"]):
        stdflag, meanflag = False, True
    elif _scalarFlagIsSet(scalarFlags["std_flag"]):
        stdflag, meanflag = True, False
    else:
        # Neither flag set: previously the two names were left unbound.
        stdflag, meanflag = False, False

    outcol = feature + "_standardscalar"
    featureVector = feature + "_vector"
    # The scaler works on vector columns, so wrap the feature first.
    dfReturn = vectorAssemblerTransform(dfReturn, feature)

    standardscale = StandardScaler(inputCol=featureVector, outputCol=outcol,
                                   withStd=stdflag, withMean=meanflag)
    scaledata = standardscale.fit(dfReturn).transform(dfReturn)

    # Flatten the scaled vector and keep its single element; Spark names that
    # selected column "final_col[0]".
    dfReturn = scaledata.withColumn("final_col", to_array(scaledata[outcol]))\
        .select(scaledata.schema.names + [col("final_col")[0]])
    dfReturn = dfReturn.drop(outcol).drop(featureVector)\
        .withColumnRenamed("final_col[0]", outcol)
    dfReturn = dfReturn.withColumn(feature, round(dfReturn[outcol], 2))

    return dfReturn


class TransformationMain:
    """Entry point of the feature-engineering stage of the pipeline."""

    # Ordered (transformation label, column name) steps.  The transforms only
    # read the column name (plus the scaler flags for "Standard Scalar"), so
    # the per-feature statistics that used to be repeated in every call are
    # not carried here.
    _STEPS = (
        ("One Hot Encoding", "Customer"),
        ("String Indexer", "State"),
        ("String Indexer", "Coverage"),
        ("String Indexer", "Education"),
        ("String Indexer", "Effective To Date"),
        ("String Indexer", "EmploymentStatus"),
        ("String Indexer", "Gender"),
        ("Standard Scalar", "Income"),
        ("String Indexer", "Location Code"),
        ("String Indexer", "Marital Status"),
        ("String Indexer", "Policy Type"),
        ("String Indexer", "Policy"),
        ("String Indexer", "Renew Offer Type"),
        ("String Indexer", "Sales Channel"),
        ("String Indexer", "Vehicle Class"),
        ("String Indexer", "Vehicle Size"),
        ("String Indexer", "Response"),
    )

    # TODO: change df argument in run with following
    @staticmethod
    def run(transformationDF, config):
        """Clean the DataFrame, then apply the fixed list of transformations.

        Args:
            transformationDF: input DataFrame.
            config: JSON string; its ``"FE"`` list drives the null
                replacement done by ``CleanseData.replaceNullValues``.

        Returns:
            The transformed DataFrame.  After each step the original source
            column is dropped, leaving only the derived column.
        """
        configObj = json.loads(config)
        featureData = configObj["FE"]
        transformationDF = CleanseData().replaceNullValues(featureData, transformationDF)

        transforms = {
            "One Hot Encoding": OHETransform,
            "String Indexer": StringIndexerTransform,
            "Standard Scalar": standardScalarTransform,
        }
        for label, feature in TransformationMain._STEPS:
            transformationData = {'feature_label': feature,
                                  'transformation_label': label}
            if label == "Standard Scalar":
                # Center only (no unit-variance scaling), as configured.
                transformationData['std_scalar'] = {
                    'mean_flag': 'True', 'std_flag': 'False'}
            transformationDF = transforms[label](
                transformationDF, {'feature': feature}, transformationData)
            transformationDF = transformationDF.drop(feature)

        # Show the first two rows in the notebook as a quick sanity check.
        display(transformationDF.limit(2).toPandas())
        return transformationDF


**AUTOML FUNCTIONS**

In [None]:
from tpot import TPOTClassifier
from sklearn.model_selection import train_test_split
import pyspark


def functionClassification(sparkDF, listOfFeatures, label):
    """Fit a TPOT classifier on ``sparkDF`` and return it with its test split.

    The Spark DataFrame is persisted, converted to pandas, and split 90/10
    (``random_state=1``).  Every column except ``label`` is used as a
    predictor; ``listOfFeatures`` is not consulted.

    Returns:
        dict with the keys ``model``, ``X_test``, ``y_test``, ``label`` and
        ``columnNames`` (the columns of ``sparkDF``).
    """
    sparkDF.persist(pyspark.StorageLevel.MEMORY_AND_DISK)
    pandasFrame = sparkDF.toPandas()

    predictors = pandasFrame.drop(label, axis=1).values
    targets = pandasFrame[label].values
    X_train, X_test, y_train, y_test = train_test_split(
        predictors, targets, random_state=1, test_size=0.1)

    searchSettings = dict(verbosity=3, n_jobs=-1, generations=10,
                          max_time_mins=5, population_size=15)
    tpotModel = TPOTClassifier(**searchSettings)
    tpotModel.fit(X_train, y_train)

    # Report the fitted model's score on the held-out split.
    heldOutScore = tpotModel.score(X_test, y_test)
    display(" Accuracy of Model : %s" % heldOutScore)

    return {
        'model': tpotModel,
        'X_test': X_test,
        'y_test': y_test,
        'label': label,
        'columnNames': sparkDF.columns,
    }


**READING DATAFRAME**

In [None]:
from pyspark.sql import SparkSession
# Obtain the Spark session used by every later cell.
spark = SparkSession.builder.getOrCreate()

# Run the hooks notebook in this session; presumably it defines the
# source*ExecutionHook functions called below - verify against that notebook.
%run profitablecustomeranalysisHooks.ipynb
try:
	sourcePreExecutionHook()

	# Load the training CSV (comma-delimited, first line used as header).
	profitablecustomeranalysisdbfs = DBFSConnector.fetch(spark, "{'url': '/Demo/ProfitableCustomerAnalysisTrain.csv', 'file_type': 'Delimeted', 'delimiter': ',', 'is_header': 'Use Header Line'}")

	sourcePostExecutionHook(profitablecustomeranalysisdbfs)

except Exception as ex: 
	# Any failure is only logged, not re-raised; if the fetch failed,
	# profitablecustomeranalysisdbfs is left unassigned by this cell.
	logging.error(ex)


**TRANSFORMING DATAFRAME**

In [None]:
%run profitablecustomeranalysisHooks.ipynb
try:
	transformationPreExecutionHook()

	profitablecustomeranalysisautofe = TransformationMain.run(profitablecustomeranalysisdbfs,json.dumps( {"FE": [{"transformationsData": [{"feature_label": "Customer", "transformation_label": "One Hot Encoding"}], "feature": "Customer", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "3688", "mean": "", "stddev": "", "min": "AA10041", "max": "ZZ91716", "missing": "0"}, "transformation": [{"transformation": "One Hot Encoding", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "State", "transformation_label": "String Indexer"}], "feature": "State", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "3688", "mean": "", "stddev": "", "min": "Arizona", "max": "Washington", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Customer Lifetime Value", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "3688", "mean": "7977.45", "stddev": "6800.61", "min": "1898.683686", "max": "74228.51604", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "Coverage", "transformation_label": "String Indexer"}], "feature": "Coverage", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "3688", "mean": "", "stddev": "", "min": "Basic", "max": "Premium", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "Education", "transformation_label": "String Indexer"}], "feature": "Education", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "3688", "mean": "", "stddev": "", "min": "Bachelor", "max": "Master", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": 
"Effective To Date", "transformation_label": "String Indexer"}], "feature": "Effective To Date", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "3688", "mean": "", "stddev": "", "min": "01/01/2011", "max": "2/28/11", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "EmploymentStatus", "transformation_label": "String Indexer"}], "feature": "EmploymentStatus", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "3688", "mean": "", "stddev": "", "min": "Disabled", "max": "Unemployed", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "Gender", "transformation_label": "String Indexer"}], "feature": "Gender", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "3688", "mean": "", "stddev": "", "min": "F", "max": "M", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "Income", "std_scalar": {"mean_flag": "True", "std_flag": "False"}, "transformation_label": "Standard Scalar"}], "feature": "Income", "transformation": [{"transformation": "Standard Scalar", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "3688", "mean": "37655.75", "stddev": "30335.1", "min": "0", "max": "99981", "missing": "0"}}, {"transformationsData": [{"feature_label": "Location Code", "transformation_label": "String Indexer"}], "feature": "Location Code", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "3688", "mean": "", "stddev": "", "min": "Rural", "max": "Urban", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "Marital Status", "transformation_label": "String 
Indexer"}], "feature": "Marital Status", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "3688", "mean": "", "stddev": "", "min": "Divorced", "max": "Single", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Monthly Premium Auto", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "3688", "mean": "93.24", "stddev": "33.94", "min": "61", "max": "297", "missing": "0"}}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Months Since Last Claim", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "3688", "mean": "15.18", "stddev": "10.05", "min": "0", "max": "35", "missing": "0"}}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Months Since Policy Inception", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "3688", "mean": "48.71", "stddev": "27.87", "min": "0", "max": "99", "missing": "0"}}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Number of Open Complaints", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "3688", "mean": "0.4", "stddev": "0.94", "min": "0", "max": "5", "missing": "0"}}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Number of Policies", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "3688", "mean": "2.91", "stddev": "2.36", "min": "1", "max": "9", "missing": "0"}}, 
{"transformationsData": [{"feature_label": "Policy Type", "transformation_label": "String Indexer"}], "feature": "Policy Type", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "3688", "mean": "", "stddev": "", "min": "Corporate Auto", "max": "Special Auto", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "Policy", "transformation_label": "String Indexer"}], "feature": "Policy", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "3688", "mean": "", "stddev": "", "min": "Corporate L1", "max": "Special L3", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "Renew Offer Type", "transformation_label": "String Indexer"}], "feature": "Renew Offer Type", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "3688", "mean": "", "stddev": "", "min": "Offer1", "max": "Offer4", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "Sales Channel", "transformation_label": "String Indexer"}], "feature": "Sales Channel", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "3688", "mean": "", "stddev": "", "min": "Agent", "max": "Web", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Total Claim Amount", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "3688", "mean": "433.0", "stddev": "287.66", "min": "0.382107", "max": "2759.794354", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "Vehicle Class", "transformation_label": "String Indexer"}], "feature": "Vehicle Class", 
"type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "3688", "mean": "", "stddev": "", "min": "Four-Door Car", "max": "Two-Door Car", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "Vehicle Size", "transformation_label": "String Indexer"}], "feature": "Vehicle Size", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "3688", "mean": "", "stddev": "", "min": "Large", "max": "Small", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"transformationsData": [{"feature_label": "Response", "transformation_label": "String Indexer"}], "feature": "Response", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "3688", "mean": "", "stddev": "", "min": "No", "max": "Yes", "missing": "0"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}]}, {"feature": "State_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "3688", "mean": "1.27", "stddev": "1.26", "min": "0.0", "max": "4.0", "missing": "0"}}, {"feature": "Coverage_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "selected": "True", "stats": {"count": "3688", "mean": "0.48", "stddev": "0.65", "min": "0", "max": "2", "missing": "0"}}, {"feature": "Education_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "3688", "mean": "1.29", "stddev": "1.11", "min": "0.0", "max": "4.0", "missing": "0"}}, {"feature": "Effective To 
Date_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "3688", "mean": "26.84", "stddev": "17.14", "min": "0.0", "max": "58.0", "missing": "0"}}, {"feature": "EmploymentStatus_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "3688", "mean": "0.62", "stddev": "1.01", "min": "0.0", "max": "4.0", "missing": "0"}}, {"feature": "Gender_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "selected": "True", "stats": {"count": "3688", "mean": "0.5", "stddev": "0.5", "min": "0", "max": "1", "missing": "0"}}, {"feature": "Location Code_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "selected": "True", "stats": {"count": "3688", "mean": "0.54", "stddev": "0.77", "min": "0", "max": "2", "missing": "0"}}, {"feature": "Marital Status_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "selected": "True", "stats": {"count": "3688", "mean": "0.57", "stddev": "0.74", "min": "0", "max": "2", "missing": "0"}}, {"feature": "Policy Type_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "selected": "True", "stats": {"count": "3688", "mean": "0.28", "stddev": "0.53", "min": "0", "max": "2", "missing": "0"}}, {"feature": 
"Policy_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "3688", "mean": "1.53", "stddev": "1.77", "min": "0.0", "max": "8.0", "missing": "0"}}, {"feature": "Renew Offer Type_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "selected": "True", "stats": {"count": "3688", "mean": "0.97", "stddev": "1.01", "min": "0", "max": "3", "missing": "0"}}, {"feature": "Sales Channel_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "selected": "True", "stats": {"count": "3688", "mean": "1.1", "stddev": "1.06", "min": "0", "max": "3", "missing": "0"}}, {"feature": "Vehicle Class_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "selected": "True", "stats": {"count": "3688", "mean": "0.93", "stddev": "1.16", "min": "0.0", "max": "5.0", "missing": "0"}}, {"feature": "Vehicle Size_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "selected": "True", "stats": {"count": "3688", "mean": "0.39", "stddev": "0.66", "min": "0", "max": "2", "missing": "0"}}, {"feature": "Response_stringindexer_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "transformationsData": [{"transformation_label": "novalue"}], "type": "numeric", "selected": "True", "stats": {"count": "3688", "mean": "0.15", "stddev": "0.35", "min": "0", "max": "1", "missing": "0"}}, {"feature": 
"Customer_onehotencoder_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "selected": "True", "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "stats": {"count": "3688", "mean": "0.0", "stddev": "0.02", "min": "0.0", "max": "1.0", "missing": "0"}}, {"feature": "Income_standardscalar_transform", "transformation": [{"transformation": "novalue", "selectedAsDefault": 0}], "selected": "True", "transformationsData": [{"transformation_label": "novalue"}], "type": "real", "stats": {"count": "3688", "mean": "-0.0", "stddev": "30335.1", "min": "-37655.74783080262", "max": "62325.25216919738", "missing": "0"}}]}))

	transformationPostExecutionHook(profitablecustomeranalysisautofe)

except Exception as ex: 
	logging.error(ex)


**TRAIN MODEL**

In [None]:
%run profitablecustomeranalysisHooks.ipynb
try:
    # User-defined hook executed before training starts.
    mlPreExecutionHook()

    # Train on the feature-engineered frame. The second argument lists the
    # input columns, the third names the target column.
    dataAutoML = functionClassification(
        profitablecustomeranalysisautofe,
        [
            # Columns named without an encoding suffix.
            "Customer Lifetime Value",
            "Monthly Premium Auto",
            "Months Since Last Claim",
            "Months Since Policy Inception",
            "Number of Open Complaints",
            "Number of Policies",
            "Total Claim Amount",
            # Columns carrying the "_stringindexer" suffix.
            "State_stringindexer",
            "Coverage_stringindexer",
            "Education_stringindexer",
            "Effective To Date_stringindexer",
            "EmploymentStatus_stringindexer",
            "Gender_stringindexer",
            "Location Code_stringindexer",
            "Marital Status_stringindexer",
            "Policy Type_stringindexer",
            "Policy_stringindexer",
            "Renew Offer Type_stringindexer",
            "Sales Channel_stringindexer",
            "Vehicle Class_stringindexer",
            "Vehicle Size_stringindexer",
            # Columns carrying other encoder/scaler suffixes.
            "Customer_onehotencoder",
            "Income_standardscalar",
        ],
        "Response_stringindexer",
    )

    # User-defined hook executed with the training result.
    mlPostExecutionHook(dataAutoML)

except Exception as err:
    # Any failure in the hooks or in training is logged, not re-raised.
    logging.error(err)


**PREDICT ON TRAINED MODEL**

In [None]:
import pandas as pd
import numpy as np
import sklearn.metrics

def buildPredictionFrame(autoML):
    """Predict on the test split and tabulate features, label and prediction.

    Reads the 'model', 'X_test', 'y_test', 'label' and 'columnNames' entries
    of ``autoML``. The list stored under 'columnNames' is never modified, so
    this function can be called repeatedly on the same result.

    Returns a tuple ``(frame, orderedNames, y_predicted)``:
      * frame        - DataFrame built from X_test with the feature names as
                       columns, plus the label column and a
                       "<label>_predicted" column.
      * orderedNames - [label, "<label>_predicted", *feature names].
      * y_predicted  - whatever ``model.predict(X_test)`` returned.
    """
    label = autoML['label']
    predicted = label + "_predicted"

    # Copy before editing: mutating the list held by the AutoML result would
    # leave the label/prediction names in it and break a re-run of the cell.
    featureNames = list(autoML['columnNames'])
    if label in featureNames:
        featureNames.remove(label)

    y_predicted = autoML['model'].predict(autoML['X_test'])

    frame = pd.DataFrame(autoML['X_test'], columns=featureNames)
    # NOTE(review): if y_test is a pandas Series this assignment aligns on
    # index rather than position - confirm it matches the frame's index.
    frame[label] = autoML['y_test']
    frame[predicted] = y_predicted

    return frame, [label, predicted] + featureNames, y_predicted


try:
    model = dataAutoML['model']
    X_test = dataAutoML['X_test']
    y_test = dataAutoML['y_test']
    label = dataAutoML['label']
    predicted = label + "_predicted"

    # columnNames is a fresh list; dataAutoML['columnNames'] stays untouched.
    df, columnNames, y_predicted = buildPredictionFrame(dataAutoML)

    # Metrics are reported as percentages rounded to one decimal place;
    # F1 / precision / recall use the "weighted" average across classes.
    Accuracy = np.round(
        100 * sklearn.metrics.accuracy_score(y_true=y_test, y_pred=y_predicted), 1)
    F1 = np.round(
        100 * sklearn.metrics.f1_score(y_true=y_test, y_pred=y_predicted, average="weighted"), 1)
    Precision = np.round(
        100 * sklearn.metrics.precision_score(y_true=y_test, y_pred=y_predicted, average="weighted"), 1)
    Recall = np.round(
        100 * sklearn.metrics.recall_score(y_true=y_test, y_pred=y_predicted, average="weighted"), 1)

    display(" Accuracy of Prediction on test data    : %s"%Accuracy)
    display(" F1 score of Prediction on test data    : %s"%F1)
    display(" Precision of Prediction on test data   : %s"%Precision)
    display(" Recall of Prediction on test data      : %s"%Recall)
    display(df.head())
except Exception as ex:
    # Failures (including a missing dataAutoML from a failed training cell)
    # are logged rather than raised.
    logging.error(ex)

