***GENERATED CODE FOR churn PIPELINE.***

***DON'T EDIT THIS CODE.***

***CONNECTOR FUNCTIONS TO READ AND WRITE DATA.***

In [None]:
import os
import datetime
import logging
import warnings
warnings.filterwarnings('ignore')
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)


class HDFSConnector:
    """CSV connector: read a DataFrame from HDFS and write one back out.

    The methods take no ``self`` and are meant to be called on the class
    (``HDFSConnector.fetch(spark, config)``); they are declared static so
    that calling them on an instance works as well.
    """

    @staticmethod
    def _parse_config(config):
        """Turn the connector config string into a dict.

        SECURITY NOTE(review): ``eval`` executes arbitrary Python. This is
        only acceptable if ``config`` always comes from a trusted source.
        If the config is a plain literal, ``ast.literal_eval`` (or
        ``json.loads`` for JSON) would be a safer parser - confirm the
        config format before switching.
        """
        return eval(config)

    @staticmethod
    def fetch(spark, config):
        """Load the CSV at ``config['url']`` from HDFS into a DataFrame.

        The HDFS host and port are read from the ``HDFS_SERVER`` and
        ``HDFS_PORT`` environment variables.

        :param spark: Spark session used for reading.
        :param config: string that evaluates to a dict with a ``'url'`` key
            (path appended directly after ``host:port``).
        :return: the loaded DataFrame (header row used, schema inferred).
        :raises KeyError: if an environment variable or ``'url'`` is missing.
        :raises ValueError: if ``HDFS_PORT`` is not an integer.
        """
        ################### INPUT HADOOP HOST PORT TO CONNECT WITH ###############################
        hdfs_server = str(os.environ['HDFS_SERVER'])
        hdfs_port = int(os.environ['HDFS_PORT'])
        settings = HDFSConnector._parse_config(config)
        source = f"hdfs://{hdfs_server}:{hdfs_port}{settings['url']}"
        df = spark.read.options(header='true', inferschema='true').csv(
            source, header='true')
        # `display` is expected to be supplied by the notebook environment;
        # it previews the first two rows.
        display(df.limit(2).toPandas())
        return df

    @staticmethod
    def put(df, spark, config):
        """Write ``df`` as CSV to a timestamp-prefixed path.

        :param df: DataFrame to write.
        :param spark: unused; kept for a uniform connector signature.
        :param config: string that evaluates to a dict with the keys
            ``'is_header'`` (a header line is written only when this equals
            ``"Use Header Line"``), ``'delimiter'`` and ``'url'``.
        :return: whatever ``DataFrameWriter.save`` returns.
        :raises KeyError: if one of the config keys is missing.
        """
        settings = HDFSConnector._parse_config(config)
        header = 'true' if settings["is_header"] == "Use Header Line" else 'false'
        delimiter = settings["delimiter"]
        stamp = datetime.datetime.now().strftime("%Y-%m-%d %H.%M.%S")
        # NOTE(review): the target keeps the original "<timestamp>_ <url>"
        # layout, including the space after the underscore - confirm that
        # this is the intended output location.
        target = f"{stamp}_ {settings['url']}"
        return df.write.format('csv').options(
            header=header, delimiter=delimiter).save(target)


***TRANSFORMATION FUNCTIONS THAT WILL BE APPLIED TO THE DATA***

In [None]:
import json
from pyspark.sql.types import IntegerType
from pyspark.ml.feature import StringIndexer
from pyspark.sql.functions import col, when
from pyspark.sql.types import IntegerType
from pyspark.sql.functions import mean, stddev, min, max, col


class CleanseData:
    """Fill null and blank (``" "``) cells of DataFrame columns.

    Each ``replaceBy*`` method takes a schema field (an object with a
    ``.name`` attribute, as found in ``df.schema.fields``) and returns a
    new DataFrame; ``replaceNullValues`` drives them from a feature list.
    """

    def cleanValueForFE(self, value):
        """Normalise a collected statistic before it is used as a fill value.

        :return: ``""`` for ``None``, the string ``"nan"`` when
            ``str(value)`` is ``'nan'``, otherwise ``value`` unchanged.
        """
        if value is None:
            return ""
        if str(value) == 'nan':
            return "nan"
        return value

    def _replaceByAggregate(self, feature, df, aggregate, alias,
                            castUnchangedToInteger=False):
        """Fill nulls and ``" "`` cells of one column with an aggregate.

        The aggregate is computed over ``df.dropna()`` (called with no
        arguments, exactly as the original code did), cleaned with
        ``cleanValueForFE`` and then used both for ``fillna`` and for
        replacing cells equal to a single space.

        :param feature: schema field whose ``.name`` is the column.
        :param aggregate: column aggregate function (mean, max, ...).
        :param alias: name given to the aggregate column when collecting.
        :param castUnchangedToInteger: when true, cells that are not blank
            are cast to ``"Integer"`` (only the mean strategy does this).
        """
        name = feature.name
        completeRows = df.dropna()
        statistic = self.cleanValueForFE(
            completeRows.select(aggregate(col(name)).alias(alias)).collect()[0][alias])
        df = df.fillna(statistic, subset=[name])
        untouched = col(name).cast("Integer") if castUnchangedToInteger else col(name)
        return df.withColumn(
            name, when(col(name) == " ", statistic).otherwise(untouched))

    def replaceByMean(self, feature, df, mean_=-1):
        """Replace null/blank cells of ``feature`` with the column mean.

        Bug fix: the blank-cell replacement used to be computed and then
        thrown away (``withColumn`` result was not assigned), so only
        ``fillna`` took effect. The replacement is now applied.

        NOTE(review): non-blank cells are cast to ``"Integer"`` as the
        original expression specified; this presumably truncates fractional
        values - confirm that this is intended.

        ``mean_`` is unused and kept for interface compatibility.
        """
        return self._replaceByAggregate(
            feature, df, mean, 'mean', castUnchangedToInteger=True)

    def replaceByMax(self, feature, df, max_=-1):
        """Replace null/blank cells of ``feature`` with the column maximum.

        ``max_`` is unused and kept for interface compatibility.
        """
        return self._replaceByAggregate(feature, df, max, 'max')

    def replaceByMin(self, feature, df, min_=-1):
        """Replace null/blank cells of ``feature`` with the column minimum.

        ``min_`` is unused and kept for interface compatibility.
        """
        return self._replaceByAggregate(feature, df, min, 'min')

    def replaceByStandardDeviation(self, feature, df, stddev_=-1):
        """Replace null/blank cells of ``feature`` with the column stddev.

        ``stddev_`` is unused and kept for interface compatibility.
        """
        return self._replaceByAggregate(feature, df, stddev, 'stddev')

    def replaceDateRandomly(self, feature, df):
        """Replace null/blank cells of ``feature`` with an existing value.

        Despite the name, the fill value is the column value of the first
        row returned by ``head(1)`` among rows where the column is not
        null. If the column has no non-null value there is nothing to fill
        with and ``df`` is returned unchanged (previously an
        ``IndexError``).
        """
        name = feature.name
        sample = df.where(col(name).isNotNull()).head(1)
        if not sample:
            return df
        fillValue = self.cleanValueForFE(sample[0][name])
        df = df.fillna(str(fillValue), subset=[name])
        return df.withColumn(
            name, when(col(name) == " ", fillValue).otherwise(col(name)))

    def replaceNullValues(self, fList, df):
        """Apply the configured replacement strategy to each listed feature.

        :param fList: iterable of dicts with a ``"feature"`` key (column
            name) and a ``"replaceby"`` key (strategy text).
        :param df: DataFrame to clean.
        :return: the cleaned DataFrame.

        The strategy is the first of ``mean``, ``max``, ``min``, ``stddev``,
        ``random`` that occurs in the ``"replaceby"`` text; entries matching
        none of them are left alone.

        Bug fix: columns are now matched by exact name. The previous
        substring test made a feature such as ``Intern`` also rewrite every
        column whose name merely contains it (``InternEmail``, ...).
        """
        # Order matters: it mirrors the original if/elif precedence.
        strategies = (
            ("mean", self.replaceByMean),
            ("max", self.replaceByMax),
            ("min", self.replaceByMin),
            ("stddev", self.replaceByStandardDeviation),
            ("random", self.replaceDateRandomly),
        )
        # Snapshot of the fields taken once, before any replacement runs.
        schemaFields = df.schema.fields
        for featureObj in fList:
            for field in schemaFields:
                if field.name != featureObj["feature"]:
                    continue
                replaceBy = featureObj["replaceby"]
                handler = next(
                    (fn for keyword, fn in strategies if keyword in replaceBy),
                    None)
                if handler is not None:
                    df = handler(field, df)
        return df


def StringIndexerTransform(df, params, transformationData=None):
    """Add a ``<feature>_stringindexer`` column produced by StringIndexer.

    Nulls in the feature column are first filled with ``''``; the indexer
    is fitted and applied with ``handleInvalid="skip"``. When the indexed
    column has at most four distinct values it is cast to ``IntegerType``.

    :param df: input DataFrame.
    :param params: dict; only ``params["feature"]`` (the column name) is
        read here.
    :param transformationData: unused; kept for interface compatibility.
        Defaults to ``None`` instead of a shared mutable ``{}``.
    :return: DataFrame with the extra indexed column (the source column is
        left in place).
    :raises KeyError: if ``params`` has no ``"feature"`` entry.
    """
    feature = params["feature"]
    outcol = feature + "_stringindexer"

    filled = df.fillna({feature: ''})
    indexer = StringIndexer(
        inputCol=feature, outputCol=outcol, handleInvalid="skip")
    indexed = indexer.fit(filled).transform(filled)

    # Only the number of distinct indices is needed, so count them in Spark
    # rather than collecting every distinct value to the driver.
    distinct_count = indexed.select(outcol).distinct().count()
    if distinct_count <= 4:
        return indexed.withColumn(outcol, indexed[outcol].cast(IntegerType()))
    return indexed


class TransformationMain:
    # TODO: change df argument in run with following
    def run(transformationDF, config):
        configObj = json.loads(config)
        featureData = configObj["FE"]
        transformationDF = CleanseData().replaceNullValues(featureData, transformationDF)
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern', 'transformation_label': 'String Indexer'}], 'feature': 'Intern', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
                                                  'count': '500', 'mean': '', 'stddev': '', 'min': 'AHMED REDA', 'max': 'vaishali oruganti', 'missing': '0', 'distinct': '66'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern'}, {'feature_label': 'Intern', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'DaysInInternship', 'transformation_label': 'String Indexer'}], 'feature': 'DaysInInternship', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '473.08', 'stddev': '273.84', 'min': ' ', 'max': '94', 'missing': '0', 'distinct': '41'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'DaysInInternship'}, {'feature_label': 'DaysInInternship', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('DaysInInternship')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Attendance', 'transformation_label': 'String Indexer'}], 'feature': 'Attendance', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '2.51', 'stddev': '1.43', 'min': ' ', 'max': '6', 'missing': '0', 'distinct': '7'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Attendance'}, {'feature_label': 'Attendance', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Attendance')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'BaseCampCommentScore', 'transformation_label': 'String Indexer'}], 'feature': 'BaseCampCommentScore', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '2.51', 'stddev': '1.43', 'min': ' ', 'max': '6', 'missing': '0', 'distinct': '7'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'BaseCampCommentScore'}, {'feature_label': 'BaseCampCommentScore', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('BaseCampCommentScore')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'BaseCampTotalComments', 'transformation_label': 'String Indexer'}], 'feature': 'BaseCampTotalComments', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '4.87', 'stddev': '4.54', 'min': ' ', 'max': '9', 'missing': '0', 'distinct': '18'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'BaseCampTotalComments'}, {'feature_label': 'BaseCampTotalComments', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('BaseCampTotalComments')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'GoalResponseLen', 'transformation_label': 'String Indexer'}], 'feature': 'GoalResponseLen', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': ' ', 'missing': '0', 'distinct': '1'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'GoalResponseLen'}, {'feature_label': 'GoalResponseLen', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('GoalResponseLen')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'IPBC_Score', 'transformation_label': 'String Indexer'}], 'feature': 'IPBC_Score', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.31', 'stddev': '0.34', 'min': ' ', 'max': '1', 'missing': '0', 'distinct': '17'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'IPBC_Score'}, {'feature_label': 'IPBC_Score', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('IPBC_Score')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'InternEmail', 'transformation_label': 'String Indexer'}], 'feature': 'InternEmail', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': '5000JOB@GMAIL.COM', 'max': 'yirsawemebet@gmail.com', 'missing': '0', 'distinct': '66'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'InternEmail'}, {'feature_label': 'InternEmail', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('InternEmail')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'InternStartDate', 'transformation_label': 'String Indexer'}], 'feature': 'InternStartDate', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': '11:56.0', 'missing': '0', 'distinct': '28'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'InternStartDate'}, {'feature_label': 'InternStartDate', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('InternStartDate')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'InternEndDate', 'transformation_label': 'String Indexer'}], 'feature': 'InternEndDate', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': '54:18.0', 'missing': '0', 'distinct': '40'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'InternEndDate'}, {'feature_label': 'InternEndDate', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('InternEndDate')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'InternTechGroupName', 'transformation_label': 'String Indexer'}], 'feature': 'InternTechGroupName', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': 'Data Warehousing and ETL', 'max': 'Tableau', 'missing': '0', 'distinct': '6'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'InternTechGroupName'}, {'feature_label': 'InternTechGroupName', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('InternTechGroupName')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'InternManager', 'transformation_label': 'String Indexer'}], 'feature': 'InternManager', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': 'Ali Muwwakkil', 'max': 'Ali Muwwakkil', 'missing': '0', 'distinct': '1'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'InternManager'}, {'feature_label': 'InternManager', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('InternManager')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'PName', 'transformation_label': 'String Indexer'}], 'feature': 'PName', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': 'Update Chat and Zendesk Reports', 'missing': '0', 'distinct': '12'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'PName'}, {'feature_label': 'PName', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('PName')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'L_URL', 'transformation_label': 'String Indexer'}], 'feature': 'L_URL', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': 'https://3.basecamp.com/3945211/buckets/24865175/todolists/7255296952', 'missing': '0', 'distinct': '12'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'L_URL'}, {'feature_label': 'L_URL', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('L_URL')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_Wk', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_Wk', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '22.92', 'stddev': '14.71', 'min': ' ', 'max': '9', 'missing': '0', 'distinct': '54'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_Wk'}, {'feature_label': 'Intern_MM_Wk', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_MM_Wk')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_Yr', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_Yr', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '2022.9', 'stddev': '1.03', 'min': ' ', 'max': '2024', 'missing': '0', 'distinct': '6'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_Yr'}, {'feature_label': 'Intern_MM_Yr', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_MM_Yr')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_LastActivitySection', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_LastActivitySection', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': 'Power BI Designer - Power BI Desktop', 'missing': '0', 'distinct': '14'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_LastActivitySectio...'}, {'feature_label': 'Intern_LastActivitySection', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Intern_LastActivitySection')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_HW', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_HW', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.01', 'stddev': '0.08', 'min': ' ', 'max': '1', 'missing': '0', 'distinct': '3'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_HW'}, {'feature_label': 'Intern_MM_HW', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_MM_HW')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_Videos', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_Videos', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.11', 'stddev': '0.37', 'min': ' ', 'max': '2', 'missing': '0', 'distinct': '4'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_Videos'}, {'feature_label': 'Intern_MM_Videos', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_MM_Videos')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_VideosScore', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_VideosScore', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.38', 'stddev': '1.27', 'min': ' ', 'max': '5', 'missing': '0', 'distinct': '5'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_VideosScore'}, {'feature_label': 'Intern_MM_VideosScore', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_MM_VideosScore')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_AutoInt', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_AutoInt', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.47', 'stddev': '1.65', 'min': ' ', 'max': '6', 'missing': '0', 'distinct': '9'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_AutoInt'}, {'feature_label': 'Intern_MM_AutoInt', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_MM_AutoInt')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_Phone', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_Phone', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.12', 'stddev': '0.47', 'min': ' ', 'max': '4', 'missing': '0', 'distinct': '6'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_Phone'}, {'feature_label': 'Intern_MM_Phone', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_MM_Phone')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_LoginPer', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_LoginPer', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.48', 'stddev': '0.35', 'min': ' ', 'max': '1', 'missing': '0', 'distinct': '10'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_LoginPer'}, {'feature_label': 'Intern_MM_LoginPer', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_MM_LoginPer')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_ActivityScore', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_ActivityScore', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.86', 'stddev': '1.94', 'min': ' ', 'max': '8', 'missing': '0', 'distinct': '11'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_ActivityScore'}, {'feature_label': 'Intern_MM_ActivityScore', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_MM_ActivityScore')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_ParticipationPer', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_ParticipationPer', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.37', 'stddev': '0.48', 'min': ' ', 'max': '1', 'missing': '0', 'distinct': '3'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_ParticipationPe...'}, {'feature_label': 'Intern_MM_ParticipationPer', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Intern_MM_ParticipationPer')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_Mentor', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_Mentor', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': 'hubert ndifusah', 'missing': '0', 'distinct': '13'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_Mentor'}, {'feature_label': 'Intern_MM_Mentor', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_MM_Mentor')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_UserComments', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_UserComments', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.73', 'stddev': '0.79', 'min': ' ', 'max': '4', 'missing': '0', 'distinct': '6'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_UserComments'}, {'feature_label': 'Intern_MM_UserComments', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_MM_UserComments')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_ActivityScoreChk', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_ActivityScoreChk', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.37', 'stddev': '0.48', 'min': ' ', 'max': '1', 'missing': '0', 'distinct': '3'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_ActivityScoreCh...'}, {'feature_label': 'Intern_MM_ActivityScoreChk', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Intern_MM_ActivityScoreChk')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_UserCommentsChk', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_UserCommentsChk', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.57', 'stddev': '0.5', 'min': ' ', 'max': '1', 'missing': '0', 'distinct': '3'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_UserCommentsChk'}, {'feature_label': 'Intern_MM_UserCommentsChk', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Intern_MM_UserCommentsChk')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_PlacementDate', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_PlacementDate', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': '58:27.9', 'missing': '0', 'distinct': '6'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_PlacementDate'}, {'feature_label': 'Intern_MM_PlacementDate', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_MM_PlacementDate')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_JRPChk', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_JRPChk', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.0', 'stddev': '0.05', 'min': ' ', 'max': '1', 'missing': '0', 'distinct': '3'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_JRPChk'}, {'feature_label': 'Intern_MM_JRPChk', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_MM_JRPChk')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_SubChk', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_SubChk', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.0', 'stddev': '0.0', 'min': ' ', 'max': '0', 'missing': '0', 'distinct': '2'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_SubChk'}, {'feature_label': 'Intern_MM_SubChk', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_MM_SubChk')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_AutoIntChk', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_AutoIntChk', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.17', 'stddev': '0.38', 'min': ' ', 'max': '1', 'missing': '0', 'distinct': '3'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_AutoIntChk'}, {'feature_label': 'Intern_MM_AutoIntChk', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_MM_AutoIntChk')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_PhoneChk', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_PhoneChk', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.08', 'stddev': '0.28', 'min': ' ', 'max': '1', 'missing': '0', 'distinct': '3'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_PhoneChk'}, {'feature_label': 'Intern_MM_PhoneChk', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_MM_PhoneChk')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_Wk_ActivityScore', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_Wk_ActivityScore', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.19', 'stddev': '0.08', 'min': ' ', 'max': '0.472857', 'missing': '0', 'distinct': '144'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_Wk_ActivityScor...'}, {'feature_label': 'Intern_MM_Wk_ActivityScore', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Intern_MM_Wk_ActivityScore')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_WklyComments', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_WklyComments', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': ' ', 'missing': '0', 'distinct': '1'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_WklyComments'}, {'feature_label': 'Intern_MM_WklyComments', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_MM_WklyComments')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_MM_LastCommentDate', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_MM_LastCommentDate', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': ' ', 'missing': '0', 'distinct': '1'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_MM_LastCommentDate'}, {'feature_label': 'Intern_MM_LastCommentDate', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Intern_MM_LastCommentDate')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_CC_Answer', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_CC_Answer', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '2.06', 'stddev': '5.19', 'min': ' ', 'max': '8', 'missing': '0', 'distinct': '19'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_CC_Answer'}, {'feature_label': 'Intern_CC_Answer', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_CC_Answer')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_CC_CorrectAnswer', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_CC_CorrectAnswer', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.01', 'stddev': '0.09', 'min': ' ', 'max': '1', 'missing': '0', 'distinct': '3'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_CC_CorrectAnswer'}, {'feature_label': 'Intern_CC_CorrectAnswer', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_CC_CorrectAnswer')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_CC_VideosHosted', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_CC_VideosHosted', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.0', 'stddev': '0.0', 'min': ' ', 'max': '0', 'missing': '0', 'distinct': '2'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_CC_VideosHosted'}, {'feature_label': 'Intern_CC_VideosHosted', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_CC_VideosHosted')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_CC_QualfyVideo', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_CC_QualfyVideo', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.0', 'stddev': '0.0', 'min': ' ', 'max': '0', 'missing': '0', 'distinct': '2'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_CC_QualfyVideo'}, {'feature_label': 'Intern_CC_QualfyVideo', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Intern_CC_QualfyVideo')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_COE_Participation', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_COE_Participation', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '6.08', 'stddev': '5.88', 'min': ' ', 'max': '9', 'missing': '0', 'distinct': '23'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_COE_Participation'}, {'feature_label': 'Intern_COE_Participation', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Intern_COE_Participation')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Intern_COE_ProjectUpdates', 'transformation_label': 'String Indexer'}], 'feature': 'Intern_COE_ProjectUpdates', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.38', 'stddev': '1.86', 'min': ' ', 'max': '9', 'missing': '0', 'distinct': '9'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Intern_COE_ProjectUpdates'}, {'feature_label': 'Intern_COE_ProjectUpdates', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Intern_COE_ProjectUpdates')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'IPBC_Part1', 'transformation_label': 'String Indexer'}], 'feature': 'IPBC_Part1', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '0.81', 'stddev': '1.47', 'min': ' ', 'max': '8', 'missing': '0', 'distinct': '11'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'IPBC_Part1'}, {'feature_label': 'IPBC_Part1', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('IPBC_Part1')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'IPBC_Score_Pre_Total', 'transformation_label': 'String Indexer'}], 'feature': 'IPBC_Score_Pre_Total', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '4.89', 'stddev': '5.67', 'min': ' ', 'max': '9', 'missing': '0', 'distinct': '23'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'IPBC_Score_Pre_Total'}, {'feature_label': 'IPBC_Score_Pre_Total', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('IPBC_Score_Pre_Total')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Phase', 'transformation_label': 'String Indexer'}], 'feature': 'Phase', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': 'Phase 5 - Completed', 'missing': '0', 'distinct': '7'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Phase'}, {'feature_label': 'Phase', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Phase')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'InternLevelGroup', 'transformation_label': 'String Indexer'}], 'feature': 'InternLevelGroup', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '2.43', 'stddev': '0.58', 'min': ' ', 'max': '4', 'missing': '0', 'distinct': '5'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'InternLevelGroup'}, {'feature_label': 'InternLevelGroup', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('InternLevelGroup')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'CurrentSprintTask', 'transformation_label': 'String Indexer'}], 'feature': 'CurrentSprintTask', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': ' ', 'missing': '0', 'distinct': '1'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'CurrentSprintTask'}, {'feature_label': 'CurrentSprintTask', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('CurrentSprintTask')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'StudentProfile', 'transformation_label': 'String Indexer'}], 'feature': 'StudentProfile', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': 'https://app.colaberry.com/app/ipbc/students/ipbc/15676', 'max': 'https://app.colaberry.com/app/ipbc/students/ipbc/39043', 'missing': '0', 'distinct': '66'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'StudentProfile'}, {'feature_label': 'StudentProfile', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('StudentProfile')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'ProjectName_2', 'transformation_label': 'String Indexer'}], 'feature': 'ProjectName_2', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': 'Update Chat and Zendesk Reports', 'missing': '0', 'distinct': '12'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'ProjectName_2'}, {'feature_label': 'ProjectName_2', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('ProjectName_2')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'ProjectKPI_Link', 'transformation_label': 'String Indexer'}], 'feature': 'ProjectKPI_Link', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': 'https://app.colaberry.com/app/student/ProjMgmtByAdmin/1956', 'missing': '0', 'distinct': '3'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'ProjectKPI_Link'}, {'feature_label': 'ProjectKPI_Link', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('ProjectKPI_Link')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Gender', 'transformation_label': 'String Indexer'}], 'feature': 'Gender', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': 'M', 'missing': '0', 'distinct': '3'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Gender'}, {'feature_label': 'Gender', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Gender')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Dominance', 'transformation_label': 'String Indexer'}], 'feature': 'Dominance', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '41.89', 'stddev': '25.65', 'min': ' ', 'max': '92', 'missing': '0', 'distinct': '21'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Dominance'}, {'feature_label': 'Dominance', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Dominance')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Influencer', 'transformation_label': 'String Indexer'}], 'feature': 'Influencer', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '44.99', 'stddev': '12.2', 'min': ' ', 'max': '83', 'missing': '0', 'distinct': '19'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Influencer'}, {'feature_label': 'Influencer', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Influencer')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Steadiness', 'transformation_label': 'String Indexer'}], 'feature': 'Steadiness', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '64.02', 'stddev': '23.5', 'min': ' ', 'max': '90', 'missing': '0', 'distinct': '19'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Steadiness'}, {'feature_label': 'Steadiness', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Steadiness')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Compliance', 'transformation_label': 'String Indexer'}], 'feature': 'Compliance', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '62.45', 'stddev': '17.46', 'min': ' ', 'max': '93', 'missing': '0', 'distinct': '15'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Compliance'}, {'feature_label': 'Compliance', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Compliance')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Theoretical', 'transformation_label': 'String Indexer'}], 'feature': 'Theoretical', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '51.18', 'stddev': '10.66', 'min': ' ', 'max': '78', 'missing': '0', 'distinct': '20'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Theoretical'}, {'feature_label': 'Theoretical', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Theoretical')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Utilitarian', 'transformation_label': 'String Indexer'}], 'feature': 'Utilitarian', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '45.2', 'stddev': '11.25', 'min': ' ', 'max': '73', 'missing': '0', 'distinct': '22'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Utilitarian'}, {'feature_label': 'Utilitarian', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Utilitarian')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Aesthetic', 'transformation_label': 'String Indexer'}], 'feature': 'Aesthetic', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '39.14', 'stddev': '7.69', 'min': ' ', 'max': '52', 'missing': '0', 'distinct': '21'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Aesthetic'}, {'feature_label': 'Aesthetic', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Aesthetic')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Social', 'transformation_label': 'String Indexer'}], 'feature': 'Social', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '36.61', 'stddev': '13.13', 'min': ' ', 'max': '8', 'missing': '0', 'distinct': '21'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Social'}, {'feature_label': 'Social', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Social')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Individualistic', 'transformation_label': 'String Indexer'}], 'feature': 'Individualistic', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '42.54', 'stddev': '10.17', 'min': ' ', 'max': '63', 'missing': '0', 'distinct': '19'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Individualistic'}, {'feature_label': 'Individualistic', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Individualistic')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Traditional', 'transformation_label': 'String Indexer'}], 'feature': 'Traditional', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '40.84', 'stddev': '7.89', 'min': ' ', 'max': '56', 'missing': '0', 'distinct': '19'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Traditional'}, {'feature_label': 'Traditional', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Traditional')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Flexibility', 'transformation_label': 'String Indexer'}], 'feature': 'Flexibility', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '11.11', 'stddev': '19.2', 'min': ' ', 'max': '9', 'missing': '0', 'distinct': '24'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Flexibility'}, {'feature_label': 'Flexibility', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Flexibility')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'FuturisticThinking', 'transformation_label': 'String Indexer'}], 'feature': 'FuturisticThinking', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '5.37', 'stddev': '10.84', 'min': ' ', 'max': '7.7', 'missing': '0', 'distinct': '20'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'FuturisticThinking'}, {'feature_label': 'FuturisticThinking', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('FuturisticThinking')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'GoalOrientation', 'transformation_label': 'String Indexer'}], 'feature': 'GoalOrientation', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '13.71', 'stddev': '24.38', 'min': ' ', 'max': '9.7', 'missing': '0', 'distinct': '19'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'GoalOrientation'}, {'feature_label': 'GoalOrientation', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('GoalOrientation')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'InterpersonalSkills', 'transformation_label': 'String Indexer'}], 'feature': 'InterpersonalSkills', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '12.31', 'stddev': '24.81', 'min': ' ', 'max': '9.7', 'missing': '0', 'distinct': '23'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'InterpersonalSkills'}, {'feature_label': 'InterpersonalSkills', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('InterpersonalSkills')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Leadership', 'transformation_label': 'String Indexer'}], 'feature': 'Leadership', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '10.96', 'stddev': '20.53', 'min': ' ', 'max': '9.7', 'missing': '0', 'distinct': '21'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Leadership'}, {'feature_label': 'Leadership', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Leadership')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Negotiation', 'transformation_label': 'String Indexer'}], 'feature': 'Negotiation', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '10.92', 'stddev': '25.21', 'min': ' ', 'max': '9.3', 'missing': '0', 'distinct': '21'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Negotiation'}, {'feature_label': 'Negotiation', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Negotiation')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'PersonalResponsibility', 'transformation_label': 'String Indexer'}], 'feature': 'PersonalResponsibility', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '11.61', 'stddev': '19.53', 'min': ' ', 'max': '81', 'missing': '0', 'distinct': '20'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'PersonalResponsibility'}, {'feature_label': 'PersonalResponsibility', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('PersonalResponsibility')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Persuasion', 'transformation_label': 'String Indexer'}], 'feature': 'Persuasion', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '8.69', 'stddev': '21.01', 'min': ' ', 'max': '9', 'missing': '0', 'distinct': '19'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Persuasion'}, {'feature_label': 'Persuasion', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Persuasion')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'PlanningOrganizing', 'transformation_label': 'String Indexer'}], 'feature': 'PlanningOrganizing', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '13.97', 'stddev': '24.25', 'min': ' ', 'max': '9', 'missing': '0', 'distinct': '21'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'PlanningOrganizing'}, {'feature_label': 'PlanningOrganizing', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('PlanningOrganizing')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Presenting', 'transformation_label': 'String Indexer'}], 'feature': 'Presenting', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '10.16', 'stddev': '25.43', 'min': ' ', 'max': '9.7', 'missing': '0', 'distinct': '21'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Presenting'}, {'feature_label': 'Presenting', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Presenting')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Teamwork', 'transformation_label': 'String Indexer'}], 'feature': 'Teamwork', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '12.01', 'stddev': '20.12', 'min': ' ', 'max': '9.7', 'missing': '0', 'distinct': '17'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Teamwork'}, {'feature_label': 'Teamwork', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Teamwork')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'WrittenCommunication', 'transformation_label': 'String Indexer'}], 'feature': 'WrittenCommunication', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '10.22', 'stddev': '22.49', 'min': ' ', 'max': '90', 'missing': '0', 'distinct': '21'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'WrittenCommunication'}, {'feature_label': 'WrittenCommunication', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('WrittenCommunication')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'UnderstandingOthers', 'transformation_label': 'String Indexer'}], 'feature': 'UnderstandingOthers', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '11.05', 'stddev': '18.99', 'min': ' ', 'max': '9.4', 'missing': '0', 'distinct': '20'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'UnderstandingOthers'}, {'feature_label': 'UnderstandingOthers', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('UnderstandingOthers')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'PracticalThinking', 'transformation_label': 'String Indexer'}], 'feature': 'PracticalThinking', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '11.87', 'stddev': '18.14', 'min': ' ', 'max': '9.4', 'missing': '0', 'distinct': '19'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'PracticalThinking'}, {'feature_label': 'PracticalThinking', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('PracticalThinking')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'SystemsJudgment', 'transformation_label': 'String Indexer'}], 'feature': 'SystemsJudgment', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '11.92', 'stddev': '18.61', 'min': ' ', 'max': '9.2', 'missing': '0', 'distinct': '18'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'SystemsJudgment'}, {'feature_label': 'SystemsJudgment', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('SystemsJudgment')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'SenseOfSelf', 'transformation_label': 'String Indexer'}], 'feature': 'SenseOfSelf', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '11.44', 'stddev': '15.96', 'min': ' ', 'max': '9', 'missing': '0', 'distinct': '21'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'SenseOfSelf'}, {'feature_label': 'SenseOfSelf', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('SenseOfSelf')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'RoleAwareness', 'transformation_label': 'String Indexer'}], 'feature': 'RoleAwareness', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '11.52', 'stddev': '21.55', 'min': ' ', 'max': '88', 'missing': '0', 'distinct': '18'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'RoleAwareness'}, {'feature_label': 'RoleAwareness', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('RoleAwareness')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'SelfDirection', 'transformation_label': 'String Indexer'}], 'feature': 'SelfDirection', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '11.94', 'stddev': '18.07', 'min': ' ', 'max': '9.2', 'missing': '0', 'distinct': '18'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'SelfDirection'}, {'feature_label': 'SelfDirection', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('SelfDirection')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'UnderstandingOthersBias', 'transformation_label': 'String Indexer'}], 'feature': 'UnderstandingOthersBias', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '-0.68', 'stddev': '0.47', 'min': ' ', 'max': '1', 'missing': '0', 'distinct': '4'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'UnderstandingOthersBias'}, {'feature_label': 'UnderstandingOthersBias', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('UnderstandingOthersBias')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'ReportDate', 'transformation_label': 'String Indexer'}], 'feature': 'ReportDate', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': '00:00.0', 'missing': '0', 'distinct': '2'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'ReportDate'}, {'feature_label': 'ReportDate', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('ReportDate')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'TopDISCscore', 'transformation_label': 'String Indexer'}], 'feature': 'TopDISCscore', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': 'Steadiness', 'missing': '0', 'distinct': '5'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'TopDISCscore'}, {'feature_label': 'TopDISCscore', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('TopDISCscore')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'LowDISCscore', 'transformation_label': 'String Indexer'}], 'feature': 'LowDISCscore', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': 'Steadiness', 'missing': '0', 'distinct': '5'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'LowDISCscore'}, {'feature_label': 'LowDISCscore', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('LowDISCscore')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'TopMotivator', 'transformation_label': 'String Indexer'}], 'feature': 'TopMotivator', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': 'Utilitarian', 'missing': '0', 'distinct': '7'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'TopMotivator'}, {'feature_label': 'TopMotivator', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('TopMotivator')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'LowMotivator', 'transformation_label': 'String Indexer'}], 'feature': 'LowMotivator', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': 'Utilitarian', 'missing': '0', 'distinct': '6'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'LowMotivator'}, {'feature_label': 'LowMotivator', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('LowMotivator')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'TopSkill', 'transformation_label': 'String Indexer'}], 'feature': 'TopSkill', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': 'Teamwork', 'missing': '0', 'distinct': '10'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'TopSkill'}, {'feature_label': 'TopSkill', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('TopSkill')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'LowSkill', 'transformation_label': 'String Indexer'}], 'feature': 'LowSkill', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '500', 'mean': '', 'stddev': '', 'min': ' ', 'max': 'WrittenCommunication', 'missing': '0', 'distinct': '9'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'LowSkill'}, {'feature_label': 'LowSkill', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('LowSkill')
        display(transformationDF.limit(2).toPandas())
        return transformationDF


***AUTOML FUNCTIONS***

In [None]:
from sklearn.model_selection import train_test_split
from tpot import TPOTRegressor
import pyspark


def functionRegression(sparkDF, listOfFeatures, label):
    """Search for a regression pipeline with TPOT on a Spark DataFrame.

    The frame is persisted (memory and disk), converted to pandas, and
    split into a 90% training part and a 10% hold-out part before the
    TPOT search is run on the training part.

    Args:
        sparkDF: Spark DataFrame containing the feature columns and the label.
        listOfFeatures: names of the columns used as model inputs.
        label: name of the target column.

    Returns:
        dict with the fitted TPOT object ('model'), the hold-out arrays
        ('X_test', 'y_test'), the label name ('label') and the feature
        names ('columnNames').
    """
    sparkDF.persist(pyspark.StorageLevel.MEMORY_AND_DISK)
    pandasFrame = sparkDF.toPandas()

    # The label column is removed first, then the requested features are
    # selected from what remains.
    withoutLabel = pandasFrame.drop(label, axis=1)
    featureMatrix = withoutLabel[listOfFeatures].values
    targetVector = pandasFrame[label].values

    # Fixed random_state keeps the train/hold-out partition reproducible.
    partitions = train_test_split(
        featureMatrix, targetVector, random_state=1, test_size=0.1)
    X_train, X_test, y_train, y_test = partitions

    searchSettings = dict(
        verbosity=3,
        generations=10,
        max_time_mins=5,
        n_jobs=-1,
        random_state=25,
        population_size=15,
        use_dask=True,
    )
    regressor = TPOTRegressor(**searchSettings)
    regressor.fit(X_train, y_train)

    # NOTE(review): the value shown is whatever TPOTRegressor.score returns
    # on the hold-out part; confirm that "Error rate" is the right label.
    holdoutScore = regressor.score(X_test, y_test)
    display(" Error rate of Model : %s" % holdoutScore)

    return {
        'model': regressor,
        'X_test': X_test,
        'y_test': y_test,
        'label': label,
        'columnNames': listOfFeatures,
    }


***READING DATAFRAME***

In [None]:
# Create the Spark session. Replace 'local[1]' with your Spark master
# (IP and port) to connect to a remote server.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master('local[1]').getOrCreate()

#%run churnHooks.ipynb
try:
    #sourcePreExecutionHook()

    # The connector receives its configuration as the textual form of a
    # dict; the adjacent literals below concatenate to that single string.
    aiforgeupdated = HDFSConnector.fetch(
        spark,
        "{'url': '/FileStore/platform/uploadedSourceFiles/AI FORGE updated.csv', "
        "'filename': 'AI FORGE updated.csv', "
        "'delimiter': ',', "
        "'file_type': 'Delimeted', "
        "'is_header': 'Use Header Line', "
        "'domain': 'http://172.31.59.158', "
        "'port': '40070', "
        "'dirPath': '/FileStore/platform', "
        "'server_url': '/nexusMax/NexusMaxPlatform/uploads/platform/'}",
    )
    #sourcePostExecutionHook(aiforgeupdated)

except Exception as ex:
    # Failures are only logged; the cell itself does not re-raise.
    logging.error(ex)
#spark.stop()


***TRANSFORMING DATAFRAME***

In [None]:
#%run churnHooks.ipynb
try:
	#transformationPreExecutionHook()

	churnautofe = TransformationMain.run(aiforgeupdated,json.dumps( {"FE": [{"transformationsData": [{"feature_label": "Intern", "transformation_label": "String Indexer"}], "feature": "Intern", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": "AHMED REDA", "max": "vaishali oruganti", "missing": "0", "distinct": "66"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern"}, {"transformationsData": [{"feature_label": "DaysInInternship", "transformation_label": "String Indexer"}], "feature": "DaysInInternship", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "473.08", "stddev": "273.84", "min": " ", "max": "94", "missing": "0", "distinct": "41"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "DaysInInternship"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "ClassSignupsID", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "500", "mean": "12175.63", "stddev": "2169.81", "min": "2469", "max": "14790", "missing": "0"}, "updatedLabel": "ClassSignupsID"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "SurveySent", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "500", "mean": "1.11", "stddev": "1.14", "min": "0", "max": "4", "missing": "0"}, "updatedLabel": "SurveySent"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "INTERNSHIP_SCORE", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "500", "mean": "0.47", "stddev": "0.24", "min": "0.208546", "max": "1.0", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 
1}], "updatedLabel": "INTERNSHIP_SCORE"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "SurveysTaken", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "500", "mean": "0.65", "stddev": "1.01", "min": "0", "max": "4", "missing": "0"}, "updatedLabel": "SurveysTaken"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "SurveysGiven", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "500", "mean": "0.57", "stddev": "0.84", "min": "0", "max": "4", "missing": "0"}, "updatedLabel": "SurveysGiven"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "QuestionsScored", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "500", "mean": "7.36", "stddev": "10.9", "min": "0", "max": "52", "missing": "0"}, "updatedLabel": "QuestionsScored"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Work Ethic", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "500", "mean": "0.36", "stddev": "0.46", "min": "0.0", "max": "1.0", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "Work Ethic"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Availability", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "500", "mean": "0.36", "stddev": "0.47", "min": "0.0", "max": "1.0", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "Availability"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Team Player", "type": "real", "selected": "True", "replaceby": "mean", "stats": 
{"count": "500", "mean": "0.36", "stddev": "0.47", "min": "0.0", "max": "1.0", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "Team Player"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Quality of Work", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "500", "mean": "0.36", "stddev": "0.46", "min": "0.0", "max": "1.0", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "Quality of Work"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Trust_2", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "500", "mean": "0.36", "stddev": "0.46", "min": "0.0", "max": "1.0", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "Trust_2"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Respectful", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "500", "mean": "0.37", "stddev": "0.47", "min": "0.0", "max": "1.0", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "Respectful"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "SurveyScore", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "500", "mean": "0.36", "stddev": "0.46", "min": "0.0", "max": "1.0", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "SurveyScore"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "BaseCamp_FinalScore", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "500", "mean": "0.3", "stddev": "0.4", "min": "0.0", "max": "1.0", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": 
"BaseCamp_FinalScore"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "IPBC_FinalScore", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "500", "mean": "0.28", "stddev": "0.33", "min": "0.0", "max": "1.0", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "IPBC_FinalScore"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "BaseCamp_Score", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "500", "mean": "0.3", "stddev": "0.4", "min": "0.0", "max": "1.0", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "BaseCamp_Score"}, {"transformationsData": [{"feature_label": "Attendance", "transformation_label": "String Indexer"}], "feature": "Attendance", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "2.51", "stddev": "1.43", "min": " ", "max": "6", "missing": "0", "distinct": "7"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Attendance"}, {"transformationsData": [{"feature_label": "BaseCampCommentScore", "transformation_label": "String Indexer"}], "feature": "BaseCampCommentScore", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "2.51", "stddev": "1.43", "min": " ", "max": "6", "missing": "0", "distinct": "7"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "BaseCampCommentScore"}, {"transformationsData": [{"feature_label": "BaseCampTotalComments", "transformation_label": "String Indexer"}], "feature": "BaseCampTotalComments", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "4.87", "stddev": "4.54", "min": " ", "max": "9", "missing": "0", "distinct": "18"}, "transformation": [{"transformation": "String Indexer", 
"selectedAsDefault": 1}], "updatedLabel": "BaseCampTotalComments"}, {"transformationsData": [{"feature_label": "GoalResponseLen", "transformation_label": "String Indexer"}], "feature": "GoalResponseLen", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": " ", "missing": "0", "distinct": "1"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "GoalResponseLen"}, {"transformationsData": [{"feature_label": "IPBC_Score", "transformation_label": "String Indexer"}], "feature": "IPBC_Score", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.31", "stddev": "0.34", "min": " ", "max": "1", "missing": "0", "distinct": "17"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "IPBC_Score"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "InternID", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "500", "mean": "348.79", "stddev": "95.46", "min": "6", "max": "436", "missing": "0"}, "updatedLabel": "InternID"}, {"transformationsData": [{"feature_label": "InternEmail", "transformation_label": "String Indexer"}], "feature": "InternEmail", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": "5000JOB@GMAIL.COM", "max": "yirsawemebet@gmail.com", "missing": "0", "distinct": "66"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "InternEmail"}, {"transformationsData": [{"feature_label": "InternStartDate", "transformation_label": "String Indexer"}], "feature": "InternStartDate", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": "11:56.0", "missing": 
"0", "distinct": "28"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "InternStartDate"}, {"transformationsData": [{"feature_label": "InternEndDate", "transformation_label": "String Indexer"}], "feature": "InternEndDate", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": "54:18.0", "missing": "0", "distinct": "40"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "InternEndDate"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "InternTechGroupID", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "500", "mean": "3.9", "stddev": "2.51", "min": "1", "max": "10", "missing": "0"}, "updatedLabel": "InternTechGroupID"}, {"transformationsData": [{"feature_label": "InternTechGroupName", "transformation_label": "String Indexer"}], "feature": "InternTechGroupName", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": "Data Warehousing and ETL", "max": "Tableau", "missing": "0", "distinct": "6"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "InternTechGroupName"}, {"transformationsData": [{"feature_label": "InternManager", "transformation_label": "String Indexer"}], "feature": "InternManager", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": "Ali Muwwakkil", "max": "Ali Muwwakkil", "missing": "0", "distinct": "1"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "InternManager"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "internisactive", "transformation": [{"transformation": "novalue", "selectedAsDefault": 
1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "500", "mean": "0.55", "stddev": "0.5", "min": "0", "max": "1", "missing": "0"}, "updatedLabel": "internisactive"}, {"transformationsData": [{"feature_label": "PName", "transformation_label": "String Indexer"}], "feature": "PName", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": "Update Chat and Zendesk Reports", "missing": "0", "distinct": "12"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "PName"}, {"transformationsData": [{"feature_label": "L_URL", "transformation_label": "String Indexer"}], "feature": "L_URL", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": "https://3.basecamp.com/3945211/buckets/24865175/todolists/7255296952", "missing": "0", "distinct": "12"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "L_URL"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "Intern_MM_WkOrder", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "500", "mean": "46.49", "stddev": "37.82", "min": "1", "max": "159", "missing": "0"}, "updatedLabel": "Intern_MM_WkOrder"}, {"transformationsData": [{"feature_label": "Intern_MM_Wk", "transformation_label": "String Indexer"}], "feature": "Intern_MM_Wk", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "22.92", "stddev": "14.71", "min": " ", "max": "9", "missing": "0", "distinct": "54"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_Wk"}, {"transformationsData": [{"feature_label": "Intern_MM_Yr", "transformation_label": "String Indexer"}], "feature": 
"Intern_MM_Yr", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "2022.9", "stddev": "1.03", "min": " ", "max": "2024", "missing": "0", "distinct": "6"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_Yr"}, {"transformationsData": [{"feature_label": "Intern_LastActivitySection", "transformation_label": "String Indexer"}], "feature": "Intern_LastActivitySection", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": "Power BI Designer - Power BI Desktop", "missing": "0", "distinct": "14"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_LastActivitySectio..."}, {"transformationsData": [{"feature_label": "Intern_MM_HW", "transformation_label": "String Indexer"}], "feature": "Intern_MM_HW", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.01", "stddev": "0.08", "min": " ", "max": "1", "missing": "0", "distinct": "3"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_HW"}, {"transformationsData": [{"feature_label": "Intern_MM_Videos", "transformation_label": "String Indexer"}], "feature": "Intern_MM_Videos", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.11", "stddev": "0.37", "min": " ", "max": "2", "missing": "0", "distinct": "4"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_Videos"}, {"transformationsData": [{"feature_label": "Intern_MM_VideosScore", "transformation_label": "String Indexer"}], "feature": "Intern_MM_VideosScore", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.38", "stddev": "1.27", "min": " ", "max": "5", "missing": "0", "distinct": "5"}, 
"transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_VideosScore"}, {"transformationsData": [{"feature_label": "Intern_MM_AutoInt", "transformation_label": "String Indexer"}], "feature": "Intern_MM_AutoInt", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.47", "stddev": "1.65", "min": " ", "max": "6", "missing": "0", "distinct": "9"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_AutoInt"}, {"transformationsData": [{"feature_label": "Intern_MM_Phone", "transformation_label": "String Indexer"}], "feature": "Intern_MM_Phone", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.12", "stddev": "0.47", "min": " ", "max": "4", "missing": "0", "distinct": "6"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_Phone"}, {"transformationsData": [{"feature_label": "Intern_MM_LoginPer", "transformation_label": "String Indexer"}], "feature": "Intern_MM_LoginPer", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.48", "stddev": "0.35", "min": " ", "max": "1", "missing": "0", "distinct": "10"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_LoginPer"}, {"transformationsData": [{"feature_label": "Intern_MM_ActivityScore", "transformation_label": "String Indexer"}], "feature": "Intern_MM_ActivityScore", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.86", "stddev": "1.94", "min": " ", "max": "8", "missing": "0", "distinct": "11"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_ActivityScore"}, {"transformationsData": [{"feature_label": "Intern_MM_ParticipationPer", "transformation_label": "String 
Indexer"}], "feature": "Intern_MM_ParticipationPer", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.37", "stddev": "0.48", "min": " ", "max": "1", "missing": "0", "distinct": "3"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_ParticipationPe..."}, {"transformationsData": [{"feature_label": "Intern_MM_Mentor", "transformation_label": "String Indexer"}], "feature": "Intern_MM_Mentor", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": "hubert ndifusah", "missing": "0", "distinct": "13"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_Mentor"}, {"transformationsData": [{"feature_label": "Intern_MM_UserComments", "transformation_label": "String Indexer"}], "feature": "Intern_MM_UserComments", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.73", "stddev": "0.79", "min": " ", "max": "4", "missing": "0", "distinct": "6"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_UserComments"}, {"transformationsData": [{"feature_label": "Intern_MM_ActivityScoreChk", "transformation_label": "String Indexer"}], "feature": "Intern_MM_ActivityScoreChk", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.37", "stddev": "0.48", "min": " ", "max": "1", "missing": "0", "distinct": "3"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_ActivityScoreCh..."}, {"transformationsData": [{"feature_label": "Intern_MM_UserCommentsChk", "transformation_label": "String Indexer"}], "feature": "Intern_MM_UserCommentsChk", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.57", "stddev": "0.5", "min": " 
", "max": "1", "missing": "0", "distinct": "3"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_UserCommentsChk"}, {"transformationsData": [{"feature_label": "Intern_MM_PlacementDate", "transformation_label": "String Indexer"}], "feature": "Intern_MM_PlacementDate", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": "58:27.9", "missing": "0", "distinct": "6"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_PlacementDate"}, {"transformationsData": [{"feature_label": "Intern_MM_JRPChk", "transformation_label": "String Indexer"}], "feature": "Intern_MM_JRPChk", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.0", "stddev": "0.05", "min": " ", "max": "1", "missing": "0", "distinct": "3"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_JRPChk"}, {"transformationsData": [{"feature_label": "Intern_MM_SubChk", "transformation_label": "String Indexer"}], "feature": "Intern_MM_SubChk", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.0", "stddev": "0.0", "min": " ", "max": "0", "missing": "0", "distinct": "2"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_SubChk"}, {"transformationsData": [{"feature_label": "Intern_MM_AutoIntChk", "transformation_label": "String Indexer"}], "feature": "Intern_MM_AutoIntChk", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.17", "stddev": "0.38", "min": " ", "max": "1", "missing": "0", "distinct": "3"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_AutoIntChk"}, {"transformationsData": [{"feature_label": 
"Intern_MM_PhoneChk", "transformation_label": "String Indexer"}], "feature": "Intern_MM_PhoneChk", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.08", "stddev": "0.28", "min": " ", "max": "1", "missing": "0", "distinct": "3"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_PhoneChk"}, {"transformationsData": [{"feature_label": "Intern_MM_Wk_ActivityScore", "transformation_label": "String Indexer"}], "feature": "Intern_MM_Wk_ActivityScore", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.19", "stddev": "0.08", "min": " ", "max": "0.472857", "missing": "0", "distinct": "144"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_Wk_ActivityScor..."}, {"transformationsData": [{"feature_label": "Intern_MM_WklyComments", "transformation_label": "String Indexer"}], "feature": "Intern_MM_WklyComments", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": " ", "missing": "0", "distinct": "1"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_WklyComments"}, {"transformationsData": [{"feature_label": "Intern_MM_LastCommentDate", "transformation_label": "String Indexer"}], "feature": "Intern_MM_LastCommentDate", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": " ", "missing": "0", "distinct": "1"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_MM_LastCommentDate"}, {"transformationsData": [{"feature_label": "Intern_CC_Answer", "transformation_label": "String Indexer"}], "feature": "Intern_CC_Answer", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": 
"2.06", "stddev": "5.19", "min": " ", "max": "8", "missing": "0", "distinct": "19"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_CC_Answer"}, {"transformationsData": [{"feature_label": "Intern_CC_CorrectAnswer", "transformation_label": "String Indexer"}], "feature": "Intern_CC_CorrectAnswer", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.01", "stddev": "0.09", "min": " ", "max": "1", "missing": "0", "distinct": "3"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_CC_CorrectAnswer"}, {"transformationsData": [{"feature_label": "Intern_CC_VideosHosted", "transformation_label": "String Indexer"}], "feature": "Intern_CC_VideosHosted", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.0", "stddev": "0.0", "min": " ", "max": "0", "missing": "0", "distinct": "2"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_CC_VideosHosted"}, {"transformationsData": [{"feature_label": "Intern_CC_QualfyVideo", "transformation_label": "String Indexer"}], "feature": "Intern_CC_QualfyVideo", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.0", "stddev": "0.0", "min": " ", "max": "0", "missing": "0", "distinct": "2"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_CC_QualfyVideo"}, {"transformationsData": [{"feature_label": "Intern_COE_Participation", "transformation_label": "String Indexer"}], "feature": "Intern_COE_Participation", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "6.08", "stddev": "5.88", "min": " ", "max": "9", "missing": "0", "distinct": "23"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": 
"Intern_COE_Participation"}, {"transformationsData": [{"feature_label": "Intern_COE_ProjectUpdates", "transformation_label": "String Indexer"}], "feature": "Intern_COE_ProjectUpdates", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.38", "stddev": "1.86", "min": " ", "max": "9", "missing": "0", "distinct": "9"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Intern_COE_ProjectUpdates"}, {"transformationsData": [{"feature_label": "IPBC_Part1", "transformation_label": "String Indexer"}], "feature": "IPBC_Part1", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "0.81", "stddev": "1.47", "min": " ", "max": "8", "missing": "0", "distinct": "11"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "IPBC_Part1"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "IPBC_Part2", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "500", "mean": "0.77", "stddev": "2.22", "min": "0", "max": "10", "missing": "0"}, "updatedLabel": "IPBC_Part2"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "IPBC_Part3", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "500", "mean": "2.92", "stddev": "3.65", "min": "0", "max": "10", "missing": "0"}, "updatedLabel": "IPBC_Part3"}, {"transformationsData": [{"feature_label": "IPBC_Score_Pre_Total", "transformation_label": "String Indexer"}], "feature": "IPBC_Score_Pre_Total", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "4.89", "stddev": "5.67", "min": " ", "max": "9", "missing": "0", "distinct": "23"}, "transformation": [{"transformation": "String Indexer", 
"selectedAsDefault": 1}], "updatedLabel": "IPBC_Score_Pre_Total"}, {"transformationsData": [{"feature_label": "Phase", "transformation_label": "String Indexer"}], "feature": "Phase", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": "Phase 5 - Completed", "missing": "0", "distinct": "7"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Phase"}, {"transformationsData": [{"feature_label": "InternLevelGroup", "transformation_label": "String Indexer"}], "feature": "InternLevelGroup", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "2.43", "stddev": "0.58", "min": " ", "max": "4", "missing": "0", "distinct": "5"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "InternLevelGroup"}, {"transformationsData": [{"feature_label": "CurrentSprintTask", "transformation_label": "String Indexer"}], "feature": "CurrentSprintTask", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": " ", "missing": "0", "distinct": "1"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "CurrentSprintTask"}, {"transformationsData": [{"feature_label": "StudentProfile", "transformation_label": "String Indexer"}], "feature": "StudentProfile", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": "https://app.colaberry.com/app/ipbc/students/ipbc/15676", "max": "https://app.colaberry.com/app/ipbc/students/ipbc/39043", "missing": "0", "distinct": "66"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "StudentProfile"}, {"transformationsData": [{"feature_label": "ProjectName_2", "transformation_label": "String Indexer"}], "feature": "ProjectName_2", 
"type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": "Update Chat and Zendesk Reports", "missing": "0", "distinct": "12"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "ProjectName_2"}, {"transformationsData": [{"feature_label": "ProjectKPI_Link", "transformation_label": "String Indexer"}], "feature": "ProjectKPI_Link", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": "https://app.colaberry.com/app/student/ProjMgmtByAdmin/1956", "missing": "0", "distinct": "3"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "ProjectKPI_Link"}, {"transformationsData": [{"feature_label": "Gender", "transformation_label": "String Indexer"}], "feature": "Gender", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": "M", "missing": "0", "distinct": "3"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Gender"}, {"transformationsData": [{"feature_label": "Dominance", "transformation_label": "String Indexer"}], "feature": "Dominance", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "41.89", "stddev": "25.65", "min": " ", "max": "92", "missing": "0", "distinct": "21"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Dominance"}, {"transformationsData": [{"feature_label": "Influencer", "transformation_label": "String Indexer"}], "feature": "Influencer", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "44.99", "stddev": "12.2", "min": " ", "max": "83", "missing": "0", "distinct": "19"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], 
"updatedLabel": "Influencer"}, {"transformationsData": [{"feature_label": "Steadiness", "transformation_label": "String Indexer"}], "feature": "Steadiness", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "64.02", "stddev": "23.5", "min": " ", "max": "90", "missing": "0", "distinct": "19"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Steadiness"}, {"transformationsData": [{"feature_label": "Compliance", "transformation_label": "String Indexer"}], "feature": "Compliance", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "62.45", "stddev": "17.46", "min": " ", "max": "93", "missing": "0", "distinct": "15"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Compliance"}, {"transformationsData": [{"feature_label": "Theoretical", "transformation_label": "String Indexer"}], "feature": "Theoretical", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "51.18", "stddev": "10.66", "min": " ", "max": "78", "missing": "0", "distinct": "20"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Theoretical"}, {"transformationsData": [{"feature_label": "Utilitarian", "transformation_label": "String Indexer"}], "feature": "Utilitarian", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "45.2", "stddev": "11.25", "min": " ", "max": "73", "missing": "0", "distinct": "22"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Utilitarian"}, {"transformationsData": [{"feature_label": "Aesthetic", "transformation_label": "String Indexer"}], "feature": "Aesthetic", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "39.14", "stddev": "7.69", "min": " ", "max": "52", "missing": "0", 
"distinct": "21"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Aesthetic"}, {"transformationsData": [{"feature_label": "Social", "transformation_label": "String Indexer"}], "feature": "Social", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "36.61", "stddev": "13.13", "min": " ", "max": "8", "missing": "0", "distinct": "21"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Social"}, {"transformationsData": [{"feature_label": "Individualistic", "transformation_label": "String Indexer"}], "feature": "Individualistic", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "42.54", "stddev": "10.17", "min": " ", "max": "63", "missing": "0", "distinct": "19"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Individualistic"}, {"transformationsData": [{"feature_label": "Traditional", "transformation_label": "String Indexer"}], "feature": "Traditional", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "40.84", "stddev": "7.89", "min": " ", "max": "56", "missing": "0", "distinct": "19"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Traditional"}, {"transformationsData": [{"feature_label": "Flexibility", "transformation_label": "String Indexer"}], "feature": "Flexibility", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "11.11", "stddev": "19.2", "min": " ", "max": "9", "missing": "0", "distinct": "24"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Flexibility"}, {"transformationsData": [{"feature_label": "FuturisticThinking", "transformation_label": "String Indexer"}], "feature": "FuturisticThinking", "type": "string", "selected": "True", 
"replaceby": "max", "stats": {"count": "500", "mean": "5.37", "stddev": "10.84", "min": " ", "max": "7.7", "missing": "0", "distinct": "20"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "FuturisticThinking"}, {"transformationsData": [{"feature_label": "GoalOrientation", "transformation_label": "String Indexer"}], "feature": "GoalOrientation", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "13.71", "stddev": "24.38", "min": " ", "max": "9.7", "missing": "0", "distinct": "19"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "GoalOrientation"}, {"transformationsData": [{"feature_label": "InterpersonalSkills", "transformation_label": "String Indexer"}], "feature": "InterpersonalSkills", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "12.31", "stddev": "24.81", "min": " ", "max": "9.7", "missing": "0", "distinct": "23"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "InterpersonalSkills"}, {"transformationsData": [{"feature_label": "Leadership", "transformation_label": "String Indexer"}], "feature": "Leadership", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "10.96", "stddev": "20.53", "min": " ", "max": "9.7", "missing": "0", "distinct": "21"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Leadership"}, {"transformationsData": [{"feature_label": "Negotiation", "transformation_label": "String Indexer"}], "feature": "Negotiation", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "10.92", "stddev": "25.21", "min": " ", "max": "9.3", "missing": "0", "distinct": "21"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Negotiation"}, 
{"transformationsData": [{"feature_label": "PersonalResponsibility", "transformation_label": "String Indexer"}], "feature": "PersonalResponsibility", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "11.61", "stddev": "19.53", "min": " ", "max": "81", "missing": "0", "distinct": "20"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "PersonalResponsibility"}, {"transformationsData": [{"feature_label": "Persuasion", "transformation_label": "String Indexer"}], "feature": "Persuasion", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "8.69", "stddev": "21.01", "min": " ", "max": "9", "missing": "0", "distinct": "19"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Persuasion"}, {"transformationsData": [{"feature_label": "PlanningOrganizing", "transformation_label": "String Indexer"}], "feature": "PlanningOrganizing", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "13.97", "stddev": "24.25", "min": " ", "max": "9", "missing": "0", "distinct": "21"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "PlanningOrganizing"}, {"transformationsData": [{"feature_label": "Presenting", "transformation_label": "String Indexer"}], "feature": "Presenting", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "10.16", "stddev": "25.43", "min": " ", "max": "9.7", "missing": "0", "distinct": "21"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Presenting"}, {"transformationsData": [{"feature_label": "Teamwork", "transformation_label": "String Indexer"}], "feature": "Teamwork", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "12.01", "stddev": "20.12", "min": " ", "max": 
"9.7", "missing": "0", "distinct": "17"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Teamwork"}, {"transformationsData": [{"feature_label": "WrittenCommunication", "transformation_label": "String Indexer"}], "feature": "WrittenCommunication", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "10.22", "stddev": "22.49", "min": " ", "max": "90", "missing": "0", "distinct": "21"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "WrittenCommunication"}, {"transformationsData": [{"feature_label": "UnderstandingOthers", "transformation_label": "String Indexer"}], "feature": "UnderstandingOthers", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "11.05", "stddev": "18.99", "min": " ", "max": "9.4", "missing": "0", "distinct": "20"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "UnderstandingOthers"}, {"transformationsData": [{"feature_label": "PracticalThinking", "transformation_label": "String Indexer"}], "feature": "PracticalThinking", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "11.87", "stddev": "18.14", "min": " ", "max": "9.4", "missing": "0", "distinct": "19"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "PracticalThinking"}, {"transformationsData": [{"feature_label": "SystemsJudgment", "transformation_label": "String Indexer"}], "feature": "SystemsJudgment", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "11.92", "stddev": "18.61", "min": " ", "max": "9.2", "missing": "0", "distinct": "18"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "SystemsJudgment"}, {"transformationsData": [{"feature_label": "SenseOfSelf", 
"transformation_label": "String Indexer"}], "feature": "SenseOfSelf", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "11.44", "stddev": "15.96", "min": " ", "max": "9", "missing": "0", "distinct": "21"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "SenseOfSelf"}, {"transformationsData": [{"feature_label": "RoleAwareness", "transformation_label": "String Indexer"}], "feature": "RoleAwareness", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "11.52", "stddev": "21.55", "min": " ", "max": "88", "missing": "0", "distinct": "18"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "RoleAwareness"}, {"transformationsData": [{"feature_label": "SelfDirection", "transformation_label": "String Indexer"}], "feature": "SelfDirection", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "11.94", "stddev": "18.07", "min": " ", "max": "9.2", "missing": "0", "distinct": "18"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "SelfDirection"}, {"transformationsData": [{"feature_label": "UnderstandingOthersBias", "transformation_label": "String Indexer"}], "feature": "UnderstandingOthersBias", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "-0.68", "stddev": "0.47", "min": " ", "max": "1", "missing": "0", "distinct": "4"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "UnderstandingOthersBias"}, {"transformationsData": [{"feature_label": "ReportDate", "transformation_label": "String Indexer"}], "feature": "ReportDate", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": "00:00.0", "missing": "0", "distinct": "2"}, "transformation": 
[{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "ReportDate"}, {"transformationsData": [{"feature_label": "TopDISCscore", "transformation_label": "String Indexer"}], "feature": "TopDISCscore", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": "Steadiness", "missing": "0", "distinct": "5"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "TopDISCscore"}, {"transformationsData": [{"feature_label": "LowDISCscore", "transformation_label": "String Indexer"}], "feature": "LowDISCscore", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": "Steadiness", "missing": "0", "distinct": "5"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "LowDISCscore"}, {"transformationsData": [{"feature_label": "TopMotivator", "transformation_label": "String Indexer"}], "feature": "TopMotivator", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": "Utilitarian", "missing": "0", "distinct": "7"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "TopMotivator"}, {"transformationsData": [{"feature_label": "LowMotivator", "transformation_label": "String Indexer"}], "feature": "LowMotivator", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": "Utilitarian", "missing": "0", "distinct": "6"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "LowMotivator"}, {"transformationsData": [{"feature_label": "TopSkill", "transformation_label": "String Indexer"}], "feature": "TopSkill", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": 
"", "stddev": "", "min": " ", "max": "Teamwork", "missing": "0", "distinct": "10"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "TopSkill"}, {"transformationsData": [{"feature_label": "LowSkill", "transformation_label": "String Indexer"}], "feature": "LowSkill", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": " ", "max": "WrittenCommunication", "missing": "0", "distinct": "9"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "LowSkill"}]}))

	#transformationPostExecutionHook(churnautofe)

except Exception as ex: 
	logging.error(ex)


***TRAIN MODEL***

In [None]:
#%run churnHooks.ipynb
try:
    # Optional user hook (kept disabled, as generated):
    #mlPreExecutionHook()

    # Input columns handed to the regression routine: the raw columns first,
    # then the "<column>_stringindexer" columns.
    _feature_columns = [
        "ClassSignupsID",
        "SurveySent",
        "INTERNSHIP_SCORE",
        "SurveysTaken",
        "SurveysGiven",
        "QuestionsScored",
        "Work Ethic",
        "Availability",
        "Team Player",
        "Quality of Work",
        "Trust_2",
        "Respectful",
        "SurveyScore",
        "BaseCamp_FinalScore",
        "IPBC_FinalScore",
        "BaseCamp_Score",
        "InternID",
        "InternTechGroupID",
        "Intern_MM_WkOrder",
        "IPBC_Part2",
        "IPBC_Part3",
        "Intern_stringindexer",
        "DaysInInternship_stringindexer",
        "Attendance_stringindexer",
        "BaseCampCommentScore_stringindexer",
        "BaseCampTotalComments_stringindexer",
        "GoalResponseLen_stringindexer",
        "IPBC_Score_stringindexer",
        "InternEmail_stringindexer",
        "InternStartDate_stringindexer",
        "InternEndDate_stringindexer",
        "InternTechGroupName_stringindexer",
        "InternManager_stringindexer",
        "PName_stringindexer",
        "L_URL_stringindexer",
        "Intern_MM_Wk_stringindexer",
        "Intern_MM_Yr_stringindexer",
        "Intern_LastActivitySection_stringindexer",
        "Intern_MM_HW_stringindexer",
        "Intern_MM_Videos_stringindexer",
        "Intern_MM_VideosScore_stringindexer",
        "Intern_MM_AutoInt_stringindexer",
        "Intern_MM_Phone_stringindexer",
        "Intern_MM_LoginPer_stringindexer",
        "Intern_MM_ActivityScore_stringindexer",
        "Intern_MM_ParticipationPer_stringindexer",
        "Intern_MM_Mentor_stringindexer",
        "Intern_MM_UserComments_stringindexer",
        "Intern_MM_ActivityScoreChk_stringindexer",
        "Intern_MM_UserCommentsChk_stringindexer",
        "Intern_MM_PlacementDate_stringindexer",
        "Intern_MM_JRPChk_stringindexer",
        "Intern_MM_SubChk_stringindexer",
        "Intern_MM_AutoIntChk_stringindexer",
        "Intern_MM_PhoneChk_stringindexer",
        "Intern_MM_Wk_ActivityScore_stringindexer",
        "Intern_MM_WklyComments_stringindexer",
        "Intern_MM_LastCommentDate_stringindexer",
        "Intern_CC_Answer_stringindexer",
        "Intern_CC_CorrectAnswer_stringindexer",
        "Intern_CC_VideosHosted_stringindexer",
        "Intern_CC_QualfyVideo_stringindexer",
        "Intern_COE_Participation_stringindexer",
        "Intern_COE_ProjectUpdates_stringindexer",
        "IPBC_Part1_stringindexer",
        "IPBC_Score_Pre_Total_stringindexer",
        "Phase_stringindexer",
        "InternLevelGroup_stringindexer",
        "CurrentSprintTask_stringindexer",
        "StudentProfile_stringindexer",
        "ProjectName_2_stringindexer",
        "ProjectKPI_Link_stringindexer",
        "Gender_stringindexer",
        "Dominance_stringindexer",
        "Influencer_stringindexer",
        "Steadiness_stringindexer",
        "Compliance_stringindexer",
        "Theoretical_stringindexer",
        "Utilitarian_stringindexer",
        "Aesthetic_stringindexer",
        "Social_stringindexer",
        "Individualistic_stringindexer",
        "Traditional_stringindexer",
        "Flexibility_stringindexer",
        "FuturisticThinking_stringindexer",
        "GoalOrientation_stringindexer",
        "InterpersonalSkills_stringindexer",
        "Leadership_stringindexer",
        "Negotiation_stringindexer",
        "PersonalResponsibility_stringindexer",
        "Persuasion_stringindexer",
        "PlanningOrganizing_stringindexer",
        "Presenting_stringindexer",
        "Teamwork_stringindexer",
        "WrittenCommunication_stringindexer",
        "UnderstandingOthers_stringindexer",
        "PracticalThinking_stringindexer",
        "SystemsJudgment_stringindexer",
        "SenseOfSelf_stringindexer",
        "RoleAwareness_stringindexer",
        "SelfDirection_stringindexer",
        "UnderstandingOthersBias_stringindexer",
        "ReportDate_stringindexer",
        "TopDISCscore_stringindexer",
        "LowDISCscore_stringindexer",
        "TopMotivator_stringindexer",
        "LowMotivator_stringindexer",
        "TopSkill_stringindexer",
        "LowSkill_stringindexer",
    ]
    # Name of the column passed as the third argument (the label to predict).
    _target_column = "internisactive"

    # functionRegression and churnautofe come from earlier cells; the result
    # is kept in dataAutoML for the prediction cell that follows.
    dataAutoML = functionRegression(churnautofe, _feature_columns, _target_column)

    # Optional user hook (kept disabled, as generated):
    #mlPostExecutionHook(dataAutoML)

except Exception as ex:
    # Cell-level boundary: report the failure and let the notebook continue.
    logging.error(ex)
#spark.stop()


***PREDICT ON TRAINED MODEL***

In [None]:
import pandas as pd
import numpy as np
import sklearn.metrics

# Score the trained model on the held-out test split and show the metrics
# together with a preview of actual vs. predicted values.
#
# Reads from dataAutoML (the result of the training cell) the keys:
#   'model', 'X_test', 'y_test', 'label', 'columnNames'.
try:
    model = dataAutoML['model']
    X_test = dataAutoML['X_test']
    y_test = dataAutoML['y_test']
    label = dataAutoML['label']

    # Work on a copy: the original code removed/inserted entries directly in
    # dataAutoML['columnNames'], so re-running this cell saw a column list
    # that already contained the label and prediction columns and no longer
    # matched X_test.
    columnNames = list(dataAutoML['columnNames'])
    if label in columnNames:
        columnNames.remove(label)

    predicted = label + "_predicted"
    y_predicted = model.predict(X_test)

    # Build the preview frame from the feature columns, then attach the
    # actual and predicted target values.
    df = pd.DataFrame(X_test, columns=columnNames)
    df[label] = y_test
    df[predicted] = y_predicted

    # Display order: actual label, prediction, then the features. A new list
    # is built here so the shared list in dataAutoML is left untouched.
    columnNames = [label, predicted] + columnNames
    df = df[columnNames]

    # Regression metrics, rounded to one decimal place for display.
    R2 = np.round(sklearn.metrics.r2_score(y_test, y_predicted), 1)
    Mean_Squared_Error = np.round(sklearn.metrics.mean_squared_error(y_test, y_predicted), 1)
    Mean_Absolute_Error = np.round(sklearn.metrics.mean_absolute_error(y_test, y_predicted), 1)
    display(" R2 score of Prediction on test data    : %s"%R2)
    display(" Mean Squared Error of Prediction on test data    : %s"%Mean_Squared_Error)
    display(" Mean Absolute Error of Prediction on test data   : %s"%Mean_Absolute_Error)
    display(df.head())
except Exception as ex:
    # Cell-level boundary: report the failure; the Spark session below is
    # still stopped.
    logging.error(ex)

# Runs whether or not the prediction step succeeded.
spark.stop()

