***GENERATED CODE FOR lvtourismdata PIPELINE.***

***DON'T EDIT THIS CODE.***

***CONNECTOR FUNCTIONS TO READ DATA.***

In [None]:
import os
import datetime
import logging
import warnings
# Suppress every Python warning raised from this point on.
warnings.filterwarnings('ignore')
# Configure the root logger: "LEVEL:message" lines, INFO and above.
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)


class HDFSConnector:
    """Read a CSV file from HDFS through Spark and write a DataFrame out as CSV.

    Neither method takes ``self``; both are meant to be called on the class
    (``HDFSConnector.fetch(...)``), so they are declared as static methods.
    """

    @staticmethod
    def fetch(spark, config):
        """Load the CSV file named by ``config['url']`` from HDFS.

        Args:
            spark: Object exposing ``read`` (presumably the active
                SparkSession -- confirm against the caller).
            config: String that evaluates to a dict with a ``'url'`` key. The
                url is appended directly after ``host:port``.

        Returns:
            The DataFrame read with a header row and schema inference enabled.

        Raises:
            KeyError: If ``HDFS_SERVER`` / ``HDFS_PORT`` are not set in the
                environment, or ``'url'`` is missing from the config.
            ValueError: If ``HDFS_PORT`` is not an integer.
        """
        # Host and port of the Hadoop name node come from the environment.
        hdfs_server = str(os.environ['HDFS_SERVER'])
        hdfs_port = int(os.environ['HDFS_PORT'])
        # SECURITY NOTE(review): eval() executes arbitrary code contained in
        # `config`. Only safe while the config string is produced by trusted
        # pipeline code; consider json.loads / ast.literal_eval.
        settings = eval(config)
        df = spark.read.options(header='true', inferschema='true').csv(
            f"hdfs://{hdfs_server}:{hdfs_port}{settings['url']}", header='true')
        # `display` is not defined in this file; presumably the notebook
        # built-in -- shows a two-row preview of the loaded data.
        display(df.limit(2).toPandas())
        return df

    @staticmethod
    def put(df, spark, config):
        """Write ``df`` as CSV to a timestamp-prefixed path.

        Args:
            df: DataFrame to persist.
            spark: Unused; kept for interface compatibility.
            config: String that evaluates to a dict with the keys
                ``'is_header'`` (a header row is written only when the value
                is exactly ``"Use Header Line"``), ``'delimiter'`` and
                ``'url'``.

        Returns:
            Whatever the writer's ``save`` call returns.

        Raises:
            KeyError: If one of the required config keys is missing.
        """
        writer = df.write.format('csv')
        # SECURITY NOTE(review): eval() executes arbitrary code contained in
        # `config`; see the note in fetch(). The config is parsed once here
        # instead of once per key.
        settings = eval(config)
        header = 'true' if settings["is_header"] == "Use Header Line" else 'false'
        delimiter = settings["delimiter"]
        prefix = datetime.datetime.now().strftime("%Y-%m-%d %H.%M.%S") + "_"
        # NOTE(review): the format string puts a space between the
        # "<timestamp>_" prefix and the url; kept as-is because existing
        # output locations depend on it -- confirm whether it is intended.
        target = ("%s %s") % (prefix, settings['url'])
        return writer.options(header=header, delimiter=delimiter).save(target)


***TRANSFORMATION FUNCTIONS THAT WILL BE APPLIED TO THE DATA***

In [None]:
import json
from pyspark.sql.types import IntegerType
from pyspark.ml.feature import StringIndexer
from os import listdir
import pyspark.sql.functions as F
from pyspark.sql.functions import when, col, regexp_replace
from pyspark.sql.types import StringType, DoubleType, IntegerType, TimestampType
from pyspark.sql.functions import mean, stddev, min, max, col


class CleanseData:
    """Fill null and single-space values of DataFrame columns with a statistic.

    The ``replaceBy*`` methods take ``feature`` (an object with a ``name``
    attribute, e.g. a schema field) and a DataFrame, and return a new
    DataFrame; the input frame is not modified.
    """

    def cleanValueForFE(self, value):
        """Normalise an aggregate result so it can be used as a fill value.

        Returns ``""`` for ``None``, the string ``"nan"`` for anything whose
        ``str()`` is ``'nan'``, and ``value`` unchanged otherwise.
        """
        if value is None:
            return ""
        if str(value) == 'nan':
            return "nan"
        return value

    def _replaceByAggregate(self, feature, df, aggregate, alias):
        """Fill nulls and " " in ``feature.name`` with ``aggregate`` of the column.

        Args:
            feature: Object whose ``name`` is the column to clean.
            df: Source DataFrame.
            aggregate: Column aggregate function (mean / max / min / stddev).
            alias: Name given to the aggregated column in the result row.

        Returns:
            A DataFrame with the column's nulls and single-space values
            replaced.
        """
        # The statistic is computed over rows that have no null in ANY column
        # (dropna without a subset).
        # NOTE(review): confirm this is intended rather than
        # dropna(subset=[feature.name]).
        complete_rows = df.dropna()
        stat_row = complete_rows.select(
            aggregate(col(feature.name)).alias(alias)).collect()[0]
        fill_value = self.cleanValueForFE(stat_row[alias])
        df = df.fillna(fill_value, subset=[feature.name])
        return df.withColumn(
            feature.name,
            when(col(feature.name) == " ", fill_value).otherwise(col(feature.name)))

    def replaceByMean(self, feature, df, mean_=-1):
        """Replace nulls and " " in the column with the column mean.

        The blank-value replacement was previously built but never assigned,
        so it had no effect; it is now applied in the same form as the
        max/min/stddev variants. The old (discarded) expression also cast the
        column to Integer; that cast is not applied because it would truncate
        or null existing values. ``mean_`` is unused and kept for interface
        compatibility.
        """
        return self._replaceByAggregate(feature, df, mean, 'mean')

    def replaceByMax(self, feature, df, max_=-1):
        """Replace nulls and " " in the column with the column maximum.

        ``max_`` is unused and kept for interface compatibility.
        """
        # `max` here is whatever the module imports under that name (the
        # column aggregate), not necessarily the Python built-in.
        return self._replaceByAggregate(feature, df, max, 'max')

    def replaceByMin(self, feature, df, min_=-1):
        """Replace nulls and " " in the column with the column minimum.

        ``min_`` is unused and kept for interface compatibility.
        """
        # `min` here is whatever the module imports under that name (the
        # column aggregate), not necessarily the Python built-in.
        return self._replaceByAggregate(feature, df, min, 'min')

    def replaceByStandardDeviation(self, feature, df, stddev_=-1):
        """Replace nulls and " " in the column with its standard deviation.

        ``stddev_`` is unused and kept for interface compatibility.
        """
        return self._replaceByAggregate(feature, df, stddev, 'stddev')

    def replaceDateRandomly(self, feature, df):
        """Replace nulls and " " in the column with an existing column value.

        Despite the name, the replacement is the value of the first row
        returned whose column is not null, not a random draw. If the column
        has no non-null value the frame is returned unchanged (previously
        this raised IndexError).
        """
        sample = df.where(col(feature.name).isNotNull()).head(1)
        if not sample:
            return df
        fillValue = self.cleanValueForFE(sample[0][feature.name])
        # Nulls are filled with the string form of the value, blanks with the
        # value itself.
        df = df.fillna(str(fillValue), subset=[feature.name])
        df = df.withColumn(feature.name,
                           when(col(feature.name) == " ", fillValue).otherwise(col(feature.name)))
        return df

    def replaceNullValues(self, fList, df):
        """Apply the configured replacement strategy to every matching column.

        Args:
            fList: Iterable of dicts with the keys ``"feature"`` (text looked
                for in column names) and ``"replaceby"`` (text containing one
                of ``mean``, ``max``, ``min``, ``stddev``, ``random``).
            df: DataFrame to clean.

        Returns:
            The cleaned DataFrame.
        """
        # Checked in this order; the first keyword found in "replaceby" wins.
        strategies = (
            ("mean", self.replaceByMean),
            ("max", self.replaceByMax),
            ("min", self.replaceByMin),
            ("stddev", self.replaceByStandardDeviation),
            ("random", self.replaceDateRandomly),
        )
        featuresList = df.schema.fields
        for featureObj in fList:
            for feat in featuresList:
                # Substring match: a feature name also selects every column
                # whose name merely contains it.
                # NOTE(review): confirm an exact match is not intended.
                if featureObj["feature"] not in feat.name:
                    continue
                for keyword, handler in strategies:
                    if keyword in featureObj["replaceby"]:
                        df = handler(feat, df)
                        break
        return df


def StringIndexerTransform(df, params, transformationData=None):
    """Add a ``<feature>_stringindexer`` column produced by a StringIndexer.

    Args:
        df: Source DataFrame; it is not modified.
        params: Dict whose ``"feature"`` entry names the column to index.
        transformationData: Unused; kept for interface compatibility. The
            default is ``None`` instead of a shared mutable dict.

    Returns:
        A DataFrame with the extra index column. Nulls in the source column
        are replaced by ``''`` before fitting, and rows the indexer considers
        invalid are skipped. When the index column holds at most four
        distinct values it is cast to ``IntegerType``.
    """
    feature = params["feature"]
    outcol = feature + "_stringindexer"

    prepared = df.fillna({feature: ''})
    indexer = StringIndexer(
        inputCol=feature, outputCol=outcol, handleInvalid="skip")
    indexed = indexer.fit(prepared).transform(prepared)

    # Count the distinct index values on the cluster rather than collecting
    # them to the driver just to take len().
    distinct_count = indexed.select(outcol).distinct().count()
    if distinct_count <= 4:
        return indexed.withColumn(outcol, indexed[outcol].cast(IntegerType()))
    return indexed


class TransformationMain:
    # TODO: change df argument in run with following
    def run(transformationDF, config):
        configObj = json.loads(config)
        featureData = configObj["FE"]
        transformationDF = CleanseData().replaceNullValues(featureData, transformationDF)
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Date', 'transformation_label': 'String Indexer'}], 'feature': 'Date', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
                                                  'count': '84', 'mean': '', 'stddev': '', 'min': 'Apr-18', 'max': 'Sep-24', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Date'}, {'feature_label': 'Date', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Date')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Visitor_Volume', 'transformation_label': 'String Indexer'}], 'feature': 'Visitor_Volume', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '1,065,100', 'max': '3,749,800', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Visitor_Volume'}, {'feature_label': 'Visitor_Volume', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Visitor_Volume')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Visitor_Volume_YOY', 'transformation_label': 'String Indexer'}], 'feature': 'Visitor_Volume_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '-1,771,600', 'max': '90,300', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Visitor_Volume_YOY'}, {'feature_label': 'Visitor_Volume_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Visitor_Volume_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Convention_Attendance', 'transformation_label': 'String Indexer'}], 'feature': 'Convention_Attendance', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '0.0', 'stddev': '0.0', 'min': '0', 'max': 'N/A', 'missing': '0', 'distinct': '77'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Convention_Attendance'}, {'feature_label': 'Convention_Attendance', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Convention_Attendance')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Convention_Attendance_YOY', 'transformation_label': 'String Indexer'}], 'feature': 'Convention_Attendance_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '0.0', 'stddev': '0.0', 'min': '-10,100', 'max': 'N/A', 'missing': '0', 'distinct': '68'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Convention_Attendance_YOY'}, {'feature_label': 'Convention_Attendance_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Convention_Attendance_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Available_Room_Inventory', 'transformation_label': 'String Indexer'}], 'feature': 'Available_Room_Inventory', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '123,684', 'max': '95,396', 'missing': '0', 'distinct': '76'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Available_Room_Inventory'}, {'feature_label': 'Available_Room_Inventory', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Available_Room_Inventory')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Available_Room_Inventory_YOY', 'transformation_label': 'String Indexer'}], 'feature': 'Available_Room_Inventory_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '408.12', 'stddev': '324.45', 'min': '-1,194', 'max': '955', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Available_Room_Inventory_...'}, {'feature_label': 'Available_Room_Inventory_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Available_Room_Inventory_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Total_Occupancy', 'transformation_label': 'String Indexer'}], 'feature': 'Total_Occupancy', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '1.70%', 'max': '92.70%', 'missing': '0', 'distinct': '68'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Total_Occupancy'}, {'feature_label': 'Total_Occupancy', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Total_Occupancy')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Total_Occupancy_YOY', 'transformation_label': 'String Indexer'}], 'feature': 'Total_Occupancy_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '-0.20%', 'max': '7.70%', 'missing': '0', 'distinct': '67'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Total_Occupancy_YOY'}, {'feature_label': 'Total_Occupancy_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Total_Occupancy_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': '    Weekend_Occupancy', 'transformation_label': 'String Indexer'}], 'feature': '    Weekend_Occupancy', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '1.60%', 'max': '98.00%', 'missing': '0', 'distinct': '69'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': '    Weekend_Occupancy'}, {'feature_label': '    Weekend_Occupancy', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('    Weekend_Occupancy')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Weekend_Occupancy_YOY', 'transformation_label': 'String Indexer'}], 'feature': 'Weekend_Occupancy_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '-0.20%', 'max': '9.70%', 'missing': '0', 'distinct': '60'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Weekend_Occupancy_YOY'}, {'feature_label': 'Weekend_Occupancy_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Weekend_Occupancy_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': '    Midweek_Occupancy', 'transformation_label': 'String Indexer'}], 'feature': '    Midweek_Occupancy', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '1.70%', 'max': '90.80%', 'missing': '0', 'distinct': '70'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': '    Midweek_Occupancy'}, {'feature_label': '    Midweek_Occupancy', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('    Midweek_Occupancy')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Midweek_Occupancy_YOY', 'transformation_label': 'String Indexer'}], 'feature': 'Midweek_Occupancy_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '-0.10%', 'max': '9.20%', 'missing': '0', 'distinct': '71'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Midweek_Occupancy_YOY'}, {'feature_label': 'Midweek_Occupancy_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Midweek_Occupancy_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': '    Strip_Occupancy', 'transformation_label': 'String Indexer'}], 'feature': '    Strip_Occupancy', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '1.00%', 'max': '94.00%', 'missing': '0', 'distinct': '72'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': '    Strip_Occupancy'}, {'feature_label': '    Strip_Occupancy', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('    Strip_Occupancy')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Strip_Occupancy_YOY', 'transformation_label': 'String Indexer'}], 'feature': 'Strip_Occupancy_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '-0.30%', 'max': '8.80%', 'missing': '0', 'distinct': '70'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Strip_Occupancy_YOY'}, {'feature_label': 'Strip_Occupancy_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Strip_Occupancy_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': '    Downtown_Occupancy', 'transformation_label': 'String Indexer'}], 'feature': '    Downtown_Occupancy', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '0.00%', 'max': '88.10%', 'missing': '0', 'distinct': '69'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': '    Downtown_Occupancy'}, {'feature_label': '    Downtown_Occupancy', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('    Downtown_Occupancy')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Average_Daily_Room_Rate_ADR', 'transformation_label': 'String Indexer'}], 'feature': 'Average_Daily_Room_Rate_ADR', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$100.09 ', 'max': '$99.24 ', 'missing': '0', 'distinct': '83'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Average_Daily_Room_Rate_A...'}, {'feature_label': 'Average_Daily_Room_Rate_ADR', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Average_Daily_Room_Rate_ADR')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Average_Daily_Room_Rate_ADR_YOY', 'transformation_label': 'String Indexer'}], 'feature': 'Average_Daily_Room_Rate_ADR_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$0.88 ', 'max': '-$79.82 ', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Average_Daily_Room_Rate_A...'}, {'feature_label': 'Average_Daily_Room_Rate_ADR_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Average_Daily_Room_Rate_ADR_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': '    Strip_ADR', 'transformation_label': 'String Indexer'}], 'feature': '    Strip_ADR', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$100.32 ', 'max': '$96.51 ', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': '    Strip_ADR'}, {'feature_label': '    Strip_ADR', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('    Strip_ADR')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Strip_ADR_YOY', 'transformation_label': 'String Indexer'}], 'feature': 'Strip_ADR_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$1.09 ', 'max': '-$96.37 ', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Strip_ADR_YOY'}, {'feature_label': 'Strip_ADR_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Strip_ADR_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': '    Downtown_ADR', 'transformation_label': 'String Indexer'}], 'feature': '    Downtown_ADR', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$100.23 ', 'max': '$99.90 ', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': '    Downtown_ADR'}, {'feature_label': '    Downtown_ADR', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('    Downtown_ADR')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': ' Downtown_ADR_YOY', 'transformation_label': 'String Indexer'}], 'feature': ' Downtown_ADR_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$0.17 ', 'max': '-$9.79 ', 'missing': '0', 'distinct': '82'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': ' Downtown_ADR_YOY'}, {'feature_label': ' Downtown_ADR_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(' Downtown_ADR_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Revenue_Per_Available_Room_RevPAR', 'transformation_label': 'String Indexer'}], 'feature': 'Revenue_Per_Available_Room_RevPAR', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$1.03 ', 'max': '$97.08 ', 'missing': '0', 'distinct': '82'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Revenue_Per_Available_Roo...'}, {'feature_label': 'Revenue_Per_Available_Room_RevPAR', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Revenue_Per_Available_Room_RevPAR')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Revenue_Per_Available_Room_RevPAR_YOY', 'transformation_label': 'String Indexer'}], 'feature': 'Revenue_Per_Available_Room_RevPAR_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$0.28 ', 'max': '-$9.51 ', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Revenue_Per_Available_Roo...'}, {'feature_label': 'Revenue_Per_Available_Room_RevPAR_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Revenue_Per_Available_Room_RevPAR_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': '    Strip_RevPAR', 'transformation_label': 'String Indexer'}], 'feature': '    Strip_RevPAR', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$0.57 ', 'max': '$95.27 ', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': '    Strip_RevPAR'}, {'feature_label': '    Strip_RevPAR', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('    Strip_RevPAR')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': ' Strip_RevPARYOY', 'transformation_label': 'String Indexer'}], 'feature': ' Strip_RevPARYOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$0.12 ', 'max': '-$92.17 ', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': ' Strip_RevPARYOY'}, {'feature_label': ' Strip_RevPARYOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(' Strip_RevPARYOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': '    Downtown_RevPAR', 'transformation_label': 'String Indexer'}], 'feature': '    Downtown_RevPAR', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$0.78 ', 'max': '$98.94 ', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': '    Downtown_RevPAR'}, {'feature_label': '    Downtown_RevPAR', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('    Downtown_RevPAR')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Downtown_RevPAR_YOY', 'transformation_label': 'String Indexer'}], 'feature': 'Downtown_RevPAR_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$0.29 ', 'max': '-$9.18 ', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Downtown_RevPAR_YOY'}, {'feature_label': 'Downtown_RevPAR_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Downtown_RevPAR_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Total_Room_Nights_Occupied', 'transformation_label': 'String Indexer'}], 'feature': 'Total_Room_Nights_Occupied', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '1,171,200', 'max': '75,300', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Total_Room_Nights_Occupie...'}, {'feature_label': 'Total_Room_Nights_Occupied', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Total_Room_Nights_Occupied')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Total_Room_Nights_Occupied_YOY', 'transformation_label': 'String Indexer'}], 'feature': 'Total_Room_Nights_Occupied_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '-137,500', 'max': '953,300', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Total_Room_Nights_Occupie...'}, {'feature_label': 'Total_Room_Nights_Occupied_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Total_Room_Nights_Occupied_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Total_En_Deplaned_Passengers', 'transformation_label': 'String Indexer'}], 'feature': 'Total_En_Deplaned_Passengers', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '1,041,860', 'max': '5,479,904', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Total_En_Deplaned_Passeng...'}, {'feature_label': 'Total_En_Deplaned_Passengers', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Total_En_Deplaned_Passengers')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Total_En_Deplaned_Passengers_YOY', 'transformation_label': 'String Indexer'}], 'feature': 'Total_En_Deplaned_Passengers_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '-103,283', 'max': '99,542', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Total_En_Deplaned_Passeng...'}, {'feature_label': 'Total_En_Deplaned_Passengers_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Total_En_Deplaned_Passengers_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Avg_Daily _Auto T_raffic_All _Major _Highways', 'transformation_label': 'String Indexer'}], 'feature': 'Avg_Daily _Auto T_raffic_All _Major _Highways', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '103,828', 'max': '85,194', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Avg_Daily _Auto T_raffic_...'}, {'feature_label': 'Avg_Daily _Auto T_raffic_All _Major _Highways', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Avg_Daily _Auto T_raffic_All _Major _Highways')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': '    Avg_Daily_Auto_Traffic_I-15 at NV_CA_Border', 'transformation_label': 'String Indexer'}], 'feature': '    Avg_Daily_Auto_Traffic_I-15 at NV_CA_Border', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '17,083', 'max': '56,713', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': '    Avg_Daily_Auto_Traffi...'}, {'feature_label': '    Avg_Daily_Auto_Traffic_I-15 at NV_CA_Border', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            '    Avg_Daily_Auto_Traffic_I-15 at NV_CA_Border')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Gaming_Revenue_Clark_County', 'transformation_label': 'String Indexer'}], 'feature': 'Gaming_Revenue_Clark_County', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$1,002,371,942.00 ', 'max': '$998,021,238.00 ', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Gaming_Revenue_Clark_Coun...'}, {'feature_label': 'Gaming_Revenue_Clark_County', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Gaming_Revenue_Clark_County')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Gaming_Revenue_Clark_County_YOY', 'transformation_label': 'String Indexer'}], 'feature': 'Gaming_Revenue_Clark_County_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$1,046,485,286.15 ', 'max': '-$97,554,790.65 ', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Gaming_Revenue_Clark_Coun...'}, {'feature_label': 'Gaming_Revenue_Clark_County_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Gaming_Revenue_Clark_County_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': '    Gaming_Revenue_Las_Vegas_Strip', 'transformation_label': 'String Indexer'}], 'feature': '    Gaming_Revenue_Las_Vegas_Strip', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$238,183,142.00 ', 'max': '$905,325,716.00 ', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': '    Gaming_Revenue_Las_Ve...'}, {'feature_label': '    Gaming_Revenue_Las_Vegas_Strip', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            '    Gaming_Revenue_Las_Vegas_Strip')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Gaming_Revenue_Las_Vegas_Strip_YOY', 'transformation_label': 'String Indexer'}], 'feature': 'Gaming_Revenue_Las_Vegas_Strip_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$110,270,748.55 ', 'max': '-$85,691,396.64 ', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Gaming_Revenue_Las_Vegas_...'}, {'feature_label': 'Gaming_Revenue_Las_Vegas_Strip_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Gaming_Revenue_Las_Vegas_Strip_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': '    Gaming_Revenue_Downtown', 'transformation_label': 'String Indexer'}], 'feature': '    Gaming_Revenue_Downtown', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$1,991,486.00 ', 'max': '$97,546,944.00 ', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': '    Gaming_Revenue_Downto...'}, {'feature_label': '    Gaming_Revenue_Downtown', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            '    Gaming_Revenue_Downtown')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Gaming_Revenue_Downtown_YOY', 'transformation_label': 'String Indexer'}], 'feature': 'Gaming_Revenue_Downtown_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$1,178,884.01 ', 'max': '-$9,759,701.68 ', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Gaming_Revenue_Downtown_Y...'}, {'feature_label': 'Gaming_Revenue_Downtown_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Gaming_Revenue_Downtown_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': '    Gaming_Revenue_Boulder_Strip', 'transformation_label': 'String Indexer'}], 'feature': '    Gaming_Revenue_Boulder_Strip', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$14,972.00 ', 'max': '-$48,212.00 ', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': '    Gaming_Revenue_Boulde...'}, {'feature_label': '    Gaming_Revenue_Boulder_Strip', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            '    Gaming_Revenue_Boulder_Strip')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'Gaming_Revenue_Boulder_Strip_YOY', 'transformation_label': 'String Indexer'}], 'feature': 'Gaming_Revenue_Boulder_Strip_YOY', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$1,209,939.83 ', 'max': '-$8,885,559.91 ', 'missing': '0', 'distinct': '84'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'Gaming_Revenue_Boulder_St...'}, {'feature_label': 'Gaming_Revenue_Boulder_Strip_YOY', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop(
            'Gaming_Revenue_Boulder_Strip_YOY')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': "Room_Tax _LVCVA's_Portion1", 'transformation_label': 'String Indexer'}], 'feature': "Room_Tax _LVCVA's_Portion1", 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '84', 'mean': '', 'stddev': '', 'min': '$1,012,587 ', 'max': 'NULL', 'missing': '0', 'distinct': '73'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': "Room_Tax _LVCVA's_Portion..."}, {'feature_label': "Room_Tax _LVCVA's_Portion1", 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('Room_Tax _LVCVA's_Portion1')
        display(transformationDF.limit(2).toPandas())
        return transformationDF


***AUTOML FUNCTIONS***

In [None]:
from sklearn.model_selection import train_test_split
from tpot import TPOTRegressor
import pyspark


def functionRegression(sparkDF, listOfFeatures, label):
    """Fit a TPOT regressor on a Spark DataFrame and return it with its hold-out data.

    The Spark DataFrame is collected into pandas, split 90/10 into train and
    test parts (``random_state=1``), and a ``TPOTRegressor`` is fitted on the
    training part.

    Args:
        sparkDF: Spark DataFrame holding the feature columns and ``label``.
        listOfFeatures: Names of the columns used as model inputs. If
            ``label`` appears in this list it is ignored, so the target is
            never used as an input feature.
        label: Name of the target column.

    Returns:
        dict with the keys ``model`` (fitted ``TPOTRegressor``), ``X_test``,
        ``y_test``, ``label`` and ``columnNames`` (the feature names, in the
        same order as the columns of ``X_test``).

    Raises:
        KeyError: if ``label`` or one of the feature names is not a column
            of the collected DataFrame.
    """
    sparkDF.persist(pyspark.StorageLevel.MEMORY_AND_DISK)
    df = sparkDF.toPandas()

    # The original `df.drop(label, axis=1)[listOfFeatures]` raised KeyError
    # when the label was also listed as a feature; filter it out instead.
    featureColumns = [name for name in listOfFeatures if name != label]
    X = df[featureColumns].values
    y = df[label].values

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=1, test_size=0.1)
    tpotModel = TPOTRegressor(verbosity=3, generations=10, max_time_mins=5,
                              n_jobs=-1, random_state=25, population_size=15, use_dask=True)
    tpotModel.fit(X_train, y_train)

    # NOTE(review): score() reports TPOT's scoring metric, which is
    # presumably not an "error rate" -- confirm against the TPOT docs before
    # relying on the wording of this message.
    display(" Error rate of Model : %s" % tpotModel.score(X_test, y_test))

    return {'model': tpotModel,
            'X_test': X_test,
            'y_test': y_test,
            'label': label,
            'columnNames': featureColumns}


***READING DATAFRAME***

In [None]:
############## CREATE SPARK SESSION ############################ ENTER YOUR SPARK MASTER IP AND PORT TO CONNECT TO SERVER ################
from pyspark.sql import SparkSession
# Local Spark session with a single worker thread ('local[1]').
spark = (
    SparkSession.builder
    .master('local[1]')
    .getOrCreate()
)
#%run lvtourismdataHooks.ipynb
try:
    #sourcePreExecutionHook()

    # The connector receives its configuration as the text of a dict
    # literal; the adjacent string pieces below are joined by Python into
    # one single string.
    lvdata = HDFSConnector.fetch(
        spark,
        "{'url': '/FileStore/platform/uploadedSourceFiles/LasVegas Tourism Data.csv', "
        "'filename': 'LasVegas Tourism Data.csv', "
        "'delimiter': ',', "
        "'file_type': 'Delimeted', "
        "'dbfs_token': '', "
        "'dbfs_domain': '', "
        "'is_header': 'Use Header Line', "
        "'server_url': '/nexusMax/NexusMaxPlatform/uploads/platform/', "
        "'results_url': 'http://ml.colaberry.com:44040/api/read/hdfs'}",
    )

except Exception as ex:
    # Failures are only logged; `lvdata` stays undefined in that case.
    logging.error(ex)
#spark.stop()


***TRANSFORMING DATAFRAME***

In [None]:
#%run lvtourismdataHooks.ipynb
try:
    #transformationPreExecutionHook()

    # Run the feature-engineering step on the source DataFrame. The
    # configuration is serialized to JSON: one "FE" entry per column, each
    # requesting the "String Indexer" transformation.
    # The continuation lines below sit inside the open parentheses, so every
    # line break must fall between tokens, never inside a string literal.
    lvtourismdataautofe = TransformationMain.run(lvdata,json.dumps( {"FE": [{"transformationsData": [{"feature_label": "Date", "transformation_label": "String Indexer"}], "feature": "Date", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "Apr-18", "max": "Sep-24", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Date"}, {"transformationsData": [{"feature_label": "Visitor_Volume", "transformation_label": "String Indexer"}], "feature": "Visitor_Volume", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "1,065,100", "max": "3,749,800", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Visitor_Volume"}, {"transformationsData": [{"feature_label": "Visitor_Volume_YOY", "transformation_label": "String Indexer"}], "feature": "Visitor_Volume_YOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "-1,771,600", "max": "90,300", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Visitor_Volume_YOY"}, {"transformationsData": [{"feature_label": "Convention_Attendance", "transformation_label": "String Indexer"}], "feature": "Convention_Attendance", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "0.0", "stddev": "0.0", "min": "0", "max": "N/A", "missing": "0", "distinct": "77"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Convention_Attendance"}, {"transformationsData": [{"feature_label": "Convention_Attendance_YOY", "transformation_label": "String Indexer"}], "feature": "Convention_Attendance_YOY", "type": "string", "selected": "True", 
"replaceby": "max", "stats": {"count": "84", "mean": "0.0", "stddev": "0.0", "min": "-10,100", "max": "N/A", "missing": "0", "distinct": "68"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Convention_Attendance_YOY"}, {"transformationsData": [{"feature_label": "Available_Room_Inventory", "transformation_label": "String Indexer"}], "feature": "Available_Room_Inventory", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "123,684", "max": "95,396", "missing": "0", "distinct": "76"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Available_Room_Inventory"}, {"transformationsData": [{"feature_label": "Available_Room_Inventory_YOY", "transformation_label": "String Indexer"}], "feature": "Available_Room_Inventory_YOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "408.12", "stddev": "324.45", "min": "-1,194", "max": "955", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Available_Room_Inventory_..."}, {"transformationsData": [{"feature_label": "Total_Occupancy", "transformation_label": "String Indexer"}], "feature": "Total_Occupancy", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "1.70%", "max": "92.70%", "missing": "0", "distinct": "68"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Total_Occupancy"}, {"transformationsData": [{"feature_label": "Total_Occupancy_YOY", "transformation_label": "String Indexer"}], "feature": "Total_Occupancy_YOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "-0.20%", "max": "7.70%", "missing": "0", "distinct": "67"}, "transformation": 
[{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Total_Occupancy_YOY"}, {"transformationsData": [{"feature_label": "    Weekend_Occupancy", "transformation_label": "String Indexer"}], "feature": "    Weekend_Occupancy", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "1.60%", "max": "98.00%", "missing": "0", "distinct": "69"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "    Weekend_Occupancy"}, {"transformationsData": [{"feature_label": "Weekend_Occupancy_YOY", "transformation_label": "String Indexer"}], "feature": "Weekend_Occupancy_YOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "-0.20%", "max": "9.70%", "missing": "0", "distinct": "60"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Weekend_Occupancy_YOY"}, {"transformationsData": [{"feature_label": "    Midweek_Occupancy", "transformation_label": "String Indexer"}], "feature": "    Midweek_Occupancy", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "1.70%", "max": "90.80%", "missing": "0", "distinct": "70"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "    Midweek_Occupancy"}, {"transformationsData": [{"feature_label": "Midweek_Occupancy_YOY", "transformation_label": "String Indexer"}], "feature": "Midweek_Occupancy_YOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "-0.10%", "max": "9.20%", "missing": "0", "distinct": "71"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Midweek_Occupancy_YOY"}, {"transformationsData": [{"feature_label": "    Strip_Occupancy", "transformation_label": 
"String Indexer"}], "feature": "    Strip_Occupancy", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "1.00%", "max": "94.00%", "missing": "0", "distinct": "72"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "    Strip_Occupancy"}, {"transformationsData": [{"feature_label": "Strip_Occupancy_YOY", "transformation_label": "String Indexer"}], "feature": "Strip_Occupancy_YOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "-0.30%", "max": "8.80%", "missing": "0", "distinct": "70"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Strip_Occupancy_YOY"}, {"transformationsData": [{"feature_label": "    Downtown_Occupancy", "transformation_label": "String Indexer"}], "feature": "    Downtown_Occupancy", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "0.00%", "max": "88.10%", "missing": "0", "distinct": "69"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "    Downtown_Occupancy"}, {"transformationsData": [{"feature_label": "Average_Daily_Room_Rate_ADR", "transformation_label": "String Indexer"}], "feature": "Average_Daily_Room_Rate_ADR", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$100.09 ", "max": "$99.24 ", "missing": "0", "distinct": "83"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Average_Daily_Room_Rate_A..."}, {"transformationsData": [{"feature_label": "Average_Daily_Room_Rate_ADR_YOY", "transformation_label": "String Indexer"}], "feature": "Average_Daily_Room_Rate_ADR_YOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": 
"$0.88 ", "max": "-$79.82 ", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Average_Daily_Room_Rate_A..."}, {"transformationsData": [{"feature_label": "    Strip_ADR", "transformation_label": "String Indexer"}], "feature": "    Strip_ADR", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$100.32 ", "max": "$96.51 ", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "    Strip_ADR"}, {"transformationsData": [{"feature_label": "Strip_ADR_YOY", "transformation_label": "String Indexer"}], "feature": "Strip_ADR_YOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$1.09 ", "max": "-$96.37 ", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Strip_ADR_YOY"}, {"transformationsData": [{"feature_label": "    Downtown_ADR", "transformation_label": "String Indexer"}], "feature": "    Downtown_ADR", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$100.23 ", "max": "$99.90 ", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "    Downtown_ADR"}, {"transformationsData": [{"feature_label": " Downtown_ADR_YOY", "transformation_label": "String Indexer"}], "feature": " Downtown_ADR_YOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$0.17 ", "max": "-$9.79 ", "missing": "0", "distinct": "82"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": " Downtown_ADR_YOY"}, {"transformationsData": [{"feature_label": 
"Revenue_Per_Available_Room_RevPAR", "transformation_label": "String Indexer"}], "feature": "Revenue_Per_Available_Room_RevPAR", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$1.03 ", "max": "$97.08 ", "missing": "0", "distinct": "82"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Revenue_Per_Available_Roo..."}, {"transformationsData": [{"feature_label": "Revenue_Per_Available_Room_RevPAR_YOY", "transformation_label": "String Indexer"}], "feature": "Revenue_Per_Available_Room_RevPAR_YOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$0.28 ", "max": "-$9.51 ", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Revenue_Per_Available_Roo..."}, {"transformationsData": [{"feature_label": "    Strip_RevPAR", "transformation_label": "String Indexer"}], "feature": "    Strip_RevPAR", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$0.57 ", "max": "$95.27 ", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "    Strip_RevPAR"}, {"transformationsData": [{"feature_label": " Strip_RevPARYOY", "transformation_label": "String Indexer"}], "feature": " Strip_RevPARYOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$0.12 ", "max": "-$92.17 ", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": " Strip_RevPARYOY"}, {"transformationsData": [{"feature_label": "    Downtown_RevPAR", "transformation_label": "String Indexer"}], "feature": "    Downtown_RevPAR", "type": "string", "selected": "True", "replaceby": 
"max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$0.78 ", "max": "$98.94 ", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "    Downtown_RevPAR"}, {"transformationsData": [{"feature_label": "Downtown_RevPAR_YOY", "transformation_label": "String Indexer"}], "feature": "Downtown_RevPAR_YOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$0.29 ", "max": "-$9.18 ", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Downtown_RevPAR_YOY"}, {"transformationsData": [{"feature_label": "Total_Room_Nights_Occupied", "transformation_label": "String Indexer"}], "feature": "Total_Room_Nights_Occupied", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "1,171,200", "max": "75,300", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Total_Room_Nights_Occupie..."}, {"transformationsData": [{"feature_label": "Total_Room_Nights_Occupied_YOY", "transformation_label": "String Indexer"}], "feature": "Total_Room_Nights_Occupied_YOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "-137,500", "max": "953,300", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Total_Room_Nights_Occupie..."}, {"transformationsData": [{"feature_label": "Total_En_Deplaned_Passengers", "transformation_label": "String Indexer"}], "feature": "Total_En_Deplaned_Passengers", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "1,041,860", "max": "5,479,904", "missing": "0", "distinct": "84"}, 
"transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Total_En_Deplaned_Passeng..."}, {"transformationsData": [{"feature_label": "Total_En_Deplaned_Passengers_YOY", "transformation_label": "String Indexer"}], "feature": "Total_En_Deplaned_Passengers_YOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "-103,283", "max": "99,542", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Total_En_Deplaned_Passeng..."}, {"transformationsData": [{"feature_label": "Avg_Daily _Auto T_raffic_All _Major _Highways", "transformation_label": "String Indexer"}], "feature": "Avg_Daily _Auto T_raffic_All _Major _Highways", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "103,828", "max": "85,194", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Avg_Daily _Auto T_raffic_..."}, {"transformationsData": [{"feature_label": "    Avg_Daily_Auto_Traffic_I-15 at NV_CA_Border", "transformation_label": "String Indexer"}], "feature": "    Avg_Daily_Auto_Traffic_I-15 at NV_CA_Border", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "17,083", "max": "56,713", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "    Avg_Daily_Auto_Traffi..."}, {"transformationsData": [{"feature_label": "Gaming_Revenue_Clark_County", "transformation_label": "String Indexer"}], "feature": "Gaming_Revenue_Clark_County", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$1,002,371,942.00 ", "max": "$998,021,238.00 ", "missing": "0", "distinct": "84"}, 
"transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Gaming_Revenue_Clark_Coun..."}, {"transformationsData": [{"feature_label": "Gaming_Revenue_Clark_County_YOY", "transformation_label": "String Indexer"}], "feature": "Gaming_Revenue_Clark_County_YOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$1,046,485,286.15 ", "max": "-$97,554,790.65 ", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Gaming_Revenue_Clark_Coun..."}, {"transformationsData": [{"feature_label": "    Gaming_Revenue_Las_Vegas_Strip", "transformation_label": "String Indexer"}], "feature": "    Gaming_Revenue_Las_Vegas_Strip", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$238,183,142.00 ", "max": "$905,325,716.00 ", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "    Gaming_Revenue_Las_Ve..."}, {"transformationsData": [{"feature_label": "Gaming_Revenue_Las_Vegas_Strip_YOY", "transformation_label": "String Indexer"}], "feature": "Gaming_Revenue_Las_Vegas_Strip_YOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$110,270,748.55 ", "max": "-$85,691,396.64 ", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Gaming_Revenue_Las_Vegas_..."}, {"transformationsData": [{"feature_label": "    Gaming_Revenue_Downtown", "transformation_label": "String Indexer"}], "feature": "    Gaming_Revenue_Downtown", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$1,991,486.00 ", "max": "$97,546,944.00 ", "missing": "0", "distinct": "84"}, 
"transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "    Gaming_Revenue_Downto..."}, {"transformationsData": [{"feature_label": "Gaming_Revenue_Downtown_YOY", "transformation_label": "String Indexer"}], "feature": "Gaming_Revenue_Downtown_YOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$1,178,884.01 ", "max": "-$9,759,701.68 ", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Gaming_Revenue_Downtown_Y..."}, {"transformationsData": [{"feature_label": "    Gaming_Revenue_Boulder_Strip", "transformation_label": "String Indexer"}], "feature": "    Gaming_Revenue_Boulder_Strip", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$14,972.00 ", "max": "-$48,212.00 ", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "    Gaming_Revenue_Boulde..."}, {"transformationsData": [{"feature_label": "Gaming_Revenue_Boulder_Strip_YOY", "transformation_label": "String Indexer"}], "feature": "Gaming_Revenue_Boulder_Strip_YOY", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$1,209,939.83 ", "max": "-$8,885,559.91 ", "missing": "0", "distinct": "84"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "Gaming_Revenue_Boulder_St..."}, {"transformationsData": [{"feature_label": "Room_Tax _LVCVA's_Portion1", "transformation_label": "String Indexer"}], "feature": "Room_Tax _LVCVA's_Portion1", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "84", "mean": "", "stddev": "", "min": "$1,012,587 ", "max": "NULL", "missing": "0", "distinct": "73"}, "transformation": [{"transformation": "String Indexer", 
"selectedAsDefault": 1}], "updatedLabel": "Room_Tax _LVCVA's_Portion..."}]}))

    #transformationPostExecutionHook(lvtourismdataautofe)

except Exception as ex:
    # Failures are only logged; `lvtourismdataautofe` stays undefined then.
    logging.error(ex)


***TRAIN MODEL***

In [None]:
#%run lvtourismdataHooks.ipynb
try:
    #mlPreExecutionHook()

    # Train on every string-indexed column except the target, which is
    # passed separately as the label (last argument).
    dataAutoML = functionRegression(
        lvtourismdataautofe,
        [
            "Date_stringindexer",
            "Visitor_Volume_stringindexer",
            "Visitor_Volume_YOY_stringindexer",
            "Convention_Attendance_stringindexer",
            "Available_Room_Inventory_stringindexer",
            "Available_Room_Inventory_YOY_stringindexer",
            "Total_Occupancy_stringindexer",
            "Total_Occupancy_YOY_stringindexer",
            "    Weekend_Occupancy_stringindexer",
            "Weekend_Occupancy_YOY_stringindexer",
            "    Midweek_Occupancy_stringindexer",
            "Midweek_Occupancy_YOY_stringindexer",
            "    Strip_Occupancy_stringindexer",
            "Strip_Occupancy_YOY_stringindexer",
            "    Downtown_Occupancy_stringindexer",
            "Average_Daily_Room_Rate_ADR_stringindexer",
            "Average_Daily_Room_Rate_ADR_YOY_stringindexer",
            "    Strip_ADR_stringindexer",
            "Strip_ADR_YOY_stringindexer",
            "    Downtown_ADR_stringindexer",
            " Downtown_ADR_YOY_stringindexer",
            "Revenue_Per_Available_Room_RevPAR_stringindexer",
            "Revenue_Per_Available_Room_RevPAR_YOY_stringindexer",
            "    Strip_RevPAR_stringindexer",
            " Strip_RevPARYOY_stringindexer",
            "    Downtown_RevPAR_stringindexer",
            "Downtown_RevPAR_YOY_stringindexer",
            "Total_Room_Nights_Occupied_stringindexer",
            "Total_Room_Nights_Occupied_YOY_stringindexer",
            "Total_En_Deplaned_Passengers_stringindexer",
            "Total_En_Deplaned_Passengers_YOY_stringindexer",
            "Avg_Daily _Auto T_raffic_All _Major _Highways_stringindexer",
            "    Avg_Daily_Auto_Traffic_I-15 at NV_CA_Border_stringindexer",
            "Gaming_Revenue_Clark_County_stringindexer",
            "Gaming_Revenue_Clark_County_YOY_stringindexer",
            "    Gaming_Revenue_Las_Vegas_Strip_stringindexer",
            "Gaming_Revenue_Las_Vegas_Strip_YOY_stringindexer",
            "    Gaming_Revenue_Downtown_stringindexer",
            "Gaming_Revenue_Downtown_YOY_stringindexer",
            "    Gaming_Revenue_Boulder_Strip_stringindexer",
            "Gaming_Revenue_Boulder_Strip_YOY_stringindexer",
            "Room_Tax _LVCVA's_Portion1_stringindexer",
        ],
        "Convention_Attendance_YOY_stringindexer",
    )

    #mlPostExecutionHook(dataAutoML)

except Exception as ex:
    # Failures are only logged; `dataAutoML` stays undefined in that case.
    logging.error(ex)
#spark.stop()


***PREDICT ON TRAINED MODEL***

In [None]:
import pandas as pd
import numpy as np
import sklearn.metrics

# Score the trained model on the hold-out split and show a table with the
# actual label, the predicted label and the feature values side by side.
try:
    model = dataAutoML['model']
    X_test = dataAutoML['X_test']
    y_test = dataAutoML['y_test']
    label = dataAutoML['label']

    # Work on a copy: the original code removed from / inserted into the
    # list stored in dataAutoML, so running this cell a second time left
    # "<label>_predicted" in the names and the DataFrame construction below
    # failed on a column-count mismatch.
    columnNames = list(dataAutoML['columnNames'])
    if label in columnNames:
        columnNames.remove(label)

    predicted = label + "_predicted"
    y_predicted = model.predict(X_test)

    df = pd.DataFrame(X_test, columns=columnNames)
    df[label] = y_test
    df[predicted] = y_predicted

    # Put the actual and predicted label columns first.
    columnNames.insert(0, predicted)
    columnNames.insert(0, label)
    df = df[columnNames]

    # All metrics are rounded to one decimal place for display.
    R2 = np.round(sklearn.metrics.r2_score(y_test, y_predicted), 1)
    Mean_Squared_Error = np.round(sklearn.metrics.mean_squared_error(y_test, y_predicted), 1)
    Mean_Absolute_Error = np.round(sklearn.metrics.mean_absolute_error(y_test, y_predicted), 1)
    display(" R2 score of Prediction on test data    : %s"%R2)
    display(" Mean Squared Error of Prediction on test data    : %s"%Mean_Squared_Error)
    display(" Mean Absolute Error of Prediction on test data   : %s"%Mean_Absolute_Error)
    display(df.head())
except Exception as ex:
    # Failures are only logged so that the Spark session below is still stopped.
    logging.error(ex)

spark.stop()

