***GENERATED CODE FOR attritionclassify PIPELINE.***

***DON'T EDIT THIS CODE.***

***CONNECTOR FUNCTIONS TO READ DATA.***

In [None]:
import os
import datetime
import logging
import warnings
warnings.filterwarnings('ignore')
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)


class HDFSConnector:
    """Read CSV data from HDFS and write DataFrames back out as CSV.

    Both entry points receive their settings as ``config``: either the
    string form of a Python dict literal (what the generated pipeline
    passes) or an already-built dict.
    """

    @staticmethod
    def _parse_config(config):
        """Convert ``config`` into a dict without executing arbitrary code.

        Args:
            config: a dict, or a string containing a Python dict literal.

        Returns:
            The configuration as a dict.

        Raises:
            ValueError: if ``config`` is neither a dict nor a dict literal.
        """
        # Local import: the file's import block does not provide ``ast``.
        import ast

        if isinstance(config, dict):
            return config
        try:
            # literal_eval only accepts literals, so a config string can no
            # longer run code the way eval() allowed.
            parsed = ast.literal_eval(config)
        except (ValueError, SyntaxError, TypeError) as err:
            raise ValueError(
                "connector config must be a dict literal, got: %r" % (config,)) from err
        if not isinstance(parsed, dict):
            raise ValueError(
                "connector config must describe a dict, got: %r" % (parsed,))
        return parsed

    @staticmethod
    def fetch(spark, config):
        """Load the CSV file named by ``config['url']`` from HDFS.

        The HDFS host and port are read from the ``HDFS_SERVER`` and
        ``HDFS_PORT`` environment variables. The first two rows are shown
        through ``display`` (expected to be supplied by the notebook
        environment).

        Args:
            spark: Spark session used for reading.
            config: connector settings; must contain ``'url'``.

        Returns:
            The loaded DataFrame (header row used, schema inferred).

        Raises:
            KeyError: if an environment variable or ``'url'`` is missing.
            ValueError: if ``HDFS_PORT`` is not an integer or ``config`` is
                not a dict literal.
        """
        ################### INPUT HADOOP HOST PORT TO CONNECT WITH ###############################
        hdfs_server = str(os.environ['HDFS_SERVER'])
        hdfs_port = int(os.environ['HDFS_PORT'])
        settings = HDFSConnector._parse_config(config)
        source = f"hdfs://{hdfs_server}:{hdfs_port}{settings['url']}"
        df = spark.read.options(header='true', inferschema='true').csv(
            source, header='true')
        display(df.limit(2).toPandas())
        return df

    @staticmethod
    def put(df, spark, config):
        """Write ``df`` as CSV to a timestamp-prefixed target.

        Args:
            df: DataFrame to write.
            spark: unused; kept so the signature mirrors ``fetch``.
            config: connector settings; must contain ``'is_header'``,
                ``'delimiter'`` and ``'url'``.

        Returns:
            Whatever the writer's ``save`` call returns.

        Raises:
            KeyError: if a required setting is missing.
            ValueError: if ``config`` is not a dict literal.
        """
        settings = HDFSConnector._parse_config(config)
        header = 'true' if settings["is_header"] == "Use Header Line" else 'false'
        delimiter = settings["delimiter"]
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H.%M.%S")
        # NOTE(review): the target is "<timestamp>_ <url>", i.e. there is a
        # space between the prefix and the url. Kept as-is; confirm whether
        # that space is intended.
        target = ("%s %s") % (timestamp + "_", settings['url'])
        writer = df.write.format('csv').options(header=header, delimiter=delimiter)
        return writer.save(target)


***TRANSFORMATION FUNCTIONS THAT WILL BE APPLIED TO THE DATA***

In [None]:
import json
from pyspark.ml.feature import Binarizer
from pyspark.sql.functions import round
from pyspark.sql.types import IntegerType
from pyspark.ml.feature import StringIndexer
from pyspark.sql.functions import col, when
from pyspark.sql.types import IntegerType
from pyspark.sql.functions import mean, stddev, min, max, col


class CleanseData:
    """Fill missing values of Spark DataFrame columns with a per-column value.

    ``feature`` arguments are schema field objects (anything exposing a
    ``name`` attribute). ``mean``, ``max``, ``min`` and ``stddev`` used
    below are the pyspark.sql.functions imported at file level, not the
    Python builtins.
    """

    def cleanValueForFE(self, value):
        """Normalise an aggregate result so it can serve as a fill value.

        Returns:
            ``""`` when ``value`` is None, the string ``"nan"`` when
            ``str(value)`` is ``'nan'``, otherwise ``value`` unchanged.
        """
        if value is None:
            return ""
        if str(value) == 'nan':
            return "nan"
        return value

    def _aggregateValue(self, feature, df, aggregate, alias):
        """Return the cleaned ``aggregate`` of ``feature`` over null-free rows."""
        # NOTE(review): dropna() without a subset discards rows that have a
        # null in ANY column, so the statistic also depends on gaps in
        # unrelated columns. Kept as-is; confirm this is intended.
        complete_rows = df.dropna()
        row = complete_rows.select(
            aggregate(col(feature.name)).alias(alias)).collect()[0]
        return self.cleanValueForFE(row[alias])

    def _replaceBlank(self, feature, df, fillValue):
        """Replace cells equal to a single space (" ") with ``fillValue``."""
        return df.withColumn(
            feature.name,
            when(col(feature.name) == " ", fillValue).otherwise(col(feature.name)))

    def replaceByMean(self, feature, df, mean_=-1):
        """Fill nulls of ``feature`` with the column mean.

        ``mean_`` is unused and kept for signature compatibility.
        """
        meanValue = self._aggregateValue(feature, df, mean, 'mean')
        df = df.fillna(meanValue, subset=[feature.name])
        # NOTE(review): unlike the max/min/stddev variants, " " cells are not
        # rewritten here. A withColumn(...) call used to sit at this point,
        # but its result was discarded, so it never had any effect. It was
        # removed rather than activated because it would also have cast the
        # column to Integer. Confirm the intended behaviour.
        return df

    def replaceByMax(self, feature, df, max_=-1):
        """Fill nulls and " " cells of ``feature`` with the column maximum.

        ``max_`` is unused and kept for signature compatibility.
        """
        maxValue = self._aggregateValue(feature, df, max, 'max')
        df = df.fillna(maxValue, subset=[feature.name])
        return self._replaceBlank(feature, df, maxValue)

    def replaceByMin(self, feature, df, min_=-1):
        """Fill nulls and " " cells of ``feature`` with the column minimum.

        ``min_`` is unused and kept for signature compatibility.
        """
        minValue = self._aggregateValue(feature, df, min, 'min')
        df = df.fillna(minValue, subset=[feature.name])
        return self._replaceBlank(feature, df, minValue)

    def replaceByStandardDeviation(self, feature, df, stddev_=-1):
        """Fill nulls and " " cells of ``feature`` with the column stddev.

        ``stddev_`` is unused and kept for signature compatibility.
        """
        stddevValue = self._aggregateValue(feature, df, stddev, 'stddev')
        df = df.fillna(stddevValue, subset=[feature.name])
        return self._replaceBlank(feature, df, stddevValue)

    def replaceDateRandomly(self, feature, df):
        """Fill nulls and " " cells of ``feature`` with an existing value.

        Despite the name, the value used is the one from the first row
        returned for which the column is not null. When the column has no
        non-null value at all, ``df`` is returned unchanged.
        """
        candidates = df.where(col(feature.name).isNotNull()).head(1)
        if not candidates:
            # Nothing to copy from; indexing [0] here used to raise IndexError.
            return df
        fillValue = self.cleanValueForFE(candidates[0][feature.name])
        df = df.fillna(str(fillValue), subset=[feature.name])
        return self._replaceBlank(feature, df, fillValue)

    def replaceNullValues(self, fList, df):
        """Apply the configured replacement strategy to each listed feature.

        Args:
            fList: iterable of dicts with a ``"feature"`` (column name) and
                a ``"replaceby"`` entry. The first of ``mean``, ``max``,
                ``min``, ``stddev``, ``random`` contained in ``"replaceby"``
                selects the strategy; if none is, the column is left alone.
            df: DataFrame to clean.

        Returns:
            The DataFrame after all replacements.
        """
        # Order matters: it reproduces the precedence of the strategies.
        strategies = (
            ("mean", self.replaceByMean),
            ("max", self.replaceByMax),
            ("min", self.replaceByMin),
            ("stddev", self.replaceByStandardDeviation),
            ("random", self.replaceDateRandomly),
        )
        featuresList = df.schema.fields
        for featureObj in fList:
            for feat in featuresList:
                # Exact match: a substring test would also hit every column
                # whose name merely contains the configured feature name.
                if featureObj["feature"] != feat.name:
                    continue
                for keyword, replace in strategies:
                    if keyword in featureObj["replaceby"]:
                        df = replace(feat, df)
                        break
        return df


def StringIndexerTransform(df, params, transformationData=None):
    """Add a ``<feature>_stringindexer`` column holding label indices.

    Nulls in the feature column are replaced with ``''`` before a
    ``StringIndexer`` (``handleInvalid="skip"``) is fitted on and applied
    to the data. When the indexed column has at most four distinct values
    it is cast to ``IntegerType``.

    Args:
        df: input Spark DataFrame.
        params: dict whose ``"feature"`` entry names the column to index.
        transformationData: unused; accepted so all transforms share one
            call signature.

    Returns:
        The DataFrame with the extra indexed column; the source column is
        kept.
    """
    feature = params["feature"]
    outcol = feature + "_stringindexer"

    filled = df.fillna({feature: ''})
    indexer = StringIndexer(
        inputCol=feature, outputCol=outcol, handleInvalid="skip")
    indexed = indexer.fit(filled).transform(filled)

    # Only the number of distinct indices is needed, so count them on the
    # cluster instead of collecting every value to the driver.
    distinct_count = indexed.select(outcol).distinct().count()
    if distinct_count <= 4:
        return indexed.withColumn(outcol, indexed[outcol].cast(IntegerType()))
    return indexed


def BinarizerTransform(df, params, transformationData=None):
    """Add a ``<feature>_binarizer`` column thresholding a numeric feature.

    The feature column is cast to double (which moves it to the end of the
    column list), nulls become ``0.0``, a ``Binarizer`` writes the 0/1
    result to ``<feature>_binarizer``, and the feature column itself is
    rounded to two decimals.

    Args:
        df: input Spark DataFrame.
        params: dict with the column name under ``"feature"``; its optional
            ``"transformationsData"`` list is searched for a threshold when
            ``transformationData`` does not provide one.
        transformationData: dict expected to hold ``"threshold"``.

    Returns:
        The DataFrame with the binarized column added.

    Raises:
        KeyError: if ``"feature"`` is missing or no threshold can be found.
    """
    feature = params['feature']

    # Resolve the threshold before any Spark work so that a bad
    # configuration fails immediately and with a clear message.
    candidates = [transformationData or {}]
    candidates.extend(params.get('transformationsData') or [])
    for candidate in candidates:
        if 'threshold' in candidate:
            threshold = float(candidate['threshold'])
            break
    else:
        raise KeyError(
            "no 'threshold' configured for Binarizer on feature %r" % (feature,))

    outcol = feature + "_binarizer"
    casted = (df.withColumn("feature_cast", df[feature].cast("double"))
              .drop(feature)
              .withColumnRenamed("feature_cast", feature))
    casted = casted.fillna({feature: 0.0})

    binarizer = Binarizer(threshold=threshold, inputCol=feature, outputCol=outcol)
    binarized = binarizer.transform(casted)

    # ``round`` is pyspark.sql.functions.round (file-level import), so this
    # is a column expression, not the Python builtin.
    return binarized.withColumn(feature, round(binarized[feature], 2))


class TransformationMain:
    """Runs the generated feature-engineering steps of this pipeline."""

    @staticmethod
    def _stringIndexerStep(feature, minValue, maxValue, distinct, updatedLabel=None):
        """Build the (transform, params, transformationData) triple for one
        String Indexer step.

        ``updatedLabel`` defaults to ``feature``.
        """
        params = {
            'transformationsData': [
                {'feature_label': feature, 'transformation_label': 'String Indexer'}],
            'feature': feature,
            'type': 'string',
            'selected': 'True',
            'replaceby': 'max',
            'stats': {'count': '500', 'mean': '', 'stddev': '', 'min': minValue,
                      'max': maxValue, 'missing': '0', 'distinct': distinct},
            'transformation': [
                {'transformation': 'String Indexer', 'selectedAsDefault': 1}],
            'updatedLabel': feature if updatedLabel is None else updatedLabel,
        }
        transformationData = {
            'feature_label': feature, 'transformation_label': 'String Indexer'}
        return StringIndexerTransform, params, transformationData

    # TODO: change df argument in run with following
    @staticmethod
    def run(transformationDF, config):
        """Clean null values, then apply every transformation step in order.

        Each step adds its derived column and the source column is dropped
        right afterwards. The first two rows of the result are shown through
        ``display`` (expected to be supplied by the notebook environment).

        Args:
            transformationDF: input Spark DataFrame.
            config: JSON string whose ``"FE"`` list is handed to
                ``CleanseData.replaceNullValues``.

        Returns:
            The transformed DataFrame.
        """
        configObj = json.loads(config)
        featureData = configObj["FE"]
        transformationDF = CleanseData().replaceNullValues(featureData, transformationDF)

        indexerStep = TransformationMain._stringIndexerStep
        # Steps run in exactly this order. 'DEAPRTMENT NAME' is spelled the
        # way the column is referenced everywhere else in this pipeline.
        steps = [
            indexerStep('LAST NAME', 'ADDISON', 'ZERINGUE', '363'),
            indexerStep('FIRST NAME', 'AALIYAH', 'ZAKYRIAHA', '392'),
            indexerStep('DEAPRTMENT NAME', 'ANIMAL CONTROL & RESCUE CENTER',
                        'TRANSPORTATION AND DRAINAGE', '31'),
            indexerStep('PAY LOCATION DESCRIPTION', 'AIRPORT-ADMINISTRATION',
                        'TRAN & DRAIN-TRAFFIC ENG', '83'),
            indexerStep('JOB TITLE', '311 CALL CENTER REPRESENTATIVE',
                        'WASTEWATER SERVICE INSPECTOR', '171'),
            (
                BinarizerTransform,
                {
                    'transformationsData': [
                        {'feature_label': 'ANNUAL SALARY', 'threshold': 31270.17,
                         'transformation_label': 'Binarizer'}],
                    'feature': 'ANNUAL SALARY',
                    'type': 'real',
                    'selected': 'True',
                    'replaceby': 'mean',
                    'stats': {'count': '486', 'mean': '31270.47', 'stddev': '27515.03',
                              'min': '0.0', 'max': '129554.88', 'missing': '14'},
                    'transformation': [
                        {'transformation': 'Binarizer', 'selectedAsDefault': 1}],
                    'updatedLabel': 'ANNUAL SALARY',
                },
                {'feature_label': 'ANNUAL SALARY', 'threshold': 31270.17,
                 'transformation_label': 'Binarizer'},
            ),
            indexerStep('PERSONNEL STATUS DESCRIPTION', 'EMERGENCY APPOINTMENT',
                        'TEMPORARY', '9', updatedLabel='PERSONNEL STATUS DESCRIPT...'),
            indexerStep('EMPLOYMENT STATUS', 'Active', 'Inactive', '2'),
        ]

        for transform, params, transformationData in steps:
            transformationDF = transform(transformationDF, params, transformationData)
            # Only the derived column is kept; the source column is removed.
            transformationDF = transformationDF.drop(params['feature'])

        display(transformationDF.limit(2).toPandas())
        return transformationDF


***AUTOML FUNCTIONS***

In [None]:
from tpot import TPOTClassifier
from sklearn.model_selection import train_test_split
import pyspark


def functionClassification(sparkDF, listOfFeatures, label):
    """Train a TPOT classifier on a Spark DataFrame converted to pandas.

    The data is split 90/10 (``random_state=1``); the score on the held-out
    part is shown through ``display`` (expected to be supplied by the
    notebook environment).

    Args:
        sparkDF: Spark DataFrame holding the feature and label columns.
        listOfFeatures: names of the feature columns. If ``label`` is
            listed here too, it is ignored as a feature.
        label: name of the target column.

    Returns:
        dict with the fitted ``'model'``, the held-out ``'X_test'`` and
        ``'y_test'`` arrays, the ``'label'`` name and ``'columnNames'``
        (the feature names, in the column order of ``X_test``).
    """
    # NOTE(review): the DataFrame is persisted here and never unpersisted in
    # this function; left unchanged in case callers rely on the cache.
    sparkDF.persist(pyspark.StorageLevel.MEMORY_AND_DISK)
    df = sparkDF.toPandas()

    # Selecting the label as a feature would leak the target and, in the
    # previous drop-then-select form, raised KeyError.
    featureColumns = [name for name in listOfFeatures if name != label]
    X = df[featureColumns].values
    y = df[label].values

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=1, test_size=0.1)
    tpotModel = TPOTClassifier(verbosity=3, n_jobs=-1, generations=10, max_time_mins=5,
                               population_size=15, use_dask=True)
    tpotModel.fit(X_train, y_train)
    display(" Accuracy of Model : %s" % tpotModel.score(X_test, y_test))

    return {
        'model': tpotModel,
        'X_test': X_test,
        'y_test': y_test,
        'label': label,
        'columnNames': featureColumns,
    }


***READING DATAFRAME***

In [None]:
############## CREATE SPARK SESSION ############################ ENTER YOUR SPARK MASTER IP AND PORT TO CONNECT TO SERVER ################
from pyspark.sql import SparkSession
# Local single-threaded Spark session shared by all following cells.
spark = SparkSession.builder.master('local[1]').getOrCreate()
#%run attritionclassifyHooks.ipynb
try:
    #sourcePreExecutionHook()

    # Load the source CSV from HDFS; the config is passed as a dict literal string.
    hrcityparish = HDFSConnector.fetch(spark, "{'url': '/FileStore/platform/uploadedSourceFiles/HR_CityParish1.csv', 'filename': 'HR_CityParish1.csv', 'delimiter': ',', 'file_type': 'Delimeted', 'is_header': 'Use Header Line', 'domain': 'http://172.31.59.158', 'port': '40070', 'dirPath': '/FileStore/platform', 'server_url': '/nexusMax/NexusMaxPlatform/uploads/platform/'}")
    #sourcePostExecutionHook(hrcityparish)

except Exception as ex:
    # Failure is still only logged (later cells will then lack `hrcityparish`),
    # but the traceback is now included so the root cause can be found.
    logging.exception(ex)
#spark.stop()


***TRANSFORMING DATAFRAME***

In [None]:
#%run attritionclassifyHooks.ipynb
try:
    #transformationPreExecutionHook()

    # The feature-engineering description is serialised to JSON because
    # TransformationMain.run parses its config with json.loads.
    attritionclassifyautofe = TransformationMain.run(hrcityparish,json.dumps( {"FE": [{"transformationsData": [{"transformation_label": "novalue"}], "feature": "UNIQUE ID", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "500", "mean": "250.5", "stddev": "144.48", "min": "1", "max": "500", "missing": "0"}, "updatedLabel": "UNIQUE ID"}, {"transformationsData": [{"feature_label": "LAST NAME", "transformation_label": "String Indexer"}], "feature": "LAST NAME", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": "ADDISON", "max": "ZERINGUE", "missing": "0", "distinct": "363"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "LAST NAME"}, {"transformationsData": [{"feature_label": "FIRST NAME", "transformation_label": "String Indexer"}], "feature": "FIRST NAME", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": "AALIYAH", "max": "ZAKYRIAHA", "missing": "0", "distinct": "392"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "FIRST NAME"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "DEPARTMENT NUMBER", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "500", "mean": "4676.4", "stddev": "2306.9", "min": "100", "max": "7800", "missing": "0"}, "updatedLabel": "DEPARTMENT NUMBER"}, {"transformationsData": [{"feature_label": "DEAPRTMENT NAME", "transformation_label": "String Indexer"}], "feature": "DEAPRTMENT NAME", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": "ANIMAL CONTROL & RESCUE CENTER", "max": "TRANSPORTATION AND DRAINAGE", "missing": 
"0", "distinct": "31"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "DEAPRTMENT NAME"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "PAY LOCATION CODE", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "500", "mean": "4697.15", "stddev": "2314.34", "min": "102", "max": "7831", "missing": "0"}, "updatedLabel": "PAY LOCATION CODE"}, {"transformationsData": [{"feature_label": "PAY LOCATION DESCRIPTION", "transformation_label": "String Indexer"}], "feature": "PAY LOCATION DESCRIPTION", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": "AIRPORT-ADMINISTRATION", "max": "TRAN & DRAIN-TRAFFIC ENG", "missing": "0", "distinct": "83"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "PAY LOCATION DESCRIPTION"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "JOB CODE", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "500", "mean": "3944.57", "stddev": "2309.55", "min": "1022", "max": "9515", "missing": "0"}, "updatedLabel": "JOB CODE"}, {"transformationsData": [{"feature_label": "JOB TITLE", "transformation_label": "String Indexer"}], "feature": "JOB TITLE", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": "311 CALL CENTER REPRESENTATIVE", "max": "WASTEWATER SERVICE INSPECTOR", "missing": "0", "distinct": "171"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "JOB TITLE"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "PAY RANGE", "transformation": [{"transformation": "novalue", "selectedAsDefault": 
1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "358", "mean": "2296.13", "stddev": "1981.66", "min": "1040", "max": "7235", "missing": "142"}, "updatedLabel": "PAY RANGE"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "PAY STEP", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "358", "mean": "11.04", "stddev": "6.05", "min": "1", "max": "21", "missing": "142"}, "updatedLabel": "PAY STEP"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "YEARS OF SERVICE", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "500", "mean": "9.85", "stddev": "9.43", "min": "0.0", "max": "46.6", "missing": "0"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "YEARS OF SERVICE"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "SCHEDULED HOURS", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "486", "mean": "55.11", "stddev": "39.81", "min": "0.0", "max": "112.0", "missing": "14"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "SCHEDULED HOURS"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "HOURLY RATE", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "483", "mean": "22.25", "stddev": "16.13", "min": "1.7308", "max": "125.4308", "missing": "17"}, "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "updatedLabel": "HOURLY RATE"}, {"transformationsData": [{"feature_label": "ANNUAL SALARY", "threshold": 31270.17, "transformation_label": "Binarizer"}], "feature": "ANNUAL SALARY", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "486", "mean": "31270.47", "stddev": "27515.03", "min": "0.0", "max": "129554.88", "missing": 
"14"}, "transformation": [{"transformation": "Binarizer", "selectedAsDefault": 1}], "updatedLabel": "ANNUAL SALARY"}, {"transformationsData": [{"feature_label": "PERSONNEL STATUS DESCRIPTION", "transformation_label": "String Indexer"}], "feature": "PERSONNEL STATUS DESCRIPTION", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": "EMERGENCY APPOINTMENT", "max": "TEMPORARY", "missing": "0", "distinct": "9"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "PERSONNEL STATUS DESCRIPT..."}, {"transformationsData": [{"feature_label": "EMPLOYMENT STATUS", "transformation_label": "String Indexer"}], "feature": "EMPLOYMENT STATUS", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "500", "mean": "", "stddev": "", "min": "Active", "max": "Inactive", "missing": "0", "distinct": "2"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "EMPLOYMENT STATUS"}]}))

    #transformationPostExecutionHook(attritionclassifyautofe)

except Exception as ex:
    # Failure is still only logged, but the traceback is now included so
    # the failing transformation step can be identified.
    logging.exception(ex)


***TRAIN MODEL***

In [None]:
#%run attritionclassifyHooks.ipynb
try:
    #mlPreExecutionHook()

    # Feature columns: the untouched numeric columns plus the derived
    # *_stringindexer / *_binarizer columns; the target is the indexed
    # employment status.
    dataAutoML=functionClassification(attritionclassifyautofe, ["UNIQUE ID", "DEPARTMENT NUMBER", "PAY LOCATION CODE", "JOB CODE", "PAY RANGE", "PAY STEP", "YEARS OF SERVICE", "SCHEDULED HOURS", "HOURLY RATE", "LAST NAME_stringindexer", "FIRST NAME_stringindexer", "DEAPRTMENT NAME_stringindexer", "PAY LOCATION DESCRIPTION_stringindexer", "JOB TITLE_stringindexer", "ANNUAL SALARY_binarizer", "PERSONNEL STATUS DESCRIPTION_stringindexer"], "EMPLOYMENT STATUS_stringindexer")

    #mlPostExecutionHook(dataAutoML)

except Exception as ex:
    # Failure is still only logged, but the traceback is now included so
    # training errors can be diagnosed.
    logging.exception(ex)
#spark.stop()


***PREDICT ON TRAINED MODEL***

In [None]:
import pandas as pd
import numpy as np
import sklearn.metrics

try:
    model = dataAutoML['model']
    X_test = dataAutoML['X_test']
    y_test = dataAutoML['y_test']
    label = dataAutoML['label']
    # Work on a copy: the list is edited below, and editing the one stored in
    # dataAutoML made a second run of this cell fail with a column-count
    # mismatch when building the DataFrame.
    columnNames = list(dataAutoML['columnNames'])
    if label in columnNames:
        columnNames.remove(label)
    predicted = label + "_predicted"
    y_predicted = model.predict(X_test)

    # Test features next to the true and the predicted label.
    df = pd.DataFrame(X_test, columns=columnNames)
    df[label] = y_test
    df[predicted] = y_predicted
    columnNames.insert(0, predicted)
    columnNames.insert(0, label)

    # All metrics are reported as percentages rounded to one decimal.
    Accuracy = np.round(
        100 * sklearn.metrics.accuracy_score(y_true=y_test, y_pred=y_predicted), 1)
    F1 = np.round(
        100 * sklearn.metrics.f1_score(y_true=y_test, y_pred=y_predicted, average="weighted"), 1)
    Precision = np.round(
        100 * sklearn.metrics.precision_score(y_true=y_test, y_pred=y_predicted, average="weighted"), 1)
    Recall = np.round(
        100 * sklearn.metrics.recall_score(y_true=y_test, y_pred=y_predicted, average="weighted"), 1)
    display(" Accuracy of Prediction on test data    : %s"%Accuracy)
    display(" F1 score of Prediction on test data    : %s"%F1)
    display(" Precision of Prediction on test data   : %s"%Precision)
    display(" Recall of Prediction on test data      : %s"%Recall)
    display(df.head())
except Exception as ex:
    # Failure is still only logged, but the traceback is now included.
    logging.exception(ex)

# The session is stopped whether or not prediction succeeded.
spark.stop()

