***GENERATED CODE FOR classifyhairloss PIPELINE.***

***DON'T EDIT THIS CODE.***

***CONNECTOR FUNCTIONS TO READ DATA.***

In [None]:
import os
import datetime
import logging
import warnings
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')
# Configure the root logger: records at INFO and above, rendered as "LEVEL:message".
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)


class HDFSConnector:
    """Read a CSV file from HDFS into Spark and write a DataFrame out as CSV.

    Both operations receive ``config`` as the string form of a Python dict
    literal (a ready-made dict is accepted too). The methods take no ``self``
    and are meant to be called on the class: ``HDFSConnector.fetch(spark, cfg)``.
    """

    @staticmethod
    def _parseConfig(config):
        """Return ``config`` as a dict without executing arbitrary code.

        Raises:
            ValueError / SyntaxError: if ``config`` is not a plain literal.
        """
        if isinstance(config, dict):
            return config
        # SECURITY: the original used eval(), which runs any expression that
        # ends up in the string. literal_eval only accepts literals.
        import ast
        return ast.literal_eval(config)

    @staticmethod
    def fetch(spark, config):
        """Load the CSV at ``config['url']`` from HDFS and return it as a DataFrame.

        Args:
            spark: Spark session used for reading.
            config: dict literal (as a string) with at least a ``'url'`` key
                holding the path on HDFS.

        Returns:
            The DataFrame read with a header line and schema inference.

        Raises:
            KeyError: if ``HDFS_SERVER`` / ``HDFS_PORT`` are missing from the
                environment or ``'url'`` is missing from the config.
        """
        ################### INPUT HADOOP HOST PORT TO CONNECT WITH ###############################
        hdfs_server = str(os.environ['HDFS_SERVER'])
        hdfs_port = int(os.environ['HDFS_PORT'])
        settings = HDFSConnector._parseConfig(config)
        df = spark.read.options(header='true', inferschema='true').csv(
            f"hdfs://{hdfs_server}:{hdfs_port}{settings['url']}", header='true')
        # `display` is not defined in this file; presumably the notebook builtin.
        display(df.limit(2).toPandas())
        return df

    @staticmethod
    def put(df, spark, config):
        """Write ``df`` as CSV to a timestamp-prefixed path derived from ``config['url']``.

        Args:
            df: DataFrame to write.
            spark: unused; kept so existing callers keep working.
            config: dict literal (as a string) with ``'is_header'``,
                ``'delimiter'`` and ``'url'`` keys.

        Returns:
            Whatever ``DataFrameWriter.save`` returns.
        """
        settings = HDFSConnector._parseConfig(config)  # parse once instead of three times
        header = 'true' if settings["is_header"] == "Use Header Line" else 'false'
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H.%M.%S")
        # NOTE(review): the target keeps the original "<timestamp>_ <url>" layout,
        # including the space after the underscore; confirm that is intended.
        target = ("%s %s") % (timestamp + "_", settings['url'])
        return df.write.format('csv').options(
            header=header, delimiter=settings["delimiter"]).save(target)


***TRANSFORMATION FUNCTIONS THAT WILL BE APPLIED TO THE DATA***

In [None]:
import json
from pyspark.sql.types import IntegerType
from pyspark.ml.feature import StringIndexer
from pyspark.sql.functions import col, when
from pyspark.sql.types import IntegerType
from pyspark.sql.functions import mean, stddev, min, max, col


class CleanseData:
    """Fill missing and single-space (" ") values in Spark DataFrame columns.

    Every ``replaceBy*`` method takes ``feature`` (a schema field exposing
    ``.name``; ``replaceByMean`` also reads ``.dataType``) and the DataFrame,
    and returns a new DataFrame. The trailing keyword parameters
    (``mean_``, ``max_`` ...) are unused and kept only for compatibility.
    """

    def cleanValueForFE(self, value):
        """Map ``None`` to ``""`` and NaN to ``"nan"``; return anything else as is."""
        if value is None:
            return ""
        if str(value) == 'nan':
            return "nan"
        return value

    def _completeCaseStatistic(self, feature, df, aggregate):
        """Apply ``aggregate`` to the feature column and return the cleaned result.

        As in the original code, rows containing a null in *any* column are
        dropped first, so the statistic is computed on complete rows only.
        """
        row = df.dropna().select(
            aggregate(col(feature.name)).alias('stat')).collect()[0]
        return self.cleanValueForFE(row['stat'])

    @staticmethod
    def _replaceBlank(feature, df, replacement):
        """Replace cells equal to a single space with ``replacement``."""
        return df.withColumn(
            feature.name,
            when(col(feature.name) == " ", replacement).otherwise(col(feature.name)))

    def replaceByMean(self, feature, df, mean_=-1):
        """Fill nulls and blank cells of the feature column with its mean."""
        # Local import: `lit` is not among the file-level pyspark imports.
        from pyspark.sql.functions import lit

        meanValue = self._completeCaseStatistic(feature, df, mean)
        df = df.fillna(meanValue, subset=[feature.name])
        # Fix: the original built this column expression but discarded the
        # result, so blank cells were never replaced. The replacement is cast
        # to the column's own type so the column keeps its schema (the old
        # expression would have cast every value to Integer).
        return self._replaceBlank(
            feature, df, lit(meanValue).cast(feature.dataType))

    def replaceByMax(self, feature, df, max_=-1):
        """Fill nulls and blank cells of the feature column with its maximum."""
        maxValue = self._completeCaseStatistic(feature, df, max)
        df = df.fillna(maxValue, subset=[feature.name])
        return self._replaceBlank(feature, df, maxValue)

    def replaceByMin(self, feature, df, min_=-1):
        """Fill nulls and blank cells of the feature column with its minimum."""
        minValue = self._completeCaseStatistic(feature, df, min)
        df = df.fillna(minValue, subset=[feature.name])
        return self._replaceBlank(feature, df, minValue)

    def replaceByStandardDeviation(self, feature, df, stddev_=-1):
        """Fill nulls and blank cells of the feature column with its standard deviation."""
        stddevValue = self._completeCaseStatistic(feature, df, stddev)
        df = df.fillna(stddevValue, subset=[feature.name])
        return self._replaceBlank(feature, df, stddevValue)

    def replaceDateRandomly(self, feature, df):
        """Fill nulls and blank cells with the first non-null value of the column.

        Despite the name, no randomness is involved: the value comes from
        ``head(1)`` of the non-null rows. A column with no non-null value is
        returned unchanged (the original raised ``IndexError`` there).
        """
        firstRows = df.where(col(feature.name).isNotNull()).head(1)
        if not firstRows:
            return df
        fillValue = self.cleanValueForFE(firstRows[0][feature.name])
        df = df.fillna(str(fillValue), subset=[feature.name])
        return self._replaceBlank(feature, df, fillValue)

    def replaceNullValues(self, fList, df):
        """Apply each feature's configured replacement strategy to ``df``.

        Args:
            fList: iterable of dicts with a ``"feature"`` name and a
                ``"replaceby"`` strategy containing one of ``mean``, ``max``,
                ``min``, ``stddev`` or ``random``.
            df: DataFrame whose schema fields are matched against the names.

        Returns:
            The DataFrame after all matching replacements.
        """
        # Checked in this order; the first keyword found in "replaceby" wins.
        strategies = (
            ("mean", self.replaceByMean),
            ("max", self.replaceByMax),
            ("min", self.replaceByMin),
            ("stddev", self.replaceByStandardDeviation),
            ("random", self.replaceDateRandomly),
        )
        fields = df.schema.fields
        for featureObj in fList:
            wanted = featureObj["feature"]
            # Fix: prefer the column with exactly this name. Pure substring
            # matching made e.g. "Stress" also rewrite "Stress Level" with the
            # wrong strategy. Substring matching remains as a fallback when no
            # column carries the exact name.
            matches = [field for field in fields if field.name == wanted]
            if not matches:
                matches = [field for field in fields if wanted in field.name]
            for field in matches:
                for keyword, replace in strategies:
                    if keyword in featureObj["replaceby"]:
                        df = replace(field, df)
                        break
        return df


def StringIndexerTransform(df, params, transformationData=None):
    """Add a ``<feature>_stringindexer`` column holding StringIndexer codes.

    Args:
        df: input Spark DataFrame.
        params: dict; only ``params["feature"]`` (the column to index) is read.
        transformationData: unused; kept for interface compatibility. The
            default is ``None`` to avoid a shared mutable default.

    Returns:
        The DataFrame with nulls in the feature column replaced by ``''`` and
        the index column appended. When the index column has at most four
        distinct values it is cast to integer; otherwise it keeps the type
        produced by StringIndexer.
    """
    feature = params["feature"]
    outcol = feature + "_stringindexer"

    filled = df.fillna({feature: ''})
    indexer = StringIndexer(
        inputCol=feature, outputCol=outcol, handleInvalid="skip")
    indexed = indexer.fit(filled).transform(filled)

    # Count on the cluster instead of collecting every distinct value to the
    # driver only to take the length of the list.
    if indexed.select(outcol).distinct().count() <= 4:
        return indexed.withColumn(outcol, indexed[outcol].cast(IntegerType()))
    return indexed


class TransformationMain:
    """Entry point that applies null cleansing and string indexing to a DataFrame."""

    # (feature, stats min, stats max, stats distinct) for each string column
    # that is string-indexed and then dropped.
    _INDEXED_FEATURES = (
        ('Medical Conditions', 'Alopecia Areata ', 'Thyroid Problem', '10'),
        ('Medications & Treatments', 'Accutane', 'Steroids', '10'),
        ('Nutritional Deficiencies ', 'Biotin Deficiency ', 'Zinc Deficiency', '10'),
        ('Stress', 'High', 'Moderate', '3'),
    )

    # TODO: change df argument in run with following
    @staticmethod
    def run(transformationDF, config):
        """Cleanse nulls per ``config`` and replace string columns by index columns.

        Args:
            transformationDF: input Spark DataFrame.
            config: JSON string whose ``"FE"`` entry is the feature list handed
                to ``CleanseData.replaceNullValues``.

        Returns:
            The transformed DataFrame; each indexed source column is dropped
            and replaced by ``<feature>_stringindexer``.
        """
        configObj = json.loads(config)
        transformationDF = CleanseData().replaceNullValues(
            configObj["FE"], transformationDF)

        # Same four calls the generator emitted, driven by the table above
        # instead of being copy-pasted; the argument dicts are rebuilt with
        # identical contents.
        for feature, minValue, maxValue, distinct in TransformationMain._INDEXED_FEATURES:
            label = {'feature_label': feature,
                     'transformation_label': 'String Indexer'}
            params = {
                'transformationsData': [dict(label)],
                'feature': feature,
                'type': 'string',
                'selected': 'True',
                'replaceby': 'max',
                'stats': {'count': '500', 'mean': '', 'stddev': '',
                          'min': minValue, 'max': maxValue,
                          'missing': '0', 'distinct': distinct},
                'transformation': [{'transformation': 'String Indexer',
                                    'selectedAsDefault': 1}],
                'updatedLabel': feature,
            }
            transformationDF = StringIndexerTransform(
                transformationDF, params, label)
            transformationDF = transformationDF.drop(feature)

        # `display` is not defined in this file; presumably the notebook builtin.
        display(transformationDF.limit(2).toPandas())
        return transformationDF


***AUTOML FUNCTIONS***

In [None]:
from tpot import TPOTClassifier
from sklearn.model_selection import train_test_split
import pyspark


def functionClassification(sparkDF, listOfFeatures, label):
    """Train a TPOT classifier on the given Spark DataFrame.

    The frame is persisted, converted to pandas, split 90/10 with a fixed
    ``random_state`` and fitted with ``TPOTClassifier``.

    Args:
        sparkDF: Spark DataFrame containing the feature and label columns.
        listOfFeatures: column names used as model inputs.
        label: name of the target column.

    Returns:
        dict with the fitted ``model``, the held-out ``X_test`` / ``y_test``
        arrays, the ``label`` name and ``columnNames`` (the same list object
        that was passed in as ``listOfFeatures``).
    """
    sparkDF.persist(pyspark.StorageLevel.MEMORY_AND_DISK)
    pandasFrame = sparkDF.toPandas()

    # The label column is dropped before the feature columns are selected.
    withoutLabel = pandasFrame.drop(label, axis=1)
    featureMatrix = withoutLabel[listOfFeatures].values
    targetVector = pandasFrame[label].values

    partitions = train_test_split(
        featureMatrix, targetVector, random_state=1, test_size=0.1)
    X_train, X_test, y_train, y_test = partitions

    tpotModel = TPOTClassifier(
        verbosity=3,
        n_jobs=-1,
        generations=10,
        max_time_mins=5,
        population_size=15,
        use_dask=True,
    )
    tpotModel.fit(X_train, y_train)

    heldOutScore = tpotModel.score(X_test, y_test)
    display(" Accuracy of Model : %s" % heldOutScore)

    return {
        'model': tpotModel,
        'X_test': X_test,
        'y_test': y_test,
        'label': label,
        'columnNames': listOfFeatures,
    }


***READING DATAFRAME***

In [None]:
############## CREATE SPARK SESSION ############################ ENTER YOUR SPARK MASTER IP AND PORT TO CONNECT TO SERVER ################
from pyspark.sql import SparkSession
# Single-threaded local Spark session shared by the cells that follow.
spark = (
    SparkSession.builder
    .master('local[1]')
    .getOrCreate()
)
#%run classifyhairlossHooks.ipynb
try:
    #sourcePreExecutionHook()

    # Source description handed to the connector as a dict-literal string.
    predicthairfall = HDFSConnector.fetch(
        spark,
        "{'url': '/FileStore/platform/uploadedSourceFiles/Predict Hair Fall11.csv', 'filename': 'Predict Hair Fall11.csv', 'delimiter': ',', 'file_type': 'Delimeted', 'is_header': 'Use Header Line', 'domain': 'http://172.31.59.158', 'port': '40070', 'dirPath': '/FileStore/platform', 'server_url': '/nexusMax/NexusMaxPlatform/uploads/platform/'}",
    )
    #sourcePostExecutionHook(predicthairfall)

except Exception as ex:
    # Failures are logged and not re-raised; `predicthairfall` stays undefined then.
    logging.error(ex)
#spark.stop()


***TRANSFORMING DATAFRAME***

In [None]:
#%run classifyhairlossHooks.ipynb

def numericFeatureConfig(feature, meanValue, stddevValue, minimum="0", maximum="1", updatedLabel=None):
    """Build the feature-engineering entry for a numeric column (no transformation).

    All statistics are strings, exactly as the generator emitted them.
    ``updatedLabel`` defaults to the feature name.
    """
    return {
        "transformationsData": [{"transformation_label": "novalue"}],
        "feature": feature,
        "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}],
        "type": "numeric",
        "replaceby": "mean",
        "selected": "True",
        "stats": {"count": "500", "mean": meanValue, "stddev": stddevValue,
                  "min": minimum, "max": maximum, "missing": "0"},
        "updatedLabel": feature if updatedLabel is None else updatedLabel,
    }


def indexedFeatureConfig(feature, minimum, maximum, distinct):
    """Build the feature-engineering entry for a string column that is string-indexed."""
    return {
        "transformationsData": [{"feature_label": feature, "transformation_label": "String Indexer"}],
        "feature": feature,
        "type": "string",
        "selected": "True",
        "replaceby": "max",
        "stats": {"count": "500", "mean": "", "stddev": "", "min": minimum,
                  "max": maximum, "missing": "0", "distinct": distinct},
        "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}],
        "updatedLabel": feature,
    }


# Same entries, in the same order, as the generated JSON literal. The literal
# had been hard-wrapped in the middle of the "Biotin Deficiency " string, which
# is a syntax error; building it from the helpers avoids that.
classifyhairlossFEConfig = {"FE": [
    numericFeatureConfig("Id", "131164.07", "12718.98", "110003", "152680"),
    numericFeatureConfig("Genetics", "0.5", "0.5"),
    numericFeatureConfig("Hormonal Changes", "0.49", "0.5"),
    indexedFeatureConfig("Medical Conditions", "Alopecia Areata ", "Thyroid Problem", "10"),
    indexedFeatureConfig("Medications & Treatments", "Accutane", "Steroids", "10"),
    indexedFeatureConfig("Nutritional Deficiencies ", "Biotin Deficiency ", "Zinc Deficiency", "10"),
    numericFeatureConfig("Nutritional Deficiencies Index", "3.63", "2.69", "0", "9",
                         updatedLabel="Nutritional Deficiencies ..."),
    numericFeatureConfig("Genetics_Nutrition", "4.13", "2.73", "0", "10"),
    indexedFeatureConfig("Stress", "High", "Moderate", "3"),
    numericFeatureConfig("Age", "34.03", "9.35", "18", "50"),
    numericFeatureConfig("Age Label", "1.91", "0.82", "1", "3"),
    numericFeatureConfig("Poor Hair Care Habits ", "0.51", "0.5"),
    numericFeatureConfig("Environmental Factors", "0.5", "0.5"),
    numericFeatureConfig("Smoking", "0.53", "0.5"),
    numericFeatureConfig("Weight Loss ", "0.46", "0.5"),
    numericFeatureConfig("Hair Loss", "0.49", "0.5"),
    numericFeatureConfig("Life style", "2.98", "1.24", "0", "6"),
    numericFeatureConfig("Life style & Stress Level", "3.97", "1.46", "0", "8"),
    numericFeatureConfig("Age Group", "2.91", "0.82", "2", "4"),
    numericFeatureConfig("Stress Level", "0.99", "0.82", "0", "2"),
    numericFeatureConfig("Stress & EF", "1.49", "0.97", "0", "3"),
    numericFeatureConfig("Alopecia Areata ", "0.1", "0.3"),
    numericFeatureConfig("Thyroid Problem", "0.22", "0.42"),
    numericFeatureConfig("Eczema", "0.08", "0.27"),
    numericFeatureConfig("Dermatosis", "0.08", "0.27"),
    numericFeatureConfig("Ringworm", "0.07", "0.25"),
    numericFeatureConfig("Psoriasis", "0.11", "0.31"),
    numericFeatureConfig("Scalp Infection", "0.09", "0.29"),
    numericFeatureConfig("Androgenetic Alopecia", "0.09", "0.29"),
    numericFeatureConfig("Seborrheic Dermatitis", "0.08", "0.27"),
    numericFeatureConfig("Dermatitis", "0.09", "0.28"),
    numericFeatureConfig("Any Nutritional deficiencies", "1.0", "0.0", "1", "1",
                         updatedLabel="Any Nutritional deficienc..."),
    numericFeatureConfig("Steroids", "0.11", "0.31"),
    numericFeatureConfig("Rogaine", "0.12", "0.33"),
    numericFeatureConfig("Chemotherapy", "0.09", "0.29"),
    numericFeatureConfig("Antifungal Cream", "0.09", "0.29"),
    numericFeatureConfig("Antidepressants ", "0.12", "0.33"),
    numericFeatureConfig("Accutane", "0.1", "0.29"),
    numericFeatureConfig("Antibiotics", "0.09", "0.29"),
    numericFeatureConfig("Blood Pressure Medication", "0.11", "0.32"),
    numericFeatureConfig("Heart Medication ", "0.09", "0.29"),
    numericFeatureConfig("Immunomodulators", "0.08", "0.27"),
    numericFeatureConfig("Any M&T", "1.0", "0.0", "1", "1"),
    numericFeatureConfig("Selenium deficiency", "0.08", "0.27"),
    numericFeatureConfig("Iron deficiency", "0.08", "0.27"),
    numericFeatureConfig("Vitamin E Deficiency", "0.1", "0.3"),
    numericFeatureConfig("Magnesium deficiency", "0.09", "0.29"),
    numericFeatureConfig("Zinc Deficiency", "0.16", "0.37"),
    numericFeatureConfig("Biotin Deficiency ", "0.12", "0.32"),
    numericFeatureConfig("Vitamin D Deficiency", "0.11", "0.31"),
    numericFeatureConfig("Omega-3 fatty acids", "0.09", "0.28"),
    numericFeatureConfig("Protein deficiency", "0.09", "0.29"),
    numericFeatureConfig("Vitamin A Deficiency", "0.09", "0.28"),
]}

try:
    #transformationPreExecutionHook()

    classifyhairlossautofe = TransformationMain.run(
        predicthairfall, json.dumps(classifyhairlossFEConfig))

    #transformationPostExecutionHook(classifyhairlossautofe)

except Exception as ex:
    # Failures are logged and not re-raised; `classifyhairlossautofe` stays undefined then.
    logging.error(ex)


***TRAIN MODEL***

In [None]:
#%run classifyhairlossHooks.ipynb
try:
    #mlPreExecutionHook()

    # Train on every numeric column plus the four string-indexer outputs;
    # "Hair Loss" is the target.
    dataAutoML = functionClassification(
        classifyhairlossautofe,
        [
            "Id",
            "Genetics",
            "Hormonal Changes",
            "Nutritional Deficiencies Index",
            "Genetics_Nutrition",
            "Age",
            "Age Label",
            "Poor Hair Care Habits ",
            "Environmental Factors",
            "Smoking",
            "Weight Loss ",
            "Life style",
            "Life style & Stress Level",
            "Age Group",
            "Stress Level",
            "Stress & EF",
            "Alopecia Areata ",
            "Thyroid Problem",
            "Eczema",
            "Dermatosis",
            "Ringworm",
            "Psoriasis",
            "Scalp Infection",
            "Androgenetic Alopecia",
            "Seborrheic Dermatitis",
            "Dermatitis",
            "Any Nutritional deficiencies",
            "Steroids",
            "Rogaine",
            "Chemotherapy",
            "Antifungal Cream",
            "Antidepressants ",
            "Accutane",
            "Antibiotics",
            "Blood Pressure Medication",
            "Heart Medication ",
            "Immunomodulators",
            "Any M&T",
            "Selenium deficiency",
            "Iron deficiency",
            "Vitamin E Deficiency",
            "Magnesium deficiency",
            "Zinc Deficiency",
            "Biotin Deficiency ",
            "Vitamin D Deficiency",
            "Omega-3 fatty acids",
            "Protein deficiency",
            "Vitamin A Deficiency",
            "Medical Conditions_stringindexer",
            "Medications & Treatments_stringindexer",
            "Nutritional Deficiencies _stringindexer",
            "Stress_stringindexer",
        ],
        "Hair Loss",
    )

    #mlPostExecutionHook(dataAutoML)

except Exception as ex:
    # Failures are logged and not re-raised; `dataAutoML` stays undefined then.
    logging.error(ex)
#spark.stop()


***PREDICT ON TRAINED MODEL***

In [None]:
import pandas as pd
import numpy as np
import sklearn.metrics

# Score the trained model on the held-out split and show the metrics plus a
# preview of the predictions.
try:
    model = dataAutoML['model']
    X_test = dataAutoML['X_test']
    y_test = dataAutoML['y_test']
    label = dataAutoML['label']
    # Fix: work on a copy. The original removed from / inserted into the list
    # stored in dataAutoML, so running this cell a second time failed with a
    # column-count mismatch when building the DataFrame.
    columnNames = list(dataAutoML['columnNames'])
    if label in columnNames:
        columnNames.remove(label)
    predicted = label + "_predicted"
    y_predicted = model.predict(X_test)

    df = pd.DataFrame(X_test, columns=columnNames)
    df[label] = y_test
    df[predicted] = y_predicted
    # Kept from the original: the local name list ends up led by label and prediction.
    columnNames.insert(0, predicted)
    columnNames.insert(0, label)

    # All metrics are percentages rounded to one decimal; F1, precision and
    # recall use weighted averaging.
    Accuracy = np.round(
        100 * sklearn.metrics.accuracy_score(y_true=y_test, y_pred=y_predicted), 1)
    F1 = np.round(
        100 * sklearn.metrics.f1_score(y_true=y_test, y_pred=y_predicted, average="weighted"), 1)
    Precision = np.round(
        100 * sklearn.metrics.precision_score(y_true=y_test, y_pred=y_predicted, average="weighted"), 1)
    Recall = np.round(
        100 * sklearn.metrics.recall_score(y_true=y_test, y_pred=y_predicted, average="weighted"), 1)
    display(" Accuracy of Prediction on test data    : %s"%Accuracy)
    display(" F1 score of Prediction on test data    : %s"%F1)
    display(" Precision of Prediction on test data   : %s"%Precision)
    display(" Recall of Prediction on test data      : %s"%Recall)
    display(df.head())
except Exception as ex:
    # Failures are logged and not re-raised.
    logging.error(ex)

# End of pipeline: release the Spark session created in the read cell.
spark.stop()

