In [1]:
# Azure Blob Storage connection settings used by the mount cell below.
storage_account_name = "<Storage account name>"
container = "<Container name>"

# Read the account key from a Databricks secret scope instead of pasting it into
# the notebook: notebook source and revision history are readable by anyone with
# access to the notebook, whereas secret values are redacted in cell output.
storage_account_key = dbutils.secrets.get(scope="<Secret scope name>", key="<Secret key name>")

In [2]:
# Mount the blob container at /mnt/data so later cells can read it via a DBFS path.
# dbutils.fs.mount raises when the mount point is already in use, so the call is
# skipped if the mount exists; this keeps the cell safe to re-run (e.g. Run All).
mount_point = "/mnt/data"
if not any(mount.mountPoint == mount_point for mount in dbutils.fs.mounts()):
    dbutils.fs.mount(
        source="wasbs://{0}@{1}.blob.core.windows.net".format(container, storage_account_name),
        mount_point=mount_point,
        extra_configs={
            "fs.azure.account.key.{0}.blob.core.windows.net".format(storage_account_name): storage_account_key
        },
    )

In [3]:
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.evaluation import BinaryClassificationEvaluator

In [4]:
# Load heart.csv from the mounted container into a Spark DataFrame.
# The path is a fixed literal under the /mnt/data mount; it contains no "{}"
# placeholders, so no string formatting is applied to it.
data = (
    spark.read
    .option("header", "true")       # first row holds the column names
    .option("inferSchema", "true")  # infer column types instead of reading every column as string
    .option("delimiter", ",")
    .csv("/mnt/data/heart.csv")
)

# Print the first rows as a quick check that the file was parsed as expected.
data.show()

In [5]:
# Names of the columns in `data` that are used as model inputs.
feature_inputs = [
    "age",
    "sex",
    "cp",
    "trestbps",
    "chol",
    "fbs",
    "restecg",
    "thalach",
    "exang",
    "oldpeak",
    "slope",
    "ca",
    "thal",
]

# Assembler that packs the input columns into a single vector column named "features".
vectors = VectorAssembler(outputCol="features", inputCols=feature_inputs)

In [6]:
# Run the assembler over the loaded rows, adding the "features" vector column.
vector_data = vectors.transform(data)

# Keep only the two columns used from here on: the feature vector and "target".
features = vector_data.select("features", "target")

In [7]:
# Rename "target" to "label", the column name the estimator in a later cell is configured with.
features = features.withColumnRenamed(existing="target", new="label")

In [8]:
# Replace the "label" column with the same values cast to double.
features = features.withColumn(
    "label",
    features.label.cast("double"),
)

In [9]:
# Split the rows roughly 70/30 into training and held-out test sets.
# A fixed seed makes the split repeatable, so metrics reported further down can
# be compared between runs instead of shifting with every re-execution.
(training_data, test_data) = features.randomSplit([0.7, 0.3], seed=42)

In [10]:
# Logistic-regression estimator reading inputs from "features" and the target from "label".
lr = LogisticRegression(
    featuresCol="features",
    labelCol="label",
)

In [11]:
# Fit the estimator on the training split; the result is the trained model.
model = lr.fit(dataset=training_data)

In [12]:
# Summary object attached to the fitted model.
# NOTE(review): per the PySpark API this summary is computed on the data passed
# to fit(), so the AUC printed here is a training-set figure, not a held-out one.
model_summary = model.summary

# Area under the ROC curve as reported by the model summary.
print("AUC:", model_summary.areaUnderROC)

In [13]:
# Evaluator for binary classifiers. The arguments spell out the library defaults
# so the scored columns and the metric are visible at the call site.
evaluator = BinaryClassificationEvaluator(
    rawPredictionCol="rawPrediction",
    labelCol="label",
    metricName="areaUnderROC",
)

In [14]:
# Score the held-out test split with the fitted model.
predictions = model.transform(test_data)

In [15]:
# Compute the evaluator's metric on the test-set predictions; as the last
# expression in the cell, the value is shown as the cell output.
evaluator.evaluate(predictions)

In [16]:
# Databricks notebook display of the fitted model with the "ROC" plot type.
# The data passed in is the training split, so the rendered table
# (False Positive Rate, True Positive Rate, Threshold) describes training data.
display(model, training_data, "ROC")

False Positive Rate,True Positive Rate,Threshold
0.0,0.0,0.9964419552239666
0.0,0.0181818181818181,0.9964419552239666
0.0,0.0363636363636363,0.9961240845650032
0.0,0.0545454545454545,0.9957672945091514
0.0,0.0727272727272727,0.995420982533002
0.0,0.0909090909090909,0.9943105379067676
0.0,0.109090909090909,0.9927084164294
0.0,0.1272727272727272,0.990700593463728
0.0,0.1454545454545454,0.9896445959960192
0.0,0.1636363636363636,0.982317581925775
