In [43]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import when
from pyspark.ml.feature import VectorAssembler, StandardScaler
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.evaluation import BinaryClassificationEvaluator

In [44]:
spark = SparkSession.builder.getOrCreate()

In [45]:
Planet_Training = spark.read.csv("DataUAP/Classification/Planet_Training.csv", header=True, inferSchema=True)
Planet_Training = Planet_Training.select("Temperature", "Water", "Atmosphere Color", "Habitable")
Planet_Training = Planet_Training.na.drop()

In [46]:
Planet_Testing = spark.read.csv("DataUAP/Classification/Planet_Testing.csv", header=True, inferSchema=True)
Planet_Testing = Planet_Testing.select("Temperature", "Water", "Atmosphere Color", "Habitable")
Planet_Testing = Planet_Testing.na.drop()

In [67]:
def parse(df):
    df = df.withColumn("Atmosphere Color", when(df["Atmosphere Color"]=="Red",0).
                    when(df["Atmosphere Color"]=="Blue",1).
                    when(df["Atmosphere Color"]=="Yellow",2))
    
    df = df.withColumn("Water", when(df["Water"]=="Low",0).
                    when(df["Water"]=="Medium",1).
                    when(df["Water"]=="High",2))
    
    cols = df.columns
    cols.remove("Habitable")
    df = VectorAssembler(inputCols = cols, outputCol = "Features").transform(df)
    
    scaler = StandardScaler(inputCol = "Features", outputCol = "Scaled_Features")
    df = scaler.fit(df).transform(df)
    
    return df

In [68]:
Planet_Training = parse(Planet_Training)
Planet_Testing = parse(Planet_Testing)

IllegalArgumentException: 'Output column Features already exists.'

In [69]:
model = LogisticRegression(featuresCol = "Scaled_Features", labelCol = "Habitable", maxIter=10).fit(Planet_Training)

In [62]:
prediction = model.transform(Planet_Testing)

In [63]:
evaluator = BinaryClassificationEvaluator(labelCol="Habitable")
acc = evaluator.evaluate(prediction) * 100
print("Accuracy: {}%".format(acc))

Accuracy: 91.71043337232418%
