In [1]:
import numpy as np 
import pandas as pd 
import pyspark.sql.types as T
import pyspark.sql.functions as F
from pyspark.sql import SparkSession
from pyspark.sql.window import Window

In [2]:
# Build a Spark session against the local master (or reuse one that already exists)
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("ShortNSimple")
    .getOrCreate()
)
spark

In [3]:
# Load the Iris CSV with pandas first, then hand the pandas frame to Spark
iris_pdf = pd.read_csv("datasets/iris.csv")
data = spark.createDataFrame(iris_pdf)

# Preview the first 5 rows without truncating cell values
data.show(n=5, truncate=False)

+------------+-----------+------------+-----------+-----------+
|sepal_length|sepal_width|petal_length|petal_width|class      |
+------------+-----------+------------+-----------+-----------+
|5.1         |3.5        |1.4         |0.2        |Iris-setosa|
|4.9         |3.0        |1.4         |0.2        |Iris-setosa|
|4.7         |3.2        |1.3         |0.2        |Iris-setosa|
|4.6         |3.1        |1.5         |0.2        |Iris-setosa|
|5.0         |3.6        |1.4         |0.2        |Iris-setosa|
+------------+-----------+------------+-----------+-----------+
only showing top 5 rows



**Image source - https://pediaa.com/what-is-the-difference-between-sepals-and-petals/**
![Difference between sepals and petals](https://pediaa.com/wp-content/uploads/2019/05/What-is-the-Difference-Between-Sepals-and-Petals_Figure1.jpg)

**Image source - https://www.datacamp.com/community/tutorials/machine-learning-in-r**
![Image source - https://www.datacamp.com/community/tutorials/machine-learning-in-r](https://s3.amazonaws.com/assets.datacamp.com/blog_assets/Machine+Learning+R/plot_675020181.png)


### Assign flower type using `when` conditions

In [9]:
# Rule-based labelling on the two petal measurements.
# Rules are evaluated in order: a row receives the label of the first rule it
# satisfies, and the fallback label when it satisfies none of them.
petal_length = F.col("petal_length")
petal_width = F.col("petal_width")

matches_setosa_rule = (petal_length <= 2.0) & (petal_width <= 0.6)
matches_versicolor_rule = (petal_length <= 5.0) & (petal_width <= 1.6)

predicted_label = (
    F.when(matches_setosa_rule, F.lit("Iris-setosa"))
    .when(matches_versicolor_rule, F.lit("Iris-versicolor"))
    .otherwise(F.lit("Iris-virginica"))
)

# Adds the column, or replaces it if this cell is re-run
data = data.withColumn("predicted_label", predicted_label)

In [10]:
# Print up to 150 rows so actual and predicted labels can be compared side by side
data.show(n=150, truncate=False)

+------------+-----------+------------+-----------+---------------+---------------+
|sepal_length|sepal_width|petal_length|petal_width|class          |predicted_label|
+------------+-----------+------------+-----------+---------------+---------------+
|5.1         |3.5        |1.4         |0.2        |Iris-setosa    |Iris-setosa    |
|4.9         |3.0        |1.4         |0.2        |Iris-setosa    |Iris-setosa    |
|4.7         |3.2        |1.3         |0.2        |Iris-setosa    |Iris-setosa    |
|4.6         |3.1        |1.5         |0.2        |Iris-setosa    |Iris-setosa    |
|5.0         |3.6        |1.4         |0.2        |Iris-setosa    |Iris-setosa    |
|5.4         |3.9        |1.7         |0.4        |Iris-setosa    |Iris-setosa    |
|4.6         |3.4        |1.4         |0.3        |Iris-setosa    |Iris-setosa    |
|5.0         |3.4        |1.5         |0.2        |Iris-setosa    |Iris-setosa    |
|4.4         |2.9        |1.4         |0.2        |Iris-setosa    |Iris-seto