In [53]:
import os

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when, count
import pyspark.pandas as pd
from pyspark.sql import functions as F
from pyspark.sql.window import Window
import pyspark.sql.types as types

In [54]:
# Create (or reuse) the Spark session for this notebook and register the
# SQL Server JDBC driver jar so the JDBC reads/writes below can load it.
spark = (
    SparkSession.builder
    .appName("Uncovered Area")
    .config("spark.jars", "../jdbc/mssql-jdbc-12.6.1.jre8.jar")
    .getOrCreate()
)

In [55]:
# Connection settings for the source SQL Server database.
server_name = "mssql"
port = "1433"
database_name = "Data"
url = f"jdbc:sqlserver://{server_name}:{port};databaseName={database_name}"

table_name = "ARProperties"

# Credentials come from the environment so that no secret is stored in the
# notebook (or in its version history). MSSQL_USER falls back to the account
# this notebook has always used; the password has no fallback on purpose.
username = os.environ.get("MSSQL_USER", "SA")
password = os.environ.get("MSSQL_PASSWORD")
if not password:
    raise RuntimeError(
        "MSSQL_PASSWORD is not set; export it before starting the kernel."
    )

# Load the whole source table over JDBC. `username`, `password`,
# `server_name` and `port` are reused by the write cell at the end of the
# notebook, so their names must stay stable.
df = (
    spark.read
    .format("jdbc")
    .option("url", url)
    .option("dbtable", table_name)
    .option("user", username)
    .option("password", password)
    .option("encrypt", "false")
    .option("driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver")
    .load()
)

In [56]:
# Keep every row whose property type is not "Terrain".
df = df.where(~F.col("property_type").isin(["Terrain"]))

In [57]:
# Cast both surface columns to integers so they can be compared and
# subtracted numerically in the following cells.
for surface_column in ("surface_total", "surface_covered"):
    df = df.withColumn(
        surface_column,
        F.col(surface_column).cast(types.IntegerType()),
    )

In [58]:
def calculate_uncovered_area(surface_total, surface_covered):
    """Return the uncovered surface of a property, or None when it has none.

    Args:
        surface_total: Total surface of the property, or None if unknown.
        surface_covered: Covered surface of the property, or None if unknown.

    Returns:
        ``surface_total - surface_covered`` when both values are present and
        the total is strictly greater than the covered surface; otherwise
        None (missing input, or total not larger than covered).
    """
    has_missing_input = surface_total is None or surface_covered is None
    # Short-circuiting keeps the comparison from ever running against None.
    if has_missing_input or surface_total <= surface_covered:
        return None
    return surface_total - surface_covered
    
# Wrap the Python function as a Spark UDF. The return type is declared
# explicitly because F.udf defaults to StringType, which would turn the
# computed area into a string column (and a text column once written out).
calculate_uncovered_area_udf = F.udf(calculate_uncovered_area, types.IntegerType())

In [59]:
# Add the uncovered-area column, then keep only rows where it could be
# computed (the UDF returns null for missing or non-positive differences).
df = (
    df.withColumn(
        "surface_area_uncovered",
        calculate_uncovered_area_udf(F.col("surface_total"), F.col("surface_covered")),
    )
    .where(F.col("surface_area_uncovered").isNotNull())
)

In [60]:
# Preview the first 20 rows of the columns involved in the calculation.
preview_columns = [
    "id",
    "property_type",
    "surface_total",
    "surface_covered",
    "surface_area_uncovered",
]
df.select(*preview_columns).show(n=20)

+--------------------+-------------------+-------------+---------------+----------------------+
|                  id|      property_type|surface_total|surface_covered|surface_area_uncovered|
+--------------------+-------------------+-------------+---------------+----------------------+
|4Yxm18AoXOe0v5Dxz...|          Apartment|           63|             49|                    14|
|tld2CbMqj3l/CrYSJ...|          Apartment|           90|             87|                     3|
|+UXNClE+BfmLoxl3/...|          Apartment|           90|             87|                     3|
|pC5JkluyzV+EFoEIk...|              House|         2815|            459|                  2356|
|0Gq0cAPakjVmW6x5g...|          Apartment|           38|             36|                     2|
|UuE5DRVW0BKJE2nAg...|      Village House|         1500|             60|                  1440|
|kDb3k24QzbWd4lNky...|          Apartment|           39|             38|                     1|
|kKhQd6uvRE8ZXq8PI...|          Apartmen

In [61]:
# Point the connection at the cleaned-data database and target table.
database_name = "Data_Clean"
table_name = "Uncovered_Area"
url = f"jdbc:sqlserver://{server_name}:{port};databaseName={database_name}"

# Write the result over JDBC with the same credentials used for the read;
# "overwrite" mode replaces any existing contents of the target table.
(
    df.write
    .format("jdbc")
    .options(
        url=url,
        dbtable=table_name,
        user=username,
        password=password,
        encrypt="false",
        driver="com.microsoft.sqlserver.jdbc.SQLServerDriver",
    )
    .mode("overwrite")
    .save()
)