In [5]:
import os
import sys
from pathlib import Path

from pyspark.sql import SparkSession

# Parent of the kernel's current working directory (Path() is the cwd).
# NOTE(review): this assumes the notebook is launched from a directory
# directly below the project root -- confirm for your layout.
project_root = Path().resolve().parent

# Put the project root on sys.path BEFORE importing project-local packages.
# Doing it afterwards only works on a kernel where the path was already
# patched by a previous run of this cell, and fails on Restart & Run All.
# The membership check keeps repeated runs from stacking duplicate entries.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# Project-local import; presumably resolved through project_root added above.
from config.database_config import db_config  # noqa: E402

# Point HADOOP_HOME at the project root for the Spark/Hadoop runtime.
# NOTE(review): presumably the Hadoop native helpers live under this
# directory -- confirm.
os.environ["HADOOP_HOME"] = str(project_root)

In [None]:
# Path (as a string) to the SQL Server JDBC driver jar under <project_root>/drivers.
jdbc_driver = str(project_root / "drivers" / "mssql-jdbc-13.2.1.jre8.jar")

# Build (or reuse) the Spark session for this notebook.
spark = (
    SparkSession.builder
    .appName("RetailDataAnalysis")
    .master("local[*]")                            # local mode, all available cores
    .config("spark.driver.memory", "4g")
    .config("spark.sql.shuffle.partitions", "8")
    .config("spark.jars", jdbc_driver)             # ship the JDBC driver jar with the session
    .getOrCreate()
)

# Only log WARN and above from Spark.
spark.sparkContext.setLogLevel("WARN")

In [None]:
# SQL Server JDBC URL assembled from the host, port and database name held in
# db_config. The adjacent literals concatenate into one string.
jdbc_url = (
    f"jdbc:sqlserver://{db_config.SQL_HOST}:{db_config.SQL_PORT};"
    f"databaseName={db_config.SQL_DATABASE};"
    "encrypt=false;trustServerCertificate=true"
)

# Properties object passed to spark.read.jdbc, taken as-is from db_config.
# NOTE(review): presumably carries user/password/driver -- confirm in config.
connection_properties = db_config.connection_properties

In [None]:
# Read dbo.customers over JDBC and register it as the temp view "customers"
# so it can be queried with Spark SQL.
customers = spark.read.jdbc(
    url=jdbc_url,
    table="dbo.customers",
    properties=connection_properties,
)
customers.createOrReplaceTempView("customers")

# Show ten rows through the temp view.
preview_query = "SELECT * FROM customers LIMIT 10"
spark.sql(preview_query).show()

+-----------+----------------+-------------------+----------+--------------------+----------+-------+-------+-------+---+------+------+----------------+
|customer_id|            name|              email|     phone|             address|      city|  state|zipcode|country|age|gender|income|customer_segment|
+-----------+----------------+-------------------+----------+--------------------+----------+-------+-------+-------+---+------+------+----------------+
|      70466|James Villarreal|Michael89@gmail.com|4989225865|4372 Graham Coves...|Portsmouth|England|  25862|     UK| 33|Female|  High|         Regular|
|      50882|      Tracy Chen|  Harold4@gmail.com|6354395676|818 Stewart Lock ...|Portsmouth|England|  59231|     UK| 21|  Male|  High|         Regular|
|      33092|  Jasmine Lawson|  Tonya75@gmail.com|3459747635|903 Roman Forge S...|Portsmouth|England|  98242|     UK| 21|Female|  High|         Regular|
|      44057|   Edward Barnes|Allison33@gmail.com|8907123725|41714 Martin Broo...|