##### **Databricks Bronze Layer:** Read .CSV from volume healthcare_dataset in bronze schema

In [0]:
from pyspark.sql import SparkSession

#### **Define your single entity**

In [0]:
# Name of the single entity this notebook ingests. Later cells interpolate it into
# the streaming checkpoint path and the target table name (Patient_Risk_Prediction.bronze.<entity>).
entity = "healthcare"

#### **Static (batch) read to infer schema**

In [0]:
# One-off batch read of the CSV files in the volume: the header row supplies the
# column names and Spark infers the column types from the data.
df_batch = (
    spark.read
    .options(header=True, inferSchema=True)
    .format("csv")
    .load("/Volumes/Patient_Risk_Prediction/bronze/healthcare_dataset/")
)

#### **Clean column names**

In [0]:
def clean_column(col_name):
    """Normalize a raw CSV header into a snake_case-style column name.

    The name is stripped of surrounding whitespace, lower-cased, and every
    character that is awkward or invalid in a Delta column name is replaced
    with an underscore (one underscore per replaced character).

    Args:
        col_name: Raw column name as read from the file header.

    Returns:
        The cleaned column name as a string.
    """
    # Space, "-" and "/" were already handled; the remaining characters
    # (",;{}()=", tab, newline) are rejected by Delta in column names unless
    # column mapping is enabled, so map them to "_" as well.
    invalid_chars = " -/,;{}()\n\t="
    replacements = str.maketrans({ch: "_" for ch in invalid_chars})
    return col_name.strip().lower().translate(replacements)

# Rename every column positionally to its cleaned form.
df_batch = df_batch.toDF(*map(clean_column, df_batch.columns))

#### **Capture the inferred schema**

In [0]:
# Keep the schema of the batch DataFrame (cleaned column names, inferred types)
# so it can be passed explicitly to the streaming reader below.
schema_entity = df_batch.schema

#### **Now create a streaming DataFrame (structured streaming)**

In [0]:
# Streaming reader over the same volume directory, typed with the schema captured
# from the batch read instead of inferring it again.
df_stream = (
    spark.readStream
    .schema(schema_entity)
    .format("csv")
    .options(header=True)
    .load("/Volumes/Patient_Risk_Prediction/bronze/healthcare_dataset/")
)

#### **Write to Bronze Delta Table (append mode, once trigger)**

In [0]:
# Append everything currently available in the source directory to the Bronze
# Delta table, then block until the run has finished so that the cells below
# only execute once the data is committed (or the failure is raised here).
#
# NOTE(review): the checkpoint directory lives inside the same volume directory
# the stream reads from (.../healthcare_dataset/). The path is left unchanged
# because pointing the query at a fresh checkpoint would make it re-ingest
# every file; consider relocating it (together with its existing contents)
# outside the source path.
bronze_query = (
    df_stream.writeStream.format("delta")
    .outputMode("append")
    .option("checkpointLocation", f"/Volumes/Patient_Risk_Prediction/bronze/healthcare_dataset/{entity}")
    # availableNow processes all pending input and then stops; it supersedes
    # the deprecated once=True trigger.
    .trigger(availableNow=True)
    .toTable(f"Patient_Risk_Prediction.bronze.{entity}")
)

# toTable() returns as soon as the query has been started; wait for completion.
bronze_query.awaitTermination()

<pyspark.sql.connect.streaming.query.StreamingQuery at 0xffe8d77b7ec0>

In [0]:
# Report the fully qualified name of the target table. This line prints
# unconditionally; it does not itself check the state of the streaming query.
print(f"✅ Bronze table created successfully: Patient_Risk_Prediction.bronze.{entity}")

✅ Bronze table created successfully: Patient_Risk_Prediction.bronze.healthcare


In [0]:
%sql
-- Sanity check: total number of rows in the bronze healthcare table.
SELECT COUNT(*) FROM Patient_Risk_Prediction.bronze.healthcare;

COUNT(*)
55500


In [0]:
%sql
-- Preview ten rows of the bronze healthcare table to eyeball column names and values.
-- There is no ORDER BY, so which ten rows come back is not guaranteed.
SELECT * FROM Patient_Risk_Prediction.bronze.healthcare LIMIT 10;

name,age,gender,blood_type,medical_condition,date_of_admission,doctor,hospital,insurance_provider,billing_amount,room_number,admission_type,discharge_date,medication,test_results
Bobby JacksOn,30,Male,B-,Cancer,2024-01-31,Matthew Smith,Sons and Miller,Blue Cross,18856.281305978155,328,Urgent,2024-02-02,Paracetamol,Normal
LesLie TErRy,62,Male,A+,Obesity,2019-08-20,Samantha Davies,Kim Inc,Medicare,33643.327286577885,265,Emergency,2019-08-26,Ibuprofen,Inconclusive
DaNnY sMitH,76,Female,A-,Obesity,2022-09-22,Tiffany Mitchell,Cook PLC,Aetna,27955.096078842456,205,Emergency,2022-10-07,Aspirin,Normal
andrEw waTtS,28,Female,O+,Diabetes,2020-11-18,Kevin Wells,"Hernandez Rogers and Vang,",Medicare,37909.78240987528,450,Elective,2020-12-18,Ibuprofen,Abnormal
adrIENNE bEll,43,Female,AB+,Cancer,2022-09-19,Kathleen Hanna,White-White,Aetna,14238.317813937623,458,Urgent,2022-10-09,Penicillin,Abnormal
EMILY JOHNSOn,36,Male,A+,Asthma,2023-12-20,Taylor Newton,Nunez-Humphrey,UnitedHealthcare,48145.11095104189,389,Urgent,2023-12-24,Ibuprofen,Normal
edwArD EDWaRDs,21,Female,AB-,Diabetes,2020-11-03,Kelly Olson,Group Middleton,Medicare,19580.87234486093,389,Emergency,2020-11-15,Paracetamol,Inconclusive
CHrisTInA MARtinez,20,Female,A+,Cancer,2021-12-28,Suzanne Thomas,"Powell Robinson and Valdez,",Cigna,45820.46272159459,277,Emergency,2022-01-07,Paracetamol,Inconclusive
JASmINe aGuIlaR,82,Male,AB+,Asthma,2020-07-01,Daniel Ferguson,Sons Rich and,Cigna,50119.222791548505,316,Elective,2020-07-14,Aspirin,Abnormal
ChRISTopher BerG,58,Female,AB-,Cancer,2021-05-23,Heather Day,Padilla-Walker,UnitedHealthcare,19784.63106221073,249,Elective,2021-06-22,Paracetamol,Inconclusive
