The original dataset is available on Kaggle [here](https://www.kaggle.com/datasets/shivan118/healthcare-analytics).

### Select the Correct Schema First

In [0]:
%sql
-- List the catalogs available to this session so the target catalog can be picked.
SHOW CATALOGS;

In [0]:
%sql
-- Check which catalog is currently active before switching.
SELECT current_catalog();

In [0]:
%sql
-- Make `healthcare` the current catalog for the statements that follow.
USE CATALOG healthcare;

In [0]:
%sql
-- Verify the current catalog after the USE CATALOG statement.
SELECT current_catalog();

In [0]:
%sql
-- List the schemas in the current catalog.
SHOW SCHEMAS;

In [0]:
%sql
-- Make `camp_data` the current schema.
USE SCHEMA camp_data;

### Read the Data Files

In [0]:
%python
# Load the first health camp attendance file: the first row is used as the
# header and column types are inferred from the data.
first_camp = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv("/Volumes/healthcare/camp_data/data_files/First_Health_Camp_Attended.csv")
)

# Preview only the first 25 rows.
display(first_camp.limit(25))

In [0]:
%python
second_camp = spark.read.csv(
    "/Volumes/healthcare/camp_data/data_files/Second_Health_Camp_Attended.csv",
    header=True,
    inferSchema=True
)
display(second_camp.limit(25))

In [0]:
%python
third_camp = spark.read.csv(
    "/Volumes/healthcare/camp_data/data_files/Third_Health_Camp_Attended.csv",
    header=True,
    inferSchema=True
)

# for some reason the table reads the column without an underscore in between the words of 'Health' & 'Score'
second_camp = second_camp.withColumnRenamed("Health Score", "Health_Score")

display(third_camp.limit(25))

In [0]:
# Load the patient profile file: the first row is used as the header and
# column types are inferred from the data.
patient_profiles = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv("/Volumes/healthcare/camp_data/data_files/Patient_Profile.csv")
)

# Preview only the first 25 rows.
display(patient_profiles.limit(25))

In [0]:
# Load the health camp detail file: the first row is used as the header and
# column types are inferred from the data.
health_camp_details = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv("/Volumes/healthcare/camp_data/data_files/Health_Camp_Detail.csv")
)

# Preview only the first 25 rows.
display(health_camp_details.limit(25))

### Pass the ingested data to the next notebook

In [0]:
# Ideally, the code below would be the way to pass tables to downstream tasks, but global temp views are not supported on serverless compute, so we can't do this.
# Another approach is to save the tables as Delta tables somewhere and read them in the downstream tasks.
# However, because of the free edition's limitations, we will just save these tables to a staging schema and then read them from there in the downstream tasks.
# Finally, we will save the complete tables in our production schema ('camp_data').

# first_camp.createOrReplaceGlobalTempView('first_camp_temp_view')
# second_camp.createOrReplaceGlobalTempView('second_camp_temp_view')
# third_camp.createOrReplaceGlobalTempView('third_camp_temp_view')
# patient_profiles.createOrReplaceGlobalTempView('patient_profiles_temp_view')
# health_camp_details.createOrReplaceGlobalTempView('health_camp_details_temp_view')

In [0]:
# Switch the session to the `staging` schema so the unqualified table names
# below are created there.
# NOTE(review): the catalog is whatever is current at this point - presumably
# `healthcare`, set by the USE CATALOG cell at the top of the notebook.
spark.catalog.setCurrentDatabase("staging")

# Table name -> ingested DataFrame, in the order the tables are written.
staging_tables = {
    "first_camp": first_camp,
    "second_camp": second_camp,
    "third_camp": third_camp,
    "patient_profiles": patient_profiles,
    "health_camp_details": health_camp_details,
}

# Overwrite each staging table with the freshly ingested data.
for table_name, frame in staging_tables.items():
    frame.write.mode("overwrite").saveAsTable(table_name)