In [0]:
# List the files available under the climate_raw volume path so the CSV names
# used by the load cells can be checked against what is actually there.
raw_volume_dir = "/Volumes/workspace/default/climate_raw"
dbutils.fs.ls(raw_volume_dir)


[FileInfo(path='dbfs:/Volumes/workspace/default/climate_raw/GlobalLandTemperaturesByCountry.csv', name='GlobalLandTemperaturesByCountry.csv', size=22680393, modificationTime=1769533772000),
 FileInfo(path='dbfs:/Volumes/workspace/default/climate_raw/GlobalLandTemperaturesByMajorCity.csv', name='GlobalLandTemperaturesByMajorCity.csv', size=14138385, modificationTime=1769533747000),
 FileInfo(path='dbfs:/Volumes/workspace/default/climate_raw/GlobalLandTemperaturesByState.csv', name='GlobalLandTemperaturesByState.csv', size=30770160, modificationTime=1769533793000),
 FileInfo(path='dbfs:/Volumes/workspace/default/climate_raw/GlobalTemperatures.csv', name='GlobalTemperatures.csv', size=205875, modificationTime=1769533704000)]

In [0]:
# Load GlobalTemperatures.csv into a DataFrame using a declared schema.
#
# The columns and types below are the ones printSchema() reports for this file
# when it is loaded with inferSchema. Declaring them up front avoids the extra
# pass over the data that inference requires and keeps the column types fixed
# from run to run instead of depending on the file's contents.
# Note: with a declared schema, a value that cannot be parsed as its declared
# type is read as null under the reader's default (permissive) mode.
GLOBAL_TEMP_SCHEMA = ", ".join(
    [
        "dt DATE",
        "LandAverageTemperature DOUBLE",
        "LandAverageTemperatureUncertainty DOUBLE",
        "LandMaxTemperature DOUBLE",
        "LandMaxTemperatureUncertainty DOUBLE",
        "LandMinTemperature DOUBLE",
        "LandMinTemperatureUncertainty DOUBLE",
        "LandAndOceanAverageTemperature DOUBLE",
        "LandAndOceanAverageTemperatureUncertainty DOUBLE",
    ]
)

global_temp_df = (
    spark.read
         .schema(GLOBAL_TEMP_SCHEMA)
         .option("header", "true")
         # Check the file's header names against the declared schema instead of
         # silently applying the schema by column position.
         .option("enforceSchema", "false")
         .csv("/Volumes/workspace/default/climate_raw/GlobalTemperatures.csv")
)


In [0]:
# Sanity-check the load: print the column names and types, then display the
# first five rows without truncating wide values.
global_temp_df.printSchema()
global_temp_df.show(n=5, truncate=False)


root
 |-- dt: date (nullable = true)
 |-- LandAverageTemperature: double (nullable = true)
 |-- LandAverageTemperatureUncertainty: double (nullable = true)
 |-- LandMaxTemperature: double (nullable = true)
 |-- LandMaxTemperatureUncertainty: double (nullable = true)
 |-- LandMinTemperature: double (nullable = true)
 |-- LandMinTemperatureUncertainty: double (nullable = true)
 |-- LandAndOceanAverageTemperature: double (nullable = true)
 |-- LandAndOceanAverageTemperatureUncertainty: double (nullable = true)

+----------+----------------------+---------------------------------+------------------+-----------------------------+------------------+-----------------------------+------------------------------+-----------------------------------------+
|dt        |LandAverageTemperature|LandAverageTemperatureUncertainty|LandMaxTemperature|LandMaxTemperatureUncertainty|LandMinTemperature|LandMinTemperatureUncertainty|LandAndOceanAverageTemperature|LandAndOceanAverageTemperatureUncertainty|
+---

In [0]:
# Save the global temperatures DataFrame as the Delta table
# bronze_global_temperatures; "overwrite" replaces any data already stored
# under that table name.
global_temp_df.write.saveAsTable(
    "bronze_global_temperatures",
    format="delta",
    mode="overwrite",
)


In [0]:
# Confirm the table was registered, then preview a few of its rows.
registered_tables_df = spark.sql("SHOW TABLES")
registered_tables_df.show(truncate=False)

bronze_preview_df = spark.sql("SELECT * FROM bronze_global_temperatures LIMIT 5")
bronze_preview_df.show(truncate=False)


+--------+--------------------------+-----------+
|database|tableName                 |isTemporary|
+--------+--------------------------+-----------+
|default |bronze_global_temperatures|false      |
+--------+--------------------------+-----------+

+----------+----------------------+---------------------------------+------------------+-----------------------------+------------------+-----------------------------+------------------------------+-----------------------------------------+
|dt        |LandAverageTemperature|LandAverageTemperatureUncertainty|LandMaxTemperature|LandMaxTemperatureUncertainty|LandMinTemperature|LandMinTemperatureUncertainty|LandAndOceanAverageTemperature|LandAndOceanAverageTemperatureUncertainty|
+----------+----------------------+---------------------------------+------------------+-----------------------------+------------------+-----------------------------+------------------------------+-----------------------------------------+
|1750-01-01|3.034000000000

In [0]:
# Load GlobalLandTemperaturesByCountry.csv into a DataFrame using a declared
# schema.
#
# The columns and types below are the ones printSchema() reports for this file
# when it is loaded with inferSchema. Declaring them up front avoids the extra
# pass over the data that inference requires and keeps the column types fixed
# from run to run instead of depending on the file's contents.
# Note: with a declared schema, a value that cannot be parsed as its declared
# type is read as null under the reader's default (permissive) mode.
COUNTRY_TEMP_SCHEMA = ", ".join(
    [
        "dt DATE",
        "AverageTemperature DOUBLE",
        "AverageTemperatureUncertainty DOUBLE",
        "Country STRING",
    ]
)

country_temp_df = (
    spark.read
         .schema(COUNTRY_TEMP_SCHEMA)
         .option("header", "true")
         # Check the file's header names against the declared schema instead of
         # silently applying the schema by column position.
         .option("enforceSchema", "false")
         .csv("/Volumes/workspace/default/climate_raw/GlobalLandTemperaturesByCountry.csv")
)


In [0]:
# Sanity-check the load: print the column names and types, then display the
# first five rows without truncating wide values.
country_temp_df.printSchema()
country_temp_df.show(n=5, truncate=False)



root
 |-- dt: date (nullable = true)
 |-- AverageTemperature: double (nullable = true)
 |-- AverageTemperatureUncertainty: double (nullable = true)
 |-- Country: string (nullable = true)

+----------+------------------+-----------------------------+-------+
|dt        |AverageTemperature|AverageTemperatureUncertainty|Country|
+----------+------------------+-----------------------------+-------+
|1743-11-01|4.3839999999999995|2.294                        |Åland  |
|1743-12-01|NULL              |NULL                         |Åland  |
|1744-01-01|NULL              |NULL                         |Åland  |
|1744-02-01|NULL              |NULL                         |Åland  |
|1744-03-01|NULL              |NULL                         |Åland  |
+----------+------------------+-----------------------------+-------+
only showing top 5 rows


In [0]:
# Save the per-country temperatures DataFrame as the Delta table
# bronze_land_temperatures_country; "overwrite" replaces any data already
# stored under that table name.
country_temp_df.write.saveAsTable(
    "bronze_land_temperatures_country",
    format="delta",
    mode="overwrite",
)


In [0]:
# Load GlobalLandTemperaturesByState.csv with a declared schema, inspect it,
# and save it as the Delta table bronze_land_temperatures_state.
#
# The columns and types below are the ones printSchema() reports for this file
# when it is loaded with inferSchema. Declaring them up front avoids the extra
# pass over the data that inference requires and keeps the column types fixed
# from run to run instead of depending on the file's contents.
# Note: with a declared schema, a value that cannot be parsed as its declared
# type is read as null under the reader's default (permissive) mode.
STATE_TEMP_SCHEMA = ", ".join(
    [
        "dt DATE",
        "AverageTemperature DOUBLE",
        "AverageTemperatureUncertainty DOUBLE",
        "State STRING",
        "Country STRING",
    ]
)

state_temp_df = (
    spark.read
         .schema(STATE_TEMP_SCHEMA)
         .option("header", "true")
         # Check the file's header names against the declared schema instead of
         # silently applying the schema by column position.
         .option("enforceSchema", "false")
         .csv("/Volumes/workspace/default/climate_raw/GlobalLandTemperaturesByState.csv")
)

# Quick look at the result before persisting it.
state_temp_df.printSchema()
state_temp_df.show(5, truncate=False)

# "overwrite" replaces any data already stored under this table name.
(
    state_temp_df
        .write
        .format("delta")
        .mode("overwrite")
        .saveAsTable("bronze_land_temperatures_state")
)


root
 |-- dt: date (nullable = true)
 |-- AverageTemperature: double (nullable = true)
 |-- AverageTemperatureUncertainty: double (nullable = true)
 |-- State: string (nullable = true)
 |-- Country: string (nullable = true)

+----------+------------------+-----------------------------+-----+-------+
|dt        |AverageTemperature|AverageTemperatureUncertainty|State|Country|
+----------+------------------+-----------------------------+-----+-------+
|1855-05-01|25.544            |1.171                        |Acre |Brazil |
|1855-06-01|24.228            |1.103                        |Acre |Brazil |
|1855-07-01|24.371            |1.044                        |Acre |Brazil |
|1855-08-01|25.427            |1.073                        |Acre |Brazil |
|1855-09-01|25.675            |1.014                        |Acre |Brazil |
+----------+------------------+-----------------------------+-----+-------+
only showing top 5 rows


In [0]:
# Load GlobalLandTemperaturesByMajorCity.csv with a declared schema, inspect
# it, and save it as the Delta table bronze_land_temperatures_city.
#
# The columns and types below are the ones printSchema() reports for this file
# when it is loaded with inferSchema. Declaring them up front avoids the extra
# pass over the data that inference requires and keeps the column types fixed
# from run to run instead of depending on the file's contents.
# Latitude and Longitude stay as strings: the values carry a hemisphere
# suffix (e.g. "5.63N", "3.23W") and are not plain numbers.
# Note: with a declared schema, a value that cannot be parsed as its declared
# type is read as null under the reader's default (permissive) mode.
CITY_TEMP_SCHEMA = ", ".join(
    [
        "dt DATE",
        "AverageTemperature DOUBLE",
        "AverageTemperatureUncertainty DOUBLE",
        "City STRING",
        "Country STRING",
        "Latitude STRING",
        "Longitude STRING",
    ]
)

city_temp_df = (
    spark.read
         .schema(CITY_TEMP_SCHEMA)
         .option("header", "true")
         # Check the file's header names against the declared schema instead of
         # silently applying the schema by column position.
         .option("enforceSchema", "false")
         .csv("/Volumes/workspace/default/climate_raw/GlobalLandTemperaturesByMajorCity.csv")
)

# Quick look at the result before persisting it.
city_temp_df.printSchema()
city_temp_df.show(5, truncate=False)

# "overwrite" replaces any data already stored under this table name.
(
    city_temp_df
        .write
        .format("delta")
        .mode("overwrite")
        .saveAsTable("bronze_land_temperatures_city")
)


root
 |-- dt: date (nullable = true)
 |-- AverageTemperature: double (nullable = true)
 |-- AverageTemperatureUncertainty: double (nullable = true)
 |-- City: string (nullable = true)
 |-- Country: string (nullable = true)
 |-- Latitude: string (nullable = true)
 |-- Longitude: string (nullable = true)

+----------+------------------+-----------------------------+-------+-------------+--------+---------+
|dt        |AverageTemperature|AverageTemperatureUncertainty|City   |Country      |Latitude|Longitude|
+----------+------------------+-----------------------------+-------+-------------+--------+---------+
|1849-01-01|26.704            |1.435                        |Abidjan|Côte D'Ivoire|5.63N   |3.23W    |
|1849-02-01|27.434            |1.362                        |Abidjan|Côte D'Ivoire|5.63N   |3.23W    |
|1849-03-01|28.101            |1.612                        |Abidjan|Côte D'Ivoire|5.63N   |3.23W    |
|1849-04-01|26.14             |1.3869999999999998           |Abidjan|Côte D'I

In [0]:
# Final check: list the tables in the current schema to confirm that all of
# the bronze tables written above are present.
bronze_tables_df = spark.sql("SHOW TABLES")
bronze_tables_df.show(truncate=False)


+--------+--------------------------------+-----------+
|database|tableName                       |isTemporary|
+--------+--------------------------------+-----------+
|default |bronze_global_temperatures      |false      |
|default |bronze_land_temperatures_city   |false      |
|default |bronze_land_temperatures_country|false      |
|default |bronze_land_temperatures_state  |false      |
+--------+--------------------------------+-----------+

