In [0]:
from pyspark.sql import DataFrame
from pyspark.sql.utils import AnalysisException

# ==============================================================================
# Configuration and constants
# ==============================================================================
# Directory that holds the source file; the trailing slash is required
# because the full path below is built by plain concatenation.
VOLUME_PATH = "/Volumes/workspace/default/mvp_engenharia/"
# Name of the CSV file inside VOLUME_PATH.
FILE_NAME = "global_climate_change_2020_2025.csv"
# Complete path handed to the CSV reader.
FULL_FILE_PATH = VOLUME_PATH + FILE_NAME

# ==============================================================================
# Helper functions
# ==============================================================================

def list_directory_contents(path: str) -> None:
    """
    Print the name of every entry found directly under ``path``.

    Intended as a quick validation step before ingestion. The listing is
    obtained through ``dbutils.fs.ls``; ``dbutils`` is not defined in this
    file and is expected to be supplied as a global by the notebook runtime.

    Args:
        path: Directory to list.

    Raises:
        RuntimeError: If the directory listing fails for any reason. The
            underlying exception is attached as ``__cause__``.
    """
    # Only the listing call is guarded, so a failure while printing is not
    # misreported as a directory-access problem.
    try:
        files = dbutils.fs.ls(path)
    except Exception as e:
        # Chain explicitly so the traceback shows the real failure as the
        # direct cause, consistent with ingest_csv_data.
        raise RuntimeError(f"Failed to access directory: {path}. Details: {e}") from e

    print(f"Contents of {path}:")
    for file_info in files:
        print(f" - {file_info.name}")

def ingest_csv_data(file_path: str) -> DataFrame:
    """
    Load the CSV file at ``file_path`` into a Spark DataFrame.

    The reader is configured with ``header`` and ``inferSchema`` both set to
    ``"true"``, so column types are inferred by Spark rather than declared.
    ``spark`` is not defined in this file and is expected to be supplied as
    a global by the notebook runtime.

    Args:
        file_path: Location of the CSV file to load.

    Returns:
        The DataFrame produced by the Spark CSV reader.

    Raises:
        RuntimeError: If Spark raises ``AnalysisException`` while loading;
            the original exception is chained as the cause.
    """
    csv_options = {"header": "true", "inferSchema": "true"}
    try:
        reader = spark.read.format("csv").options(**csv_options)
        return reader.load(file_path)
    except AnalysisException as exc:
        raise RuntimeError(f"Spark failed to read file at {file_path}. Ensure file exists and format is correct.") from exc

# ==============================================================================
# Main execution
# ==============================================================================


# Validation step: print what the volume directory holds before reading from it.
list_directory_contents(VOLUME_PATH)


# Load the CSV into a DataFrame; the schema is inferred inside ingest_csv_data.
df_raw_climate = ingest_csv_data(FULL_FILE_PATH)


# Report the source path and the number of rows that were loaded.
# NOTE(review): count() presumably scans the whole file — confirm this is
# acceptable if the input grows.
print(f"Successfully loaded data from: {FULL_FILE_PATH}")
print(f"Total records: {df_raw_climate.count()}")

# `display` is not defined in this file; presumably the notebook runtime's
# table renderer — TODO confirm.
display(df_raw_climate)
# Print the column names and types of the loaded DataFrame.
df_raw_climate.printSchema()