In [1]:
# --- Cell 1: Notebook Header, Logging Configuration, and Library Imports ---

"""
Notebook: 02_transform_earthquake_data.ipynb

Purpose:
This notebook is responsible for cleaning, transforming, and enriching raw earthquake data
from the Bronze layer, and persisting the refined data into the Silver layer of the Lakehouse.
The Silver layer typically contains enterprise-wide, clean, conformed, and semi-enriched data,
ready for direct reporting, advanced analytics, and further specialized enrichment in the Gold layer.

Dependencies:
- Python 3.x
- pyspark library (for distributed processing and DataFrame operations)

Execution Environment:
This notebook is designed to run within an Apache Spark environment,
specifically on platforms like Microsoft Fabric where a SparkSession
('spark') is typically pre-initialized and available globally.
"""

# Logging setup: emit timestamped, level-tagged messages to the console so that
# progress, warnings, and failures of this notebook show up in the cell output.
import logging

# `basicConfig` does nothing when the root logger already has handlers, so
# re-running this cell in the same session does not stack duplicate handlers.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# PySpark imports used by the transformation cells that follow.
from pyspark.sql import SparkSession, functions as F  # Session entry point; F = Spark SQL functions (F.col, F.year, ...).
from pyspark.sql.types import (                       # Spark data types used for casting.
    BooleanType,
    DoubleType,
    IntegerType,
    StringType,
    TimestampType,
)

logger.info("All necessary libraries have been imported successfully for data transformation.")

StatementMeta(, f21af01b-40ba-416d-93ca-65d214b6f00e, 3, Finished, Available, Finished)

2025-06-12 19:39:01,902 - INFO - All necessary libraries have been imported successfully for data transformation.


In [2]:
# --- Cell 2: Configuration Parameters ---

# All tunable parameters of the transformation are defined here so they can be
# changed in one place without touching the processing cells.

# Source table (Bronze layer):
# Name of the Delta table holding the raw earthquake records that this notebook reads.
BRONZE_TABLE_NAME = "bronze_usgs_earthquakes"

# Target table (Silver layer):
# Name of the Delta table that receives the cleaned and enriched records.
SILVER_TABLE_NAME = "silver_earthquakes_cleaned"

# Base file path for the Silver layer (optional, file-based storage):
# Kept for consumers that want direct file access; it is not referenced by the
# cells visible in this notebook.
SILVER_FILE_PATH_BASE = "Files/silver/earthquakes_cleaned"

# Processing timestamp:
# `F.current_timestamp()` returns an *unevaluated* Column expression, not a Python
# datetime. No time is captured when this cell runs; Spark resolves the value when a
# query that uses the column executes (all references within one query share the same
# value). The timestamp stored in the Silver table is therefore the time of the write
# query, and the expression itself cannot be printed as a time.
PROCESSING_TIMESTAMP_UTC = F.current_timestamp()

logger.info("Transformation Configuration Loaded:")
logger.info(f"  Reading from Bronze table: {BRONZE_TABLE_NAME}")
logger.info(f"  Writing to Silver table: {SILVER_TABLE_NAME}")
# Do not interpolate the Column here: its repr is "Column<'current_timestamp()'>",
# which reads like a value but carries no time information.
logger.info("  Processing Timestamp: resolved by Spark at query execution time via current_timestamp().")

StatementMeta(, f21af01b-40ba-416d-93ca-65d214b6f00e, 4, Finished, Available, Finished)

2025-06-12 19:39:02,341 - INFO - Transformation Configuration Loaded:
2025-06-12 19:39:02,341 - INFO -   Reading from Bronze table: bronze_usgs_earthquakes
2025-06-12 19:39:02,342 - INFO -   Writing to Silver table: silver_earthquakes_cleaned
2025-06-12 19:39:02,344 - INFO -   Processing Timestamp (captured during Spark execution): Column<'current_timestamp()'>


In [3]:
# --- Cell 3: Initialize Spark Session and Load Bronze Data ---

# Makes sure a SparkSession is available, then loads the raw Bronze table into
# `df_bronze`. Any failure here is fatal for the notebook and is re-raised.

try:
    # Hosted notebook environments usually pre-create a global `spark` session.
    # Falling back to `getOrCreate()` keeps the notebook runnable elsewhere:
    # it returns the active session if one exists, otherwise builds a new one.
    if 'spark' not in globals() or not isinstance(spark, SparkSession):
        logger.info("SparkSession 'spark' not found or not an instance of SparkSession. Attempting to get or create a new one.")
        spark = SparkSession.builder \
                            .appName("EarthquakeDataTransformation") \
                            .getOrCreate()
        logger.info("Spark Session initialized or retrieved successfully.")
    else:
        logger.info("Spark Session 'spark' is already initialized and available.")

    # Load the Bronze Delta table as a DataFrame (lazy; nothing is read yet).
    df_bronze = spark.table(BRONZE_TABLE_NAME)

    # `count()` is an action that triggers a full Spark job, so run it once and
    # reuse the result for both the log line and the emptiness check.
    bronze_record_count = df_bronze.count()
    logger.info(f"Successfully read {bronze_record_count} records from Bronze table: {BRONZE_TABLE_NAME}.")
    logger.info("Bronze DataFrame Schema:")
    df_bronze.printSchema()

    # An empty input is not an error, but it means the Silver layer will be empty too.
    if bronze_record_count == 0:
        logger.warning(f"Bronze table '{BRONZE_TABLE_NAME}' is empty. No data to transform.")


except Exception as e:
    # Without the Bronze data the transformation cannot proceed: log with traceback
    # and re-raise so an orchestrating pipeline marks the activity as failed.
    logger.error(f"FATAL ERROR: Failed to read Bronze table '{BRONZE_TABLE_NAME}'. Transformation cannot proceed. Error: {e}", exc_info=True)
    # `from e` records the original exception as the explicit cause in the traceback.
    raise Exception(f"Transformation failed due to an error loading Bronze data: {e}") from e

StatementMeta(, f21af01b-40ba-416d-93ca-65d214b6f00e, 5, Finished, Available, Finished)

2025-06-12 19:39:02,682 - INFO - Spark Session 'spark' is already initialized and available.


root
 |-- id: string (nullable = true)
 |-- mag: double (nullable = true)
 |-- place: string (nullable = true)
 |-- time: long (nullable = true)
 |-- updated: long (nullable = true)
 |-- tz: string (nullable = true)
 |-- url: string (nullable = true)
 |-- detail: string (nullable = true)
 |-- felt: long (nullable = true)
 |-- cdi: double (nullable = true)
 |-- mmi: double (nullable = true)
 |-- alert: string (nullable = true)
 |-- status: string (nullable = true)
 |-- tsunami: long (nullable = true)
 |-- sig: long (nullable = true)
 |-- net: string (nullable = true)
 |-- code: string (nullable = true)
 |-- ids: string (nullable = true)
 |-- sources: string (nullable = true)
 |-- types: string (nullable = true)
 |-- nst: long (nullable = true)
 |-- dmin: double (nullable = true)
 |-- rms: double (nullable = true)
 |-- gap: double (nullable = true)
 |-- magType: string (nullable = true)
 |-- type: string (nullable = true)
 |-- title: string (nullable = true)
 |-- longitude: double (nulla

In [4]:
# --- Cell 4: Data Cleaning, Type Casting, Validation, and Deduplication ---

# Prepares the raw Bronze records for the Silver layer in four steps:
# cast/rename columns, select the Silver schema, filter out invalid rows,
# and keep a single row per event.

from pyspark.sql.window import Window  # Window specification used by the deduplication step.

# 1. Type casting and column renaming.
# `time` and `updated` are treated as epoch milliseconds: dividing by 1000 yields the
# seconds value that Spark's numeric-to-timestamp cast expects.
df_cleaned = (
    df_bronze
    .withColumn("event_timestamp_utc", (F.col("time") / 1000).cast(TimestampType()))
    .withColumn("updated_timestamp_utc", (F.col("updated") / 1000).cast(TimestampType()))
    .withColumn("magnitude", F.col("mag").cast(DoubleType()))
    .withColumn("depth_km", F.col("depth").cast(DoubleType()))
    .withColumn("latitude", F.col("latitude").cast(DoubleType()))
    .withColumn("longitude", F.col("longitude").cast(DoubleType()))
    # Boolean flag (true when the source `tsunami` value equals 1), matching the
    # documented Silver schema; previously this was cast to a double (0.0 / 1.0).
    .withColumn("tsunami_warning", (F.col("tsunami") == 1).cast(BooleanType()))
    .withColumn("significance", F.col("sig").cast(IntegerType()))
    .withColumn("felt_reports", F.col("felt").cast(IntegerType()))
    .withColumn("nst_stations", F.col("nst").cast(IntegerType()))
    .withColumn("rms_travel_time", F.col("rms").cast(DoubleType()))
    .withColumn("gap_azimuthal", F.col("gap").cast(DoubleType()))
)

logger.info("Initial type casting and column renaming complete, preparing for Silver layer schema.")

# 2. Select the final Silver columns.
# An explicit select fixes the output schema and drops the raw columns that were
# only needed as inputs to the casts above.
df_selected = df_cleaned.select(
    F.col("id").alias("event_id"),          # Renamed from 'id'.
    "event_timestamp_utc",                  # Derived from 'time'.
    "updated_timestamp_utc",                # Derived from 'updated'.
    "magnitude",                            # Derived from 'mag'.
    "depth_km",                             # Derived from 'depth'.
    "latitude",
    "longitude",
    "place",
    F.col("type").alias("event_type"),      # Renamed from 'type'.
    "magType",
    "tsunami_warning",                      # Boolean derived from 'tsunami'.
    "significance",                         # Derived from 'sig'.
    "felt_reports",                         # Derived from 'felt'.
    "nst_stations",                         # Derived from 'nst'.
    "rms_travel_time",                      # Derived from 'rms'.
    "gap_azimuthal",                        # Derived from 'gap'.
    "alert",
    "status",
    "url",
    "title",
    "ingestion_timestamp_utc"               # Carried forward from Bronze for data lineage.
)

logger.info(f"Selected {len(df_selected.columns)} columns for the Silver layer schema.")

# 3. Data validation and filtering.
# Rows with a missing or out-of-range key measurement are dropped.
# NOTE(review): the `depth_km >= 0` bound removes every record with a negative depth;
# confirm the source never reports negative depths for valid events.
initial_count_before_validation = df_selected.count()
df_validated = df_selected.filter(
    (F.col("magnitude").isNotNull()) & (F.col("magnitude").between(-2.0, 10.0)) &        # Magnitude present and within [-2, 10].
    (F.col("latitude").isNotNull()) & (F.col("latitude").between(-90.0, 90.0)) &         # Latitude within [-90, 90].
    (F.col("longitude").isNotNull()) & (F.col("longitude").between(-180.0, 180.0)) &     # Longitude within [-180, 180].
    (F.col("depth_km").isNotNull()) & (F.col("depth_km") >= 0) & (F.col("depth_km") < 1000) &  # Depth within [0, 1000).
    (F.col("event_timestamp_utc").isNotNull()) &                                         # Event time present.
    (F.col("event_id").isNotNull())                                                      # Event id present (deduplication key).
)
# Each `count()` launches a Spark job, so count the validated rows once and reuse it.
validated_count = df_validated.count()
records_removed_by_validation = initial_count_before_validation - validated_count
if records_removed_by_validation > 0:
    logger.warning(f"Removed {records_removed_by_validation} records due to data validation rules. Remaining records: {validated_count}.")
else:
    logger.info("All records passed data validation checks. No records were removed.")

# 4. Deduplication.
# Keep one row per `event_id`: the most recently updated version. Rows that tie on
# `updated_timestamp_utc` are ordered by `ingestion_timestamp_utc` so that the
# surviving row does not depend on Spark's arbitrary ordering of equal sort keys.
window_spec = Window.partitionBy("event_id").orderBy(
    F.col("updated_timestamp_utc").desc(),
    F.col("ingestion_timestamp_utc").desc(),
)
# Number the rows within each event, keep row 1, then drop the helper column.
df_deduplicated = (
    df_validated
    .withColumn("rn", F.row_number().over(window_spec))
    .filter(F.col("rn") == 1)
    .drop("rn")
)

deduplicated_count = df_deduplicated.count()
logger.info(f"Records after cleaning, validation, and deduplication: {deduplicated_count}.")
if deduplicated_count == 0:
    logger.warning("No records remaining after cleaning, validation, and deduplication. The Silver layer will be empty.")

StatementMeta(, f21af01b-40ba-416d-93ca-65d214b6f00e, 6, Finished, Available, Finished)

2025-06-12 19:39:18,267 - INFO - Initial type casting and column renaming complete, preparing for Silver layer schema.
2025-06-12 19:39:18,300 - INFO - Selected 21 columns for the Silver layer schema.
2025-06-12 19:39:23,583 - INFO - Records after cleaning, validation, and deduplication: 24506.


In [5]:
# --- Cell 5: Feature Engineering and Enrichment ---

# Builds `df_enriched` from `df_deduplicated` in a single chain so that re-running
# this cell always starts from the same input and produces the same columns.
#
# Added columns:
# - `magnitude_category`: Micro (<3), Minor (<4), Light (<5), Moderate (<6),
#                         Strong (<7), Major (<8), otherwise Great.
# - `depth_category`:     Shallow (<=70), Intermediate (<=300), otherwise Deep.
# - `hemisphere_ns` / `hemisphere_ew`: sign of latitude / longitude (0 counts as
#                         Northern / Eastern).
# - `year`, `month`, `day`, `hour`, `day_of_week`: parts of `event_timestamp_utc`.
# - `extracted_region_detail`, `extracted_country`: parsed from the `place` text.
# - `silver_processing_timestamp_utc`: when the row was processed into Silver.
df_enriched = (
    df_deduplicated
    # 1. Categorical features. `when` branches are evaluated in order, so each
    #    threshold only applies to values not matched by an earlier branch.
    .withColumn("magnitude_category",
        F.when(F.col("magnitude") < 3.0, "Micro")
         .when(F.col("magnitude") < 4.0, "Minor")
         .when(F.col("magnitude") < 5.0, "Light")
         .when(F.col("magnitude") < 6.0, "Moderate")
         .when(F.col("magnitude") < 7.0, "Strong")
         .when(F.col("magnitude") < 8.0, "Major")
         .otherwise("Great")
    )
    .withColumn("depth_category",
        F.when(F.col("depth_km") <= 70, "Shallow")
         .when(F.col("depth_km") <= 300, "Intermediate")
         .otherwise("Deep")
    )
    .withColumn("hemisphere_ns", F.when(F.col("latitude") >= 0, "Northern").otherwise("Southern"))
    .withColumn("hemisphere_ew", F.when(F.col("longitude") >= 0, "Eastern").otherwise("Western"))
    # 2. Time-based features extracted from the event timestamp.
    .withColumn("year", F.year(F.col("event_timestamp_utc")))
    .withColumn("month", F.month(F.col("event_timestamp_utc")))
    .withColumn("day", F.dayofmonth(F.col("event_timestamp_utc")))
    .withColumn("hour", F.hour(F.col("event_timestamp_utc")))
    .withColumn("day_of_week", F.dayofweek(F.col("event_timestamp_utc")))  # Spark convention: Sunday=1 ... Saturday=7.
    # 3. Simplified region/country extraction from the free-text `place` column.
    #    The capture group excludes commas (`[^,]*`), so the only possible match is the
    #    segment after the LAST comma. A greedy `.*` here would instead match from the
    #    FIRST comma and return e.g. "B, C" for "A, B, C".
    #    This is a heuristic; reverse geocoding or a spatial join on latitude/longitude
    #    would be more accurate than parsing `place`.
    .withColumn(
        "extracted_region_detail",
        F.trim(F.regexp_extract(F.col("place"), r",\s*([^,]*)$", 1))
    )
    # `regexp_extract` yields "" when nothing matches (no comma, or nothing after the
    # last comma); in that case fall back to the whole trimmed `place` string.
    .withColumn(
        "extracted_country",
        F.when(F.col("extracted_region_detail") != "", F.col("extracted_region_detail"))
         .otherwise(F.trim(F.col("place")))
    )
    # 4. Silver processing timestamp, taken from the configuration cell's Spark expression.
    .withColumn("silver_processing_timestamp_utc", PROCESSING_TIMESTAMP_UTC)
)
logger.warning("Simplified country/region extraction from 'place' string has been performed. For production environments, robust geocoding services or spatial joins are highly recommended for accuracy.")

logger.info("Feature engineering and enrichment complete.")
logger.info("Sample of enriched data (first 5 records, showing new derived columns):")
df_enriched.select("event_id", "magnitude_category", "depth_category", "extracted_country", "year", "month").show(5, truncate=False)

StatementMeta(, f21af01b-40ba-416d-93ca-65d214b6f00e, 7, Finished, Available, Finished)

2025-06-12 19:39:26,299 - INFO - Feature engineering and enrichment complete.
2025-06-12 19:39:26,300 - INFO - Sample of enriched data (first 5 records, showing new derived columns):


+------------+------------------+--------------+-----------------+----+-----+
|event_id    |magnitude_category|depth_category|extracted_country|year|month|
+------------+------------------+--------------+-----------------+----+-----+
|ak0247kxj8pd|Minor             |Shallow       |Alaska           |2024|6    |
|ak0247kzewoq|Minor             |Shallow       |Alaska           |2024|6    |
|ak0247kzvgsw|Micro             |Shallow       |Alaska           |2024|6    |
|ak0247l1bkwu|Micro             |Shallow       |Alaska           |2024|6    |
|ak0247l3pf99|Light             |Intermediate  |Alaska           |2024|6    |
+------------+------------------+--------------+-----------------+----+-----+
only showing top 5 rows



In [6]:
# --- Cell 6: Save to Silver Layer (Delta Table) ---

# Persists `df_enriched` as the Silver Delta table. The write is skipped when there
# is nothing to write; any write failure is logged and re-raised.

# Number of records to write. Initialised to 0 so the check below is always defined,
# even when `df_enriched` does not exist (e.g. an earlier cell was not run).
records_to_write_count = 0
if 'df_enriched' in locals():
    # `count()` runs the whole transformation as a Spark job; compute it once and
    # reuse the value in the log messages below instead of re-counting.
    records_to_write_count = df_enriched.count()

if records_to_write_count > 0:  # Proceed only if there are records to write.
    try:
        # Write options:
        # - format("delta"):                   store the table in Delta Lake format.
        # - mode("overwrite"):                 replace the whole table (full refresh). For
        #                                      incremental loads, `append` or a MERGE would
        #                                      be the appropriate choice instead.
        # - option("overwriteSchema", "true"): let the table schema be replaced when the
        #                                      DataFrame schema differs; use with care where
        #                                      schema stability matters.
        # - partitionBy("year", "month"):      partition by event year and month so that
        #                                      date-range filters can skip partitions.
        # - saveAsTable(SILVER_TABLE_NAME):    register the result as a named catalog table.
        logger.info(f"Writing {records_to_write_count} records to Silver table: {SILVER_TABLE_NAME}, partitioned by 'year' and 'month'.")
        df_enriched.write \
                   .format("delta") \
                   .mode("overwrite") \
                   .option("overwriteSchema", "true") \
                   .partitionBy("year", "month") \
                   .saveAsTable(SILVER_TABLE_NAME)

        # Reports the count taken before the write rather than recomputing the pipeline.
        logger.info(f"Successfully wrote {records_to_write_count} records to Silver table: {SILVER_TABLE_NAME}.")


    except Exception as e:
        logger.error(f"FATAL ERROR: An error occurred while saving data to Silver table '{SILVER_TABLE_NAME}': {e}", exc_info=True)
        # `from e` records the original exception as the explicit cause in the traceback.
        raise Exception(f"Transformation failed during Silver layer write: {e}") from e
else:
    logger.warning("Skipping write to Silver layer as the enriched DataFrame is empty. No data was available to be written.")


StatementMeta(, f21af01b-40ba-416d-93ca-65d214b6f00e, 8, Finished, Available, Finished)

2025-06-12 19:39:29,945 - INFO - Writing 24506 records to Silver table: silver_earthquakes_cleaned, partitioned by 'year' and 'month'.
2025-06-12 19:39:43,486 - INFO - Successfully wrote 24506 records to Silver table: silver_earthquakes_cleaned.


In [7]:
# --- Cell 7: Display Sample from Silver Table (Optional Verification) ---

# Optional sanity check: read the Silver table back and show a few rows, its schema,
# and its row count. Failures here are logged but deliberately not re-raised, since
# this cell only verifies the result of the write.

# Only verify when the write cell recorded a positive record count; otherwise the
# table may not have been (re)written in this session.
if not ('records_to_write_count' in locals() and records_to_write_count > 0):
    logger.info("Skipping Silver table display as no data was processed and written to the table in prior steps.")
else:
    try:
        # Re-acquire a session if `spark` is missing or is not a SparkSession,
        # e.g. when this cell is executed on its own.
        if 'spark' not in globals() or not isinstance(spark, SparkSession):
            logger.error("SparkSession 'spark' is not initialized for table verification. Attempting to get or create one.")
            spark = (
                SparkSession.builder
                .appName("EarthquakeSilverVerification")
                .getOrCreate()
            )

        logger.info(f"Displaying a sample of 5 records from the Silver table '{SILVER_TABLE_NAME}' for verification.")
        # First five rows, with `truncate=False` so long values are printed in full.
        spark.table(SILVER_TABLE_NAME).show(5, truncate=False)

        # Schema and total row count of the table as stored.
        logger.info(f"Silver table '{SILVER_TABLE_NAME}' schema:")
        spark.table(SILVER_TABLE_NAME).printSchema()
        logger.info(f"Total records in Silver table '{SILVER_TABLE_NAME}': {spark.table(SILVER_TABLE_NAME).count()}")

    except Exception as e:
        logger.error(f"An error occurred while trying to read and display data from Silver table '{SILVER_TABLE_NAME}': {e}", exc_info=True)

StatementMeta(, f21af01b-40ba-416d-93ca-65d214b6f00e, 9, Finished, Available, Finished)

2025-06-12 19:39:44,638 - INFO - Displaying a sample of 5 records from the Silver table 'silver_earthquakes_cleaned' for verification.
2025-06-12 19:39:48,389 - INFO - Total records in Silver table 'silver_earthquakes_cleaned': 24506


+------------+-----------------------+-----------------------+---------+--------+--------+---------+-----------------------------------------+----------+-------+---------------+------------+------------+------------+---------------+-------------+-----+--------+--------------------------------------------------------------+-------------------------------------------------+--------------------------+------------------+--------------+-------------+-------------+----+-----+---+----+-----------+-----------------------+-----------------+-------------------------------+
+------------+-----------------------+-----------------------+---------+--------+--------+---------+-----------------------------------------+----------+-------+---------------+------------+------------+------------+---------------+-------------+-----+--------+--------------------------------------------------------------+-------------------------------------------------+--------------------------+------------------+----------