# Earthquake data preprocessing

## A. Load and Inspect Data
This section reads the earthquake dataset from a CSV file, then inspects its structure (columns, data types, and non-null counts) and previews its first few rows.

In [None]:
# Import necessary library
import pandas as pd

# A. Load and Inspect Data
# Step 1: Load data from the CSV file
# Replace the file path with the actual location of your earthquake data CSV file
file_path = "path_to_file/earthquake_data.csv"  # Adjust file path as needed
data = pd.read_csv(file_path)

# Step 2: Inspect the structure of the dataset
# Display information about the columns, data types, and non-null counts.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
print("Dataset Information:")
data.info()

# Step 3: Preview the first few rows of the dataset
# Display the first 5 rows to understand the content and format of the data
print("\nDataset Preview:")
print(data.head())

## B. Data Cleaning

Steps include dropping rows with missing values in critical columns, keeping only earthquake events, and converting the origin time column to datetime format for analysis.

In [3]:
# B. Data Cleaning
# Step 1: Drop rows with missing values in critical columns
# Critical columns: 'longitude', 'latitude', 'magnitude', and 'depth'
# These columns are essential for earthquake analysis
data.dropna(subset=['longitude', 'latitude', 'magnitude', 'depth'], inplace=True)
print("\nRows with missing critical values dropped.")

# Step 2: Filter only earthquake events
# The dataset might contain other event types; we are only interested in earthquakes
# Filter rows where 'eventtype' is equal to 'earthquake'.
# .copy() makes the filtered result an independent DataFrame, so the column
# assignment in Step 3 does not trigger pandas' SettingWithCopyWarning.
data = data[data['eventtype'] == 'earthquake'].copy()
print("\nFiltered dataset to include only earthquake events.")

# Step 3: Convert the 'origintime' column to datetime format
# Ensure the 'origintime' column is in datetime format for temporal analysis.
# errors='coerce' converts values that cannot be parsed into NaT instead of
# raising, so unparseable timestamps show up as missing values afterwards.
data['origintime'] = pd.to_datetime(data['origintime'], errors='coerce')
print("\nConverted 'origintime' column to datetime format.")


## C. Final Dataset Review
The cleaned dataset is inspected again to ensure all preprocessing steps have been correctly applied.

In [None]:
# Final step: Display the cleaned dataset's structure and preview.
# DataFrame.info() prints its own report and returns None, so it is called
# directly rather than through print(), which would add a stray "None" line.
print("\nCleaned Dataset Information:")
data.info()
print("\nCleaned Dataset Preview:")
print(data.head())
