In [None]:
# Step 1: Install dependencies
!pip install scikit-learn matplotlib seaborn



In [6]:
# Step 2: Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

# Step 3: Load dataset
# Read the raw earthquake records from the CSV in the working directory.
df = pd.read_csv("earthquake_data.csv")

# Quick look at the raw data: first rows, then column dtypes / non-null counts.
print(" Original Data (first 5 rows):")
print(df.head())
print("\n Initial info:")
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() emits a stray "None" line.
df.info()

# Step 4: Clean data
print("\n Cleaning data...")

# Cleaning pipeline, applied in this order:
#   1. discard rows that contain any missing value
#   2. parse 'Time' into datetime values, reading the numbers as milliseconds
#   3. discard duplicate rows
#   4. keep only rows whose magnitude is strictly greater than zero
df = (
    df.dropna()
    .assign(Time=lambda frame: pd.to_datetime(frame['Time'], unit='ms'))
    .drop_duplicates()
    .loc[lambda frame: frame['Magnitude'] > 0]
)

print(f" After cleaning: {len(df)} rows remaining.")

# Step 5: Normalize numerical columns
# Min-max scale the coordinate and depth columns with a default MinMaxScaler;
# 'Magnitude' is not rescaled and only appears in the summary below.
scaled_columns = ['Latitude', 'Longitude', 'Depth']
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df[scaled_columns])
df[scaled_columns] = scaled_values

print("\n Normalized value summaries:")
print(df[scaled_columns + ['Magnitude']].describe())

# Step 6: Check for duplicates or missing data
# Count fully duplicated rows and per-column nulls left after cleaning.
remaining_duplicates = df.duplicated().sum()
missing_per_column = df.isnull().sum()
print("\n Duplicates remaining:", remaining_duplicates)
print(" Missing values after cleaning:\n", missing_per_column)

# Step 7: Save cleaned data
# Write the frame to CSV without the index column.
df.to_csv(path_or_buf="cleaned_earthquake_data.csv", index=False)
print("\n Cleaned dataset saved as 'cleaned_earthquake_data.csv'")

 Original Data (first 5 rows):
            Time  Magnitude  Longitude  Latitude    Depth
0  1762431290137        4.7   -91.8044   14.0356   43.450
1  1762431193282        4.7   -94.1135   17.4573  176.349
2  1762430573776        4.3   123.9564   -9.0682   94.509
3  1762426440665        4.9   142.6505   51.3854   10.000
4  1762424774245        4.4  -111.9240   27.8252   10.000

 Initial info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Time       1000 non-null   int64  
 1   Magnitude  1000 non-null   float64
 2   Longitude  1000 non-null   float64
 3   Latitude   1000 non-null   float64
 4   Depth      1000 non-null   float64
dtypes: float64(4), int64(1)
memory usage: 39.2 KB
None

 Cleaning data...
 After cleaning: 1000 rows remaining.

 Normalized value summaries:
          Latitude    Longitude        Depth    Magnitude
count  1000.000000 