In [4]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# --- Load ------------------------------------------------------------------
# Read the earthquake catalogue and show a quick overview of its contents and
# of how many values are missing per column.
file_path = '/content/Earthquakes_database.csv'
earthquakes_raw = pd.read_csv(file_path)

print("Initial dataset:")
print(earthquakes_raw.head())

print("\nMissing values:")
print(earthquakes_raw.isnull().sum())

# --- Clean -----------------------------------------------------------------
# Keep only rows without any missing value. A new frame is built (instead of
# dropping in place) so the raw data stays available and re-running this code
# is idempotent; .copy() avoids SettingWithCopyWarning on the assignments below.
# NOTE(review): this drops a row if *any* column is empty. If the file has
# sparsely populated columns this may discard most rows -- consider
# dropna(subset=[...]) restricted to the columns actually analysed.
earthquakes = earthquakes_raw.dropna().copy()

# Parse the dates once and derive every date-based column from that result.
event_timestamps = pd.to_datetime(earthquakes['Date'])

# Date is kept as a real datetime column (time-of-day stripped) rather than
# Python date objects, so that later time-based operations such as
# resample(on='Date') keep working.
earthquakes['Date'] = event_timestamps.dt.normalize()

# Only derive Time from the parsed timestamp when the file does not already
# provide one; otherwise an existing Time column would be overwritten.
# NOTE(review): presumably the source file may ship its own Time column -- confirm.
if 'Time' not in earthquakes.columns:
    earthquakes['Time'] = event_timestamps.dt.time

# Bucket magnitudes into right-inclusive intervals (0, 5], (5, 7], (7, 10].
# Values outside (0, 10] end up as NaN in the category column.
MAGNITUDE_BINS = [0, 5, 7, 10]
MAGNITUDE_LABELS = ['Minor', 'Moderate', 'Major']
earthquakes['Magnitude Category'] = pd.cut(
    earthquakes['Magnitude'],
    bins=MAGNITUDE_BINS,
    labels=MAGNITUDE_LABELS,
)

# Calendar features. They are created here, before any aggregation or plot
# groups by 'Year', so the script runs top to bottom on a fresh kernel.
earthquakes['Month'] = event_timestamps.dt.month
earthquakes['Year'] = event_timestamps.dt.year


# Count of events per year, split by magnitude category
# (rows: Year, columns: Magnitude Category).
# Relies on the 'Year' and 'Magnitude Category' columns being present on
# `earthquakes` at this point.
pivot_table = earthquakes.pivot_table(
    values='Magnitude',
    index='Year',
    columns='Magnitude Category',
    aggfunc='count',
)

# Figure 1: number of recorded events per year, as a line chart.
yearly_counts = earthquakes.groupby('Year').size()
fig, ax = plt.subplots(figsize=(12, 6))
yearly_counts.plot(kind='line', ax=ax)
ax.set_title('Earthquake Count over the Years')
ax.set_xlabel('Year')
ax.set_ylabel('Count')
ax.grid(True)
plt.show()

# Figure 2: how many events fall into each magnitude category.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=earthquakes, x='Magnitude Category', palette='viridis', ax=ax)
ax.set_title('Earthquake Count by Magnitude Category')
ax.set_xlabel('Magnitude Category')
ax.set_ylabel('Count')
plt.show()

# Figure 3: depth against magnitude, coloured by magnitude category.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=earthquakes,
    x='Magnitude',
    y='Depth',
    hue='Magnitude Category',
    palette='Set2',
    ax=ax,
)
ax.set_title('Magnitude vs Depth')
ax.set_xlabel('Magnitude')
ax.set_ylabel('Depth (km)')
plt.show()

# Figure 4: share of each value of the 'Type' column, as a pie chart.
type_counts = earthquakes['Type'].value_counts()
fig, ax = plt.subplots(figsize=(8, 8))
type_counts.plot(kind='pie', autopct='%1.1f%%', startangle=90, cmap='Paired', ax=ax)
ax.set_title('Distribution of Earthquake Types')
ax.set_ylabel('')  # hide the default y-label pandas puts next to the pie
plt.show()


# Calendar features derived from the Date column. Converting through
# pd.to_datetime first makes this work whether Date holds datetime.date
# objects or timestamps, and the vectorised .dt accessors replace the
# per-row lambdas.
event_dates = pd.to_datetime(earthquakes['Date'])
earthquakes['Month'] = event_dates.dt.month
earthquakes['Year'] = event_dates.dt.year

# Mean magnitude per calendar year (bins labelled by year end).
# resample() needs a datetime-like key, so the converted dates are used for
# the grouping only; the stored Date column itself is left untouched.
# A YearEnd offset object is passed instead of the 'Y' string alias, which
# recent pandas versions deprecate.
resampled_data = (
    earthquakes
    .assign(Date=event_dates)
    .resample(pd.offsets.YearEnd(), on='Date')['Magnitude']
    .mean()
)


print("\nProcessed dataset:")
print(earthquakes.head())

# Persist the cleaned and enriched frame (without the row index).
processed_file_path = '/content/Earthquakes_processed.csv'
earthquakes.to_csv(processed_file_path, index=False)
print(f"\nProcessed dataset saved to {processed_file_path}")
