In [None]:
import pandas as pd

In [None]:
# Load the cleaned earthquake catalogue (path is relative to the working directory).
df = pd.read_csv("quakes-cleaned.csv")

# Fail fast when a column the later cells read is absent from the file.
required_columns = ['latitude', 'longitude', 'mag', 'time', 'place']
missing_columns = list(filter(lambda name: name not in df.columns, required_columns))
if len(missing_columns) > 0:
    raise ValueError(f"Missing required columns in the dataset: {missing_columns}")

# Parse timestamps in place; values that cannot be parsed become NaT instead of raising.
df['time'] = pd.to_datetime(df['time'], errors='coerce')

# 1. Total number of earthquakes

In [None]:
# Each row of the frame is counted as one earthquake record.
total_earthquakes = len(df)
print(f"Total Earthquakes: {total_earthquakes}")

# 2. Highest magnitude earthquake

In [None]:
# Largest magnitude in the dataset.
max_magnitude = df['mag'].max()
# Keep every row that ties for that magnitude, not only the first one.
max_magnitude_event = df.loc[df['mag'].eq(max_magnitude)]
print(f"Highest Magnitude Earthquake:\n{max_magnitude_event}")

# 3. Time of the earliest earthquake

In [None]:
# Smallest parsed timestamp; NaT entries are skipped by min().
earliest_earthquake = df.loc[:, 'time'].min()
print(f"Earliest Earthquake Time: {earliest_earthquake}")

# 4. Regions Where Earthquakes Rarely Exceed a Minor Magnitude

In [None]:
# Flag every event with magnitude below 4.0 as minor.
df['is_minor'] = df['mag'].lt(4.0)

# Mean of the boolean flag per place = share of minor events there.
# Despite the column name this is a fraction between 0 and 1, not a percentage.
place_minor_percentage = df.groupby('place')['is_minor'].mean()
place_minor_percentage = place_minor_percentage.reset_index(name='minor_quake_percentage')
# Places with the largest share of minor events come first.
place_minor_percentage = place_minor_percentage.sort_values(
    by='minor_quake_percentage', ascending=False
)

# 5. High-Risk Zones (Clustering-like approach using rounded latitude and longitude)

In [None]:
# Snap coordinates to one decimal place so that nearby events share a grid cell.
df['rounded_lat'] = df['latitude'].round(decimals=1)
df['rounded_lon'] = df['longitude'].round(decimals=1)

# Count events per (rounded_lat, rounded_lon) cell, then order cells from busiest to quietest.
high_risk_zones = df.groupby(['rounded_lat', 'rounded_lon']).size()
high_risk_zones = high_risk_zones.reset_index(name='earthquake_count')
high_risk_zones = high_risk_zones.sort_values(by='earthquake_count', ascending=False)

print("\nTop 5 High-Risk Zones (by earthquake count):")
print(high_risk_zones.iloc[:5])

# 6. The Hour When Earthquakes Are Most Likely to Occur

In [None]:
# Hour of day taken from each parsed timestamp; rows whose time is NaT get NaN here.
df['hour_of_day'] = df['time'].dt.hour

# Events per hour, ordered by hour. value_counts() drops NaN by default, so only
# rows with a parseable timestamp are counted.
hourly_distribution = df['hour_of_day'].value_counts().sort_index()

# Use a dedicated name for the denominator. Rebinding `total_earthquakes` here
# would silently replace the dataset-wide row count computed in section 1 with
# the number of events that have a valid timestamp.
timed_event_count = hourly_distribution.sum()

# Percentage share of the timed events that falls in each hour.
hourly_probabilities = (hourly_distribution / timed_event_count) * 100

# 7. Identify the hour with the highest probability

In [None]:
# The index label of the largest share is the hour itself. Cast it to a plain
# int: when any timestamp failed to parse, `.dt.hour` yields floats and the
# label would otherwise be printed as e.g. "14.0:00".
most_likely_hour = int(hourly_probabilities.idxmax())
highest_probability = hourly_probabilities.max()
print(f"\nThe hour when earthquakes are most likely to occur is {most_likely_hour}:00, "
      f"with a probability of {highest_probability:.2f}%.")

# 8. Most Affected Places (Earthquake Frequency by Place)

In [None]:
# Number of events recorded for each place label.
place_counts = df['place'].value_counts()
# Explicit descending sort, kept so the ordering of tied places is unchanged.
place_counts = place_counts.sort_values(ascending=False)
# The five places with the most events.
most_affected_places = place_counts.iloc[:5]
print("\nMost Affected Places:")
print(most_affected_places)

# 9. Highlight places where > 90% of earthquakes are minor

In [None]:
# `minor_quake_percentage` holds a fraction, so 0.9 corresponds to 90% of events being minor.
is_quiet = place_minor_percentage['minor_quake_percentage'].gt(0.9)
quiet_zones = place_minor_percentage.loc[is_quiet]
print("\nRegions where earthquakes rarely exceed a minor magnitude (Quiet Zones):")
print(quiet_zones)

# 10. Average magnitude of all earthquakes

In [None]:
# Arithmetic mean of the magnitude column; missing values are ignored by mean().
avg_magnitude = df.loc[:, 'mag'].mean()
print(f"Average Magnitude: {avg_magnitude}")

# 11. Classification of earthquakes into minor, moderate, major, and great earthquakes

In [None]:
def classify_magnitude(mag):
    """Map an earthquake magnitude to a descriptive size class.

    Parameters
    ----------
    mag : float
        Magnitude value. May be missing (None, NaN or pd.NA).

    Returns
    -------
    str
        "Minor" (< 4.0), "Moderate" (4.0 to < 6.0), "Major" (6.0 to < 8.0),
        "Great" (>= 8.0), or "Unknown" when the magnitude is missing.
    """
    # Every comparison against NaN is False, so without this guard a missing
    # magnitude would fall through all branches and be labelled "Great".
    if pd.isna(mag):
        return "Unknown"
    if mag < 4.0:
        return "Minor"
    if mag < 6.0:
        return "Moderate"
    if mag < 8.0:
        return "Major"
    return "Great"

# Label every event with its size class, then tally the events in each class.
df['magnitude_class'] = df['mag'].map(classify_magnitude)
classification_counts = df['magnitude_class'].value_counts()
print("Classification of Earthquakes by Magnitude:")
print(classification_counts)

# 12. Identification of urban centers in high-risk zones

In [None]:
# Placeholder city coordinates, one (city, latitude, longitude) record per row.
# Replace with actual data.
urban_centers = pd.DataFrame(
    [
        ('CityA', 34.05, -118.24),
        ('CityB', 40.71, -74.01),
        ('CityC', 51.51, -0.13),
    ],
    columns=['city', 'latitude', 'longitude'],
)

# 13. Flag urban centers that lie close to a high-risk earthquake cluster

In [None]:
# `cluster_centers` must exist before the proximity check below can run; no
# earlier cell visible here defines it. Build it from the busiest rounded
# lat/lon cells in `high_risk_zones` (already sorted by descending count).
# NOTE(review): the top-5 cutoff mirrors the "Top 5 High-Risk Zones" printout
# and is an assumption; adjust it to the intended definition of "high risk".
TOP_ZONE_COUNT = 5
cluster_centers = list(
    high_risk_zones.head(TOP_ZONE_COUNT)[['rounded_lat', 'rounded_lon']]
    .itertuples(index=False, name=None)
)

# A city is flagged when the squared coordinate distance to any cluster centre
# is below 1.0 (both differences are taken directly on the lat/lon values).
MAX_SQUARED_DISTANCE = 1.0
urban_centers['high_risk_cluster'] = urban_centers.apply(
    lambda row: any(
        ((row['latitude'] - center_lat) ** 2 + (row['longitude'] - center_lon) ** 2)
        < MAX_SQUARED_DISTANCE
        for center_lat, center_lon in cluster_centers
    ),
    axis=1,
)

# Keep only the flagged cities.
high_risk_urban_centers = urban_centers[urban_centers['high_risk_cluster']]
print("Urban Centers in High-Risk Zones:")
print(high_risk_urban_centers)

# 14. The Most Seismic Activity in a Local Area

In [None]:
# `high_risk_zones` is sorted by descending count, so its first row is the busiest cell.
most_seismic_area = high_risk_zones.iloc[0]
# The frame's columns are 'rounded_lat', 'rounded_lon' and 'earthquake_count';
# any other key raises KeyError. Selecting a single row mixes the integer count
# with float coordinates and upcasts it, so cast it back to int for display.
most_seismic_count = int(most_seismic_area['earthquake_count'])
print(f"\nMost Seismic Area: Latitude {most_seismic_area['rounded_lat']}, Longitude {most_seismic_area['rounded_lon']} with {most_seismic_count} events.")