In [1]:
# Title: Chapter 2 Exercises page 111
# Author: Stefanie Molin
# Date: 13 September 2024 
# Modified By: Andres Melendez
# Description: The following are Exercises 1-6 in the Hands-On Data Analysis (2nd Edition), page 111. 

In [2]:
import pandas as pd

def calculate_summary_statistics(dataframe, place):
    """
    Filter the DataFrame for earthquakes in a given place and return the filtered DataFrame.

    Parameters:
    dataframe (pd.DataFrame): The DataFrame containing earthquake data.
    place (str): The location to filter by (e.g., 'Japan').

    Returns:
    pd.DataFrame: The filtered DataFrame for the specified place.
    """
    # Filter the DataFrame for earthquakes in the specified place
    filtered_data = dataframe[dataframe['parsed_place'].str.contains(place)]
    
    # Return the filtered DataFrame
    return filtered_data

try:
    # Attempt to load the earthquake data
    df = pd.read_csv('data/parsed.csv')
except FileNotFoundError as e:
    # Handle the case where the file is not found
    print(f"Error: {e}. Please ensure the file path is correct.")
finally:
    print("File loading attempt completed.")



File loading attempt completed.


In [3]:
# Filter the DataFrame for Japan earthquakes with 'mb' magnitude type
# The filter checks if the 'parsed_place' contains 'Japan' and if the 'magType' equals 'mb'.
japan_earthquakes = calculate_summary_statistics(df, 'Japan')
japan_mb_earthquakes = japan_earthquakes[japan_earthquakes['magType'] == 'mb']

# Calculate the 95th percentile for the magnitude data
# Using the quantile method to determine the 95th percentile of the 'mag' column.
percentile_95 = japan_mb_earthquakes['mag'].quantile(0.95)

# Output the result
print(f"The 95th percentile of earthquake magnitudes in Japan with 'mb' magnitude type is: {percentile_95}")



The 95th percentile of earthquake magnitudes in Japan with 'mb' magnitude type is: 4.9


In [4]:
# Filter the DataFrame for earthquakes in Indonesia
# The filter uses 'parsed_place' to check for records that contain 'Indonesia'.
indonesia_earthquakes = calculate_summary_statistics(df, 'Indonesia')

# Calculate the total number of earthquakes in Indonesia
total_earthquakes = len(indonesia_earthquakes)

# Further filter for earthquakes that triggered tsunamis
# A 'tsunami' value of 1 indicates that a tsunami occurred with the earthquake.
tsunami_earthquakes = indonesia_earthquakes[indonesia_earthquakes['tsunami'] == 1]

# Calculate the number of such earthquakes
total_tsunami_earthquakes = len(tsunami_earthquakes)

# Calculate and return the percentage of earthquakes that were coupled with tsunamis
percentage_tsunami_earthquakes = (total_tsunami_earthquakes / total_earthquakes) * 100

percentage_tsunami_earthquakes


23.12925170068027

In [5]:
# Using the calculate_summary_statistics function to filter data for Nevada
nevada_earthquakes = calculate_summary_statistics(df, 'Nevada')

# Calculate the summary statistics for earthquakes in Nevada
# 'describe' is a built-in pandas method that provides summary statistics
summary_statistics_nevada = nevada_earthquakes['mag'].describe()

# Output the summary statistics
summary_statistics_nevada

count    681.000000
mean       0.500073
std        0.696710
min       -0.500000
25%       -0.100000
50%        0.400000
75%        0.900000
max        2.900000
Name: mag, dtype: float64

In [6]:
def is_ring_of_fire(location):
    """
    Check if the location is part of the Ring of Fire.

    Parameters:
    location (str): The location to check (e.g., 'Japan').

    Returns:
    bool: True if the location is on the Ring of Fire, False otherwise.
    """
    ring_of_fire_locations = [
        'Alaska', 'Antarctic', 'Bolivia', 'California', 'Canada', 'Chile',
        'Costa Rica', 'Ecuador', 'Fiji', 'Guatemala', 'Indonesia', 'Japan',
        'Kermadec Islands', 'Mexico', 'New Zealand', 'Peru', 'Philippines',
        'Russia', 'Taiwan', 'Tonga', 'Washington'
    ]
    # Check if any of the Ring of Fire locations are in the 'location' string
    return any(loc in location for loc in ring_of_fire_locations)

In [7]:
# Add a column to indicate if the earthquake occurred in a Ring of Fire location
df['Ring_of_Fire'] = df['parsed_place'].apply(is_ring_of_fire)

# Output the first few rows to check the new column and the summary statistics
df.head(), summary_statistics_nevada

(  alert  cdi      code                                             detail  \
 0   NaN  NaN  37389218  https://earthquake.usgs.gov/fdsnws/event/1/que...   
 1   NaN  NaN  37389202  https://earthquake.usgs.gov/fdsnws/event/1/que...   
 2   NaN  4.4  37389194  https://earthquake.usgs.gov/fdsnws/event/1/que...   
 3   NaN  NaN  37389186  https://earthquake.usgs.gov/fdsnws/event/1/que...   
 4   NaN  NaN  73096941  https://earthquake.usgs.gov/fdsnws/event/1/que...   
 
        dmin  felt    gap           ids   mag magType  ...           time  \
 0  0.008693   NaN   85.0  ,ci37389218,  1.35      ml  ...  1539475168010   
 1  0.020030   NaN   79.0  ,ci37389202,  1.29      ml  ...  1539475129610   
 2  0.021370  28.0   21.0  ,ci37389194,  3.42      ml  ...  1539475062610   
 3  0.026180   NaN   39.0  ,ci37389186,  0.44      ml  ...  1539474978070   
 4  0.077990   NaN  192.0  ,nc73096941,  2.16      md  ...  1539474716050   
 
                            title  tsunami        type  \
 0  M 1.

In [8]:
# Calculate the number of earthquakes in Ring of Fire locations
earthquakes_in_ring_of_fire = df[df['Ring_of_Fire'] == True].shape[0]

# Calculate the number of earthquakes outside Ring of Fire locations
earthquakes_outside_ring_of_fire = df[df['Ring_of_Fire'] == False].shape[0]

# Output the counts for Ring of Fire and non-Ring of Fire locations
print(f"Number of earthquakes in Ring of Fire locations: {earthquakes_in_ring_of_fire}")
print(f"Number of earthquakes outside Ring of Fire locations: {earthquakes_outside_ring_of_fire}")


Number of earthquakes in Ring of Fire locations: 7189
Number of earthquakes outside Ring of Fire locations: 2143


In [9]:
# Calculate the number of tsunamis that occurred along the Ring of Fire
tsunami_count_ring_of_fire = df[(df['Ring_of_Fire'] == True) & (df['tsunami'] == 1)].shape[0]

# Output the tsunami count
print(f"Number of tsunamis along the Ring of Fire: {tsunami_count_ring_of_fire}")


Number of tsunamis along the Ring of Fire: 45
