In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

from statsmodels.stats.weightstats import ztest as ztest

In [2]:
earthquake_df = pd.read_csv('../datasets/final_dataset/cleaned_earthquake_data.csv')
earthquake_df.head()

Unnamed: 0,latitude,longitude,depth,magnitude,magnitude_type,distance,gravity,force,year,month,hour,minutes,day_name
0,38.2484,38.1064,4.75,4.1,least damage,403639.392,9.8001,1.79766e+20,2024,7,21,41,Tuesday
1,18.085167,-66.650833,20.18,2.5,least damage,395082.122,9.7854,1.876376e+20,2024,7,21,10,Tuesday
2,60.5382,-151.8092,71.1,3.6,least damage,399158.905,9.8196,1.838243e+20,2024,7,19,53,Tuesday
3,34.1134,86.1206,10.0,4.0,least damage,404343.124,9.7966,1.791408e+20,2024,7,19,18,Tuesday
4,31.499167,-115.628667,8.38,2.72,least damage,397055.439,9.7944,1.857772e+20,2024,7,19,8,Tuesday


In [3]:
earthquake_df.shape

(314351, 13)

## 1. The mass of the Moon is approximately 81 times less than that of Earth. The centre of mass of the Earth-Moon system is located near the Earth due to its significantly larger mass. Gravitational force between earth and moon  play a role in the occurrence of earthquakes.

### Hypothesis Formulation

- Null Hypothesis (h0): The gravitational force between the Moon and Earth does not play a significant role in the occurrence of earthquakes.
- Alternative Hypothesis (H1): The gravitational force between Moon and Earth plays a significant role in the occurrence of earthquakes. 

In [4]:
# Determine the sample sizes
total_size = len(earthquake_df)  # Total number of rows in the dataset
print("Total size: ",total_size)

sample_size_20 = int(0.20 * total_size)  # 20% of the total data

# Select the samples
sample_20 = earthquake_df.sample(n=sample_size_20, random_state=42)

print("sample_25: ", sample_20.shape)
print("Sample size for Hypothesis Tesing is: ", len(sample_20))

Total size:  314351
sample_25:  (62870, 13)
Sample size for Hypothesis Tesing is:  62870


In [5]:
# Calculate the sample mean and standard deviation of the gravity
def get_mean(df, feature):
    mean_value = stats.tmean(df[feature])
    return mean_value;


sample_mean_force = np.round(get_mean(sample_20,'force'), 4)

# Population mean for Earth's gravitational force (assumed)
population_mean_force = np.round(np.mean(earthquake_df['force']),4)

print("Sample Mean: ",sample_mean_force)
print("Population Mean: ",population_mean_force)

Sample Mean:  1.9867652444066e+20
Population Mean:  1.986427977691459e+20


In [6]:
# Perform the Z-test
z_score, p_value = ztest(earthquake_df['force'], value=population_mean_force)

# Output results
print(f"Z-score: {z_score:.4f}")
print(f"P-value: {p_value:.4f}")

# Interpretation
alpha = 0.05  # Significance level
if p_value < alpha:
    print("Reject the null hypothesis. The gravitational force might have a significant effect on earthquake magnitude.")
else:
    print("Fail to reject the null hypothesis. The gravitational force does not have a significant effect on earthquake magnitude.")

Z-score: 0.0000
P-value: 1.0000
Fail to reject the null hypothesis. The gravitational force does not have a significant effect on earthquake magnitude.


## 2. The gravity of Earth exhibits variations from one location to another. These fluctuations in `gravitational acceleration (g)` across different places contribute to or influence seismic activities, such as earthquakes. analysis hypothesis for this statement.

#### Reference: https://kuscholarworks.ku.edu/server/api/core/bitstreams/1f9e5327-a52d-43c1-924c-4e812586162d/content

https://medium.com/@codewithpj/hypothesis-testing-with-python-t-test-z-test-and-p-values-code-examples-fa274dc58c36#:~:text=Z%2Dtest%20is%20used%20to,population%20standard%20deviation%20is%20known.

### Hypothesis Formulation

- Null Hypothesis (h0): Variations in gravitational acceleration across different locations do not have a statistically significant effect on earthquake magnitude. In other words, gravity does not influence seismic activity.

- Alternative Hypothesis (H1): Variations in gravitational acceleration across different locations have a statistically significant effect on earthquake magnitude. In other words, gravity does influence seismic activity.

#### Independent Sample T-test: used to compare the mean values of two different samples from two different populations.



In [7]:
# 2. Hypothesis Testing (T-test)
# Grouping data by gravity (e.g., low gravity vs. high gravity)
low_gravity = earthquake_df[earthquake_df['gravity'] < earthquake_df['gravity'].median()]['magnitude']
high_gravity = earthquake_df[earthquake_df['gravity'] > earthquake_df['gravity'].median()]['magnitude']
print("Median Gravity:", earthquake_df['gravity'].median())


t_stat, p_value = stats.ttest_ind(low_gravity, high_gravity, equal_var=False)
print(f"T-test Statistic: {t_stat:.4f}, P-value: {p_value:.4f}")

# Interpretation of p-value
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis. Gravity variations have a significant effect on earthquake magnitude.")
else:
    print("Fail to reject the null hypothesis. Gravity variations do not have a significant effect on earthquake magnitude.")

Median Gravity: 9.7947
T-test Statistic: 154.1018, P-value: 0.0000
Reject the null hypothesis. Gravity variations have a significant effect on earthquake magnitude.


## 3. The distance between the Moon and Earth undergoes periodic changes. There a correlation between these variations in the Moon-Earth distance and the incidence of earthquakes.

### Hypothesis Formulation

- Null Hypothesis (h0): There is no significant correlation between variations in the Moon-Earth distance and the incidence of earthquakes. In other words, the Moon-Earth distance does not influence the occurrence of earthquakes.

- Alternative Hypothesis (H1): There is a significant correlation between variations in the Moon-Earth distance and the incidence of earthquakes. In other words, the Moon-Earth distance does influence the occurrence of earthquakes.

In [11]:
# Pearson correlation
correlation = earthquake_df[['distance', 'magnitude']].corr().iloc[0, 1]
print(f"Pearson Correlation between Moon-Earth Distance and Magnitude: {correlation:.4f}")

Pearson Correlation between Moon-Earth Distance and Magnitude: -0.0072


In [13]:
# Grouping data by Moon-Earth distance (low vs. high distance)
median_distance = earthquake_df['distance'].median()
low_distance = earthquake_df[earthquake_df['distance'] < median_distance]['magnitude']
high_distance = earthquake_df[earthquake_df['distance'] >= median_distance]['magnitude']
print("Median Distance:", earthquake_df['distance'].median())

Median Distance: 385622.088


In [10]:
# Perform T-test
t_stat, p_value = stats.ttest_ind(low_distance, high_distance, equal_var=False)
print(f"T-test Statistic: {t_stat:.4f}, P-value: {p_value:.4f}")

# Interpretation of p-value
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis. Moon-Earth distance variations have a significant effect on earthquake magnitude.")
else:
    print("Fail to reject the null hypothesis. Moon-Earth distance variations do not have a significant effect on earthquake magnitude.")

T-test Statistic: 4.7329, P-value: 0.0000
Reject the null hypothesis. Moon-Earth distance variations have a significant effect on earthquake magnitude.
