In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the earthquake table from a CSV resolved relative to the working directory.
# NOTE(review): the file name is spelled "earthquak_preporcessed" — it must match
# the file on disk exactly, so confirm before correcting the spelling.
data = pd.read_csv("earthquak_preporcessed.csv")

In [3]:
# Preview ten random rows. The fixed seed keeps the displayed sample identical
# across Restart & Run All, so the saved output stays in sync with the code.
data.sample(10, random_state=42)

Unnamed: 0,title,magnitude,date_time,cdi,mmi,alert,tsunami,sig,net,nst,dmin,gap,magType,depth,latitude,longitude,location,continent,country
42,"M 7.0 - 95 km ENE of Kuril’sk, Russia",7.0,13-02-2020 10:33,5,6,yellow,1,770,us,0,4.501,25.0,mww,143.0,45.6161,148.959,"Kuril’sk, Russia",Asia,Russia
111,"M 6.8 - 54 km N of Vallenar, Chile",6.8,30-01-2013 20:15,6,7,green,0,771,us,596,0.0,19.3,mww,45.0,-28.094,-70.653,"Vallenar, Chile",South America,Chile
90,"M 6.7 - 21 km SSE of Kod?ri??, Nepal",6.7,26-04-2015 07:09,7,7,yellow,0,1016,us,0,0.727,13.0,mww,22.91,27.7711,86.0173,"Kod?ri??, Nepal",Asia,Nepal
14,"M 6.5 - 96 km SE of Lugu, Taiwan",6.5,17-09-2022 13:41,7,7,green,1,756,us,178,0.43,54.0,mww,10.0,23.029,121.348,"Lugu, Taiwan",Asia,Taiwan
22,"M 6.7 - 166 km W of Pariaman, Indonesia",6.7,13-03-2022 21:09,9,6,green,0,708,us,0,2.188,43.0,mww,28.0,-0.6831,98.6034,"Pariaman, Indonesia",Asia,Indonesia
33,"M 7.3 - Southern Qinghai, China",7.3,21-05-2021 18:04,9,9,orange,0,1025,us,0,4.655,9.0,mww,10.0,34.5861,98.2551,"Southern Qinghai, China",Asia,People's Republic of China
62,"M 6.6 - 81 km NNE of Ust’-Kamchatsk Staryy, Ru...",6.6,29-03-2017 04:09,6,7,yellow,1,671,us,0,4.591,14.0,mww,17.0,56.9401,162.786,"Ust’-Kamchatsk Staryy, Russia",Asia,Russia
108,"M 6.6 - 183 km SW of Belaya Gora, Russia",6.6,14-02-2013 13:13,0,7,green,1,670,us,697,0.0,15.3,mww,11.0,67.631,142.508,"Belaya Gora, Russia",Asia,Russia
106,"M 6.5 - 87 km SE of Ozernovskiy, Russia",6.5,01-03-2013 13:20,3,6,green,0,651,us,480,0.0,20.7,mww,29.0,50.958,157.408,"Ozernovskiy, Russia",Asia,Russia
68,"M 6.8 - 26 km W of Chauk, Myanmar",6.8,24-08-2016 10:34,6,6,yellow,0,981,us,0,1.802,17.0,mww,82.0,20.9228,94.569,"Chauk, Myanmar",Asia,Myanmar


In [4]:
# Count missing values per column.
data.isnull().sum()

title        0
magnitude    0
date_time    0
cdi          0
mmi          0
alert        0
tsunami      0
sig          0
net          0
nst          0
dmin         0
gap          0
magType      0
depth        0
latitude     0
longitude    0
location     0
continent    0
country      0
dtype: int64

In [5]:
# Parse the 'DD-MM-YYYY HH:MM' strings in date_time into datetimes.
# NOTE(review): this cell overwrites date_time in place below, so re-running it
# after it has already executed will likely fail to parse with this format.
converted_dates = pd.to_datetime(data["date_time"], format='%d-%m-%Y %H:%M')


# Render the parsed values back to 'YYYY-MM-DD' text; the time of day is dropped.
formatted_dates = converted_dates.dt.strftime('%Y-%m-%d')

# Replace the original column with the date-only strings.
data["date_time"] = formatted_dates

# Linear regression for future earthquake magnitude prediction

In [6]:

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from datetime import datetime, timedelta


In [7]:
# Derive numeric time features from the 'YYYY-MM-DD' strings in date_time.
# The column is parsed once and the calendar parts are read from that result.
parsed_dates = pd.to_datetime(data['date_time'])

# Epoch seconds of each date at midnight, interpreted as a naive datetime.
data['timestamp'] = data['date_time'].apply(
    lambda day_text: datetime.strptime(day_text, '%Y-%m-%d').timestamp()
)
data['year'] = parsed_dates.dt.year
data['month'] = parsed_dates.dt.month
data['day'] = parsed_dates.dt.day
data['weekday'] = parsed_dates.dt.weekday


In [8]:
# Predictor columns for the magnitude regression: location, the time features
# derived from date_time, and the remaining numeric event columns.
features = [
    'latitude', 'longitude',
    'timestamp', 'year', 'month', 'day', 'weekday',
    'cdi', 'mmi', 'tsunami', 'sig', 'nst', 'dmin', 'gap', 'depth',
]
target = 'magnitude'

# Design matrix and response vector.
X, y = data[features], data[target]


In [9]:
# Learn per-column mean/variance on every row of X, then standardise X with them.
# No hold-out split is made here: the regression is fitted on the full table.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


model = LinearRegression()
model.fit(X_scaled, y)




In [10]:
def predict_future_magnitude(model, date, latitude, longitude, feature_scaler=None):
    """Predict an earthquake magnitude for a given date and location.

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict``.
    date : str
        Calendar date formatted as ``'%Y-%m-%d'``.
    latitude, longitude : float
        Coordinates of the query point.
    feature_scaler : optional
        Fitted transformer exposing ``transform`` that matches ``model``'s
        training features. When omitted, the notebook-level ``scaler`` is used.
        Pass it explicitly if ``scaler`` has since been rebound to a
        transformer fitted on a different feature set.

    Returns
    -------
    The first element of ``model.predict`` for the single constructed row.

    Raises
    ------
    ValueError
        If ``date`` does not match ``'%Y-%m-%d'``.
    """
    # Resolve the transformer up front; the global is only read as a fallback.
    active_scaler = scaler if feature_scaler is None else feature_scaler

    # Parse once and read every calendar component from the same object.
    parsed_date = datetime.strptime(date, '%Y-%m-%d')

    # Row order: latitude, longitude, timestamp, year, month, day, weekday,
    # followed by eight raw zeros as placeholders for the remaining columns.
    # NOTE(review): these are raw zeros, not column means, so after scaling
    # they may sit far from the training distribution — confirm this is intended.
    future_features = [
        latitude, longitude,
        parsed_date.timestamp(),
        parsed_date.year, parsed_date.month, parsed_date.day, parsed_date.weekday(),
        0, 0, 0, 0, 0, 0, 0, 0,
    ]

    scaled_features = active_scaler.transform([future_features])

    predicted_magnitude = model.predict(scaled_features)
    return predicted_magnitude[0]

# Example query: one date and one coordinate pair.
future_date = '2024-01-01'
latitude_value, longitude_value = 40.7128, -74.0060

# Uses the regression `model` fitted earlier in the notebook.
predicted_future_magnitude = predict_future_magnitude(
    model,
    future_date,
    latitude_value,
    longitude_value,
)
print(f"The predicted magnitude for {future_date} at latitude {latitude_value} and longitude {longitude_value} is: {predicted_future_magnitude}")


The predicted magnitude for 2024-01-01 at latitude 40.7128 and longitude -74.006 is: 5.02705222282823




# Logistic Regression for Tsunami prediction (probability of an oceanic earthquake)

In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# Predictor columns for the tsunami classifier: the same location and time
# features as before, with 'magnitude' as an input and 'tsunami' as the label.
# NOTE: this cell rebinds `features`, `X`, `y`, `scaler` and `model`, replacing
# the objects created for the linear regression earlier in the notebook.
features = [
    'latitude', 'longitude',
    'timestamp', 'year', 'month', 'day', 'weekday',
    'cdi', 'mmi', 'magnitude', 'sig', 'nst', 'dmin', 'gap', 'depth',
]
target = 'tsunami'

X, y = data[features], data[target]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaling statistics on the training rows only, then apply them to
# both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

model = LogisticRegression().fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Accuracy: 0.57
Classification Report:
              precision    recall  f1-score   support

           0       0.67      0.33      0.44        12
           1       0.53      0.82      0.64        11

    accuracy                           0.57        23
   macro avg       0.60      0.58      0.54        23
weighted avg       0.60      0.57      0.54        23

Confusion Matrix:
[[4 8]
 [2 9]]


In [20]:

def predict_single_data(model, scaler, features, data_point):
    """Classify one observation and report the probability of the second class.

    Parameters
    ----------
    model
        Fitted classifier exposing ``predict`` and ``predict_proba``.
    scaler
        Fitted transformer exposing ``transform``.
    features
        Iterable of keys giving the column order expected by ``scaler``.
    data_point
        Mapping from feature name to value; it must contain every name in
        ``features`` (a missing key raises ``KeyError``).

    Returns
    -------
    tuple
        ``(label, probability)``: the first predicted label and the entry at
        index 1 of the first ``predict_proba`` row.
    """
    # Arrange the values in the order given by `features`.
    ordered_values = []
    for name in features:
        ordered_values.append(data_point[name])

    # The scaler receives a single-row batch.
    scaled_row = scaler.transform([ordered_values])

    labels = model.predict(scaled_row)
    class_probabilities = model.predict_proba(scaled_row)

    first_label = labels[0]
    second_class_probability = class_probabilities[0][1]
    return first_label, second_class_probability


# Example event used to query the tsunami classifier.
# All date-derived features come from one datetime so they cannot disagree;
# a separately hard-coded epoch value can silently point at a different day
# or year than the year/month/day fields. The timestamp is computed the same
# way as the training column (naive midnight -> .timestamp()).
event_date = datetime(2022, 12, 25)

individual_data_point = {
    'latitude': 40.7128,
    'longitude': -74.0060,
    'timestamp': event_date.timestamp(),
    'year': event_date.year,
    'month': event_date.month,
    'day': event_date.day,
    'weekday': event_date.weekday(),
    'cdi': 4.5,
    'mmi': 5.0,
    'magnitude': 7.2,
    'sig': 200,
    'nst': 50,
    'dmin': 0.2,
    'gap': 30,
    'depth': 15
}

# `model`, `scaler` and `features` are the objects bound by the
# logistic-regression cell.
prediction, probability = predict_single_data(model, scaler, features, individual_data_point)

print(f"Prediction: {'Tsunami in oceanic region' if prediction == 1 else 'No tsunami or non-oceanic region tsunami'}")
print(f"Probability of tsunami occurrence: {probability:.2f}")


Prediction: Tsunami in oceanic region
Probability of tsunami occurrence: 1.00


