MLP DATASET 1

In [2]:

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
%run MLProject.ipynb
df = pd.read_csv('usgs_main.csv')
df1 = df.copy()
df1['time'] = pd.to_datetime(df1['time'])
df1 = df1.sort_values('time')
df1 = df1.dropna(subset=['latitude','longitude','depth','mag','time'])


dfweek_mlp = df1.set_index('time').resample('W').apply({
    'mag':'mean',
    'latitude':'mean',
    'longitude':'mean',
    'depth':'mean',
    'magType': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else x.iloc[0],
    'type': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else x.iloc[0],
    'status': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else x.iloc[0],
    'net': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else x.iloc[0]
})

dfweek_mlp = dfweek_mlp.reset_index(drop=True)
dfweek_mlp.index = dfweek_mlp.index + 1
dfweek_mlp.index.name = 'timeindex'

dfweek_mlp['futuremag'] = dfweek_mlp['mag'].shift(-1)
dfweek_mlp['futuredepth'] = dfweek_mlp['depth'].shift(-1)
dfweek_mlp['futurelat'] = dfweek_mlp['latitude'].shift(-1)
dfweek_mlp['futurelon'] = dfweek_mlp['longitude'].shift(-1)
dfweek_mlp = dfweek_mlp.dropna(subset=['futuremag','futuredepth','futurelat','futurelon'])


mlp_hiperparametreler = {
    'hidden_layer_sizes': [(50,), (100,),(150,), (50, 50), (100, 50),(150,100), (100, 100)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'lbfgs'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate_init': [0.001, 0.01, 0.1],
    'max_iter': [500, 1000, 1500],
    'shiftnum': [2, 3, 4]
}


train_mlp = dfweek_mlp[:int((4*len(dfweek_mlp))/5)]
test_mlp = dfweek_mlp[int(4*len(dfweek_mlp)/5):]


splitter_mlp = TimeSeriesSplit(n_splits=3)
grid_mlp = GridSearchCV(
    estimator=MLPEarthquakePredictor(random_state=42),
    param_grid=mlp_hiperparametreler,
    cv=splitter_mlp,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    verbose=1
)

grid_mlp.fit(train_mlp)
best_model_mlp = grid_mlp.best_estimator_

test_predictions_mlp = best_model_mlp.predict(test_mlp)
test_actual_mlp = test_mlp[['futuremag', 'futuredepth', 'futurelat', 'futurelon']].dropna()
min_len_mlp = min(len(test_predictions_mlp), len(test_actual_mlp))


target_names = ['futuremag', 'futuredepth', 'futurelat', 'futurelon']
target_labels = ['Magnitude', 'Depth', 'Latitude', 'Longitude']

for i, (name, label) in enumerate(zip(target_names, target_labels)):
    if i < test_predictions_mlp.shape[1] and i < test_actual_mlp.shape[1]:
        target_mse = mean_squared_error(
            test_actual_mlp.iloc[:min_len_mlp, i], 
            test_predictions_mlp[:min_len_mlp, i]
        )
        target_mae = mean_absolute_error(
            test_actual_mlp.iloc[:min_len_mlp, i], 
            test_predictions_mlp[:min_len_mlp, i]
        )
        target_r2 = r2_score(
            test_actual_mlp.iloc[:min_len_mlp, i], 
            test_predictions_mlp[:min_len_mlp, i]
        )
        
        print(f"   {label:12}: MSE={target_mse:.3f}, MAE={target_mae:.3f}, R²={target_r2:.3f}")

Fitting 3 folds for each of 2268 candidates, totalling 6804 fits
   Magnitude   : MSE=0.016, MAE=0.091, R²=-0.680
   Depth       : MSE=8.188, MAE=2.371, R²=-0.355
   Latitude    : MSE=2.200, MAE=1.202, R²=-0.428
   Longitude   : MSE=40.376, MAE=5.353, R²=-1.199




In [None]:

import joblib
import json
import os
os.makedirs('models', exist_ok=True)#bu kod bloğunda AI dan nasıl model kaydedilir öğrendim
joblib.dump(best_model_mlp, 'models/mlp_dataset1.pkl')
print(grid_mlp.best_params_)
mlp_best_params = {
    'dataset1_params': grid_mlp.best_params_,
}


{'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (50,), 'learning_rate_init': 0.001, 'max_iter': 500, 'shiftnum': 2, 'solver': 'adam'}


MLP DATASET 2

In [4]:
pn = pd.read_csv('Significant Earthquake Dataset 1900-2023.csv')
df2 = pn.copy()
df2 = df2.rename(columns={
    'Time':'time', 'Mag':'mag', 'Depth':'depth', 
    'Latitude':'latitude', 'Longitude':'longitude'
})

df2['time'] = pd.to_datetime(df2['time'])
df2 = df2.sort_values('time')
df2 = df2.dropna(subset=['latitude','longitude','depth','mag','time'])


dfyear_mlp = df2.set_index('time').resample('YE').apply({
    'mag':'mean',
    'latitude':'mean',
    'longitude':'mean',
    'depth':'mean',#daha önce de belirttiğim gibi magtype tarzı değişkenleri oluştururken yapay zeka yardımı aldım
    'MagType': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 and len(x) > 0 else 'Unknown',
    'Type': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 and len(x) > 0 else 'Unknown',
    'status': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 and len(x) > 0 else 'Unknown',
    'net': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 and len(x) > 0 else 'Unknown'
})

dfyear_mlp = dfyear_mlp.rename(columns={'MagType':'magType','Type':'type'})

dfyear_mlp = dfyear_mlp.reset_index(drop=True)
dfyear_mlp.index = dfyear_mlp.index + 1
dfyear_mlp.index.name = 'timeindex'

dfyear_mlp['futuremag'] = dfyear_mlp['mag'].shift(-1)
dfyear_mlp['futuredepth'] = dfyear_mlp['depth'].shift(-1)
dfyear_mlp['futurelat'] = dfyear_mlp['latitude'].shift(-1)
dfyear_mlp['futurelon'] = dfyear_mlp['longitude'].shift(-1)
dfyear_mlp = dfyear_mlp.dropna(subset=['futuremag','futuredepth','futurelat','futurelon'])

mlp_hiperparametreler_2 = {
    'hidden_layer_sizes': [(100,), (200,), (100, 50),(200,50) ,(200, 100), (150, 75),(200,75),(150,100)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'lbfgs'],
    'alpha': [0.001, 0.01, 0.1],
    'learning_rate_init': [0.001, 0.02, 0.04],
    'max_iter': [900, 1750, 2000],
    'shiftnum': [3, 4, 5]
}

train_mlp_2 = dfyear_mlp[:int((4*len(dfyear_mlp))/5)]
test_mlp_2 = dfyear_mlp[int(4*len(dfyear_mlp)/5):]

splitter_mlp_2 = TimeSeriesSplit(n_splits=3)
grid_mlp_2 = GridSearchCV(
    estimator=MLPEarthquakePredictor(random_state=42),
    param_grid=mlp_hiperparametreler_2,
    cv=splitter_mlp_2,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    verbose=1
)

grid_mlp_2.fit(train_mlp_2)
best_model_mlp_2 = grid_mlp_2.best_estimator_

test_predictions_mlp_2 = best_model_mlp_2.predict(test_mlp_2)
test_actual_mlp_2 = test_mlp_2[['futuremag', 'futuredepth', 'futurelat', 'futurelon']].dropna()
min_len_mlp_2 = min(len(test_predictions_mlp_2), len(test_actual_mlp_2))

for i, (name, label) in enumerate(zip(target_names, target_labels)):
    if i < test_predictions_mlp_2.shape[1] and i < test_actual_mlp_2.shape[1]:
        target_mse = mean_squared_error(
            test_actual_mlp_2.iloc[:min_len_mlp_2, i], 
            test_predictions_mlp_2[:min_len_mlp_2, i]
        )
        target_mae = mean_absolute_error(
            test_actual_mlp_2.iloc[:min_len_mlp_2, i], 
            test_predictions_mlp_2[:min_len_mlp_2, i]
        )
        target_r2 = r2_score(
            test_actual_mlp_2.iloc[:min_len_mlp_2, i], 
            test_predictions_mlp_2[:min_len_mlp_2, i]
        )
        
        print(f"   {label:12}: MSE={target_mse:.3f}, MAE={target_mae:.3f}, R²={target_r2:.3f}")

Fitting 3 folds for each of 2592 candidates, totalling 7776 fits
   Magnitude   : MSE=0.003, MAE=0.046, R²=-10.041
   Depth       : MSE=83.883, MAE=7.526, R²=0.257
   Latitude    : MSE=21.617, MAE=3.830, R²=-0.853
   Longitude   : MSE=567.805, MAE=19.930, R²=-2.208




In [13]:
joblib.dump(best_model_mlp_2, 'models/mlp_dataset2.pkl')
mlp_best_params['dataset2_params'] = grid_mlp_2.best_params_
print(grid_mlp_2.best_params_)

{'activation': 'relu', 'alpha': 0.001, 'hidden_layer_sizes': (100,), 'learning_rate_init': 0.001, 'max_iter': 900, 'shiftnum': 3, 'solver': 'adam'}


MLP EXTRA-BURADA YILLIK TİMESTEPLER İÇEREN DATASETİ HAFTALIK TİMESTEPLERE BÖLDÜM

In [6]:
extra= pd.read_csv('Significant Earthquake Dataset 1900-2023.csv')
df3 = extra.copy()
df3 = df3.rename(columns={
    'Time':'time', 'Mag':'mag', 'Depth':'depth', 
    'Latitude':'latitude', 'Longitude':'longitude'
})

df3['time'] = pd.to_datetime(df3['time'])
df3 = df3.sort_values('time')
df3 = df3.dropna(subset=['latitude','longitude','depth','mag','time'])


dfextra_mlp = df3.set_index('time').resample('W').apply({
    'mag':'mean',
    'latitude':'mean',
    'longitude':'mean',
    'depth':'mean',#üsttekiler gibi, aynı şekilde
    'MagType': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 and len(x) > 0 else 'Unknown',
    'Type': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 and len(x) > 0 else 'Unknown',
    'status': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 and len(x) > 0 else 'Unknown',
    'net': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 and len(x) > 0 else 'Unknown'
})

dfextra_mlp = dfextra_mlp.rename(columns={'MagType':'magType','Type':'type'})

dfextra_mlp = dfextra_mlp.reset_index(drop=True)
dfextra_mlp.index = dfextra_mlp.index + 1
dfextra_mlp.index.name = 'timeindex'

dfextra_mlp['futuremag'] = dfextra_mlp['mag'].shift(-1)
dfextra_mlp['futuredepth'] = dfextra_mlp['depth'].shift(-1)
dfextra_mlp['futurelat'] = dfextra_mlp['latitude'].shift(-1)
dfextra_mlp['futurelon'] = dfextra_mlp['longitude'].shift(-1)
dfextra_mlp = dfextra_mlp.dropna(subset=['futuremag','futuredepth','futurelat','futurelon'])

mlp_hiperparametreler_3 = {
    'hidden_layer_sizes': [(100,), (200,), (100, 50),(200,50) ,(200, 100), (150, 75),(200,75),(150,100)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'lbfgs'],
    'alpha': [0.001, 0.01, 0.1],
    'learning_rate_init': [0.001, 0.02, 0.04],
    'max_iter': [900, 1750, 2000],
    'shiftnum': [3, 4, 5]
}

train_mlp_3 = dfextra_mlp[:int((4*len(dfextra_mlp))/5)]
test_mlp_3 = dfextra_mlp[int(4*len(dfextra_mlp)/5):]

splitter_mlp_3 = TimeSeriesSplit(n_splits=3)
grid_mlp_3 = GridSearchCV(
    estimator=MLPEarthquakePredictor(random_state=42),
    param_grid=mlp_hiperparametreler_3,
    cv=splitter_mlp_3,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    verbose=1
)

grid_mlp_3.fit(train_mlp_3)
best_model_mlp_3 = grid_mlp_3.best_estimator_

test_predictions_mlp_3 = best_model_mlp_3.predict(test_mlp_3)
test_actual_mlp_3 = test_mlp_3[['futuremag', 'futuredepth', 'futurelat', 'futurelon']].dropna()
min_len_mlp_3 = min(len(test_predictions_mlp_3), len(test_actual_mlp_3))

for i, (name, label) in enumerate(zip(target_names, target_labels)):
    if i < test_predictions_mlp_3.shape[1] and i < test_actual_mlp_3.shape[1]:
        target_mse = mean_squared_error(
            test_actual_mlp_3.iloc[:min_len_mlp_3, i], 
            test_predictions_mlp_3[:min_len_mlp_3, i]
        )
        target_mae = mean_absolute_error(
            test_actual_mlp_3.iloc[:min_len_mlp_3, i], 
            test_predictions_mlp_3[:min_len_mlp_3, i]
        )
        target_r2 = r2_score(
            test_actual_mlp_3.iloc[:min_len_mlp_3, i], 
            test_predictions_mlp_3[:min_len_mlp_3, i]
        )
        
        print(f"   {label:12}: MSE={target_mse:.3f}, MAE={target_mae:.3f}, R²={target_r2:.3f}")

Fitting 3 folds for each of 2592 candidates, totalling 7776 fits




   Magnitude   : MSE=0.083, MAE=0.235, R²=-3.049
   Depth       : MSE=2567.900, MAE=39.785, R²=-0.032
   Latitude    : MSE=168.634, MAE=10.109, R²=0.068
   Longitude   : MSE=3678.452, MAE=49.853, R²=-0.252


In [14]:
joblib.dump(best_model_mlp_3, 'models/mlp_extra.pkl')
mlp_best_params['extra_params'] = grid_mlp_3.best_params_
print(grid_mlp_3.best_params_)
with open('models/mlp_best_params.json', 'w') as f:
    json.dump(mlp_best_params, f, indent=2)

{'activation': 'relu', 'alpha': 0.001, 'hidden_layer_sizes': (100,), 'learning_rate_init': 0.001, 'max_iter': 900, 'shiftnum': 3, 'solver': 'adam'}
