In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.preprocessing import RobustScaler
from sklearn.ensemble import IsolationForest
import warnings
warnings.filterwarnings('ignore')#uyarının altta gözükmesini engeller

%run MLProject.ipynb

df = pd.read_csv('usgs_main.csv')
df1 = df.copy()
df1['time'] = pd.to_datetime(df1['time'])
df1 = df1.sort_values('time')
df1 = df1.dropna(subset=['latitude','longitude','depth','mag','time'])

dfweek_mlp = df1.set_index('time').resample('W').apply({
    'mag':'mean',
    'latitude':'mean',
    'longitude':'mean',
    'depth':'mean',
    'magType': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else x.iloc[0],
    'type': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else x.iloc[0],
    'status': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else x.iloc[0],
    'net': lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else x.iloc[0]
})

dfweek_mlp = dfweek_mlp.reset_index(drop=True)
dfweek_mlp.index = dfweek_mlp.index + 1
dfweek_mlp.index.name = 'timeindex'
# https://scikit-learn.org/stable/modules/outlier_detection.html
# outlier detection hakkında
numerical_cols = ['mag', 'latitude', 'longitude', 'depth']
outlier_detector = IsolationForest(
    contamination=0.1,  # %10 veriyi outlier olarak işaretle
    random_state=42,
    n_estimators=100
)

outlier_mask = outlier_detector.fit_predict(dfweek_mlp[numerical_cols]) == 1
dfweek_mlp = dfweek_mlp[outlier_mask]
print(f"Outlier detection sonrası veri sayısı: {len(dfweek_mlp)}")
print(f"Temizlenen outlier sayısı: {np.sum(~outlier_mask)}")

#https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.RobustScaler.html
#Bu linkte robustscaler ile ilgili bilgi edindim.
robust_scaler = RobustScaler()
robust_cols = ['mag', 'latitude', 'longitude', 'depth']
available_robust_cols = [col for col in robust_cols if col in dfweek_mlp.columns]

if available_robust_cols:
    robust_scaled_data = robust_scaler.fit_transform(dfweek_mlp[available_robust_cols])
    for i, col in enumerate(available_robust_cols):
        dfweek_mlp[f'{col}_robust'] = robust_scaled_data[:, i]
    


dfweek_mlp['futuremag'] = dfweek_mlp['mag'].shift(-1)
dfweek_mlp['futuredepth'] = dfweek_mlp['depth'].shift(-1)
dfweek_mlp['futurelat'] = dfweek_mlp['latitude'].shift(-1)
dfweek_mlp['futurelon'] = dfweek_mlp['longitude'].shift(-1)
dfweek_mlp = dfweek_mlp.dropna(subset=['futuremag','futuredepth','futurelat','futurelon'])



# Hyperparameters (orijinal kod)
mlp_hiperparametreler = {
    'hidden_layer_sizes': [(50,), (100,),(150,), (50, 50), (100, 50),(150,100), (100, 100)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'lbfgs'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate_init': [0.001, 0.01, 0.1],
    'max_iter': [500, 1000, 1500],
    'shiftnum': [2, 3, 4]
}

# Train-test split (orijinal kod)
train_mlp = dfweek_mlp[:int((4*len(dfweek_mlp))/5)]
test_mlp = dfweek_mlp[int(4*len(dfweek_mlp)/5):]



# Grid search ve model eğitimi (orijinal kod)
splitter_mlp = TimeSeriesSplit(n_splits=3)
grid_mlp = GridSearchCV(
    estimator=MLPEarthquakePredictor(random_state=42),
    param_grid=mlp_hiperparametreler,
    cv=splitter_mlp,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    verbose=1
)
grid_mlp.fit(train_mlp)
best_model_mlp = grid_mlp.best_estimator_

print(f"En iyi parametreler: {grid_mlp.best_params_}")

# Test tahminleri (orijinal kod)
test_predictions_mlp = best_model_mlp.predict(test_mlp)
test_actual_mlp = test_mlp[['futuremag', 'futuredepth', 'futurelat', 'futurelon']].dropna()
min_len_mlp = min(len(test_predictions_mlp), len(test_actual_mlp))

# Sonuçları değerlendir (orijinal kod)
target_names = ['futuremag', 'futuredepth', 'futurelat', 'futurelon']
target_labels = ['Magnitude', 'Depth', 'Latitude', 'Longitude']



for i, (name, label) in enumerate(zip(target_names, target_labels)):
    if i < test_predictions_mlp.shape[1] and i < test_actual_mlp.shape[1]:
        target_mse = mean_squared_error(
            test_actual_mlp.iloc[:min_len_mlp, i], 
            test_predictions_mlp[:min_len_mlp, i]
        )
        target_mae = mean_absolute_error(
            test_actual_mlp.iloc[:min_len_mlp, i], 
            test_predictions_mlp[:min_len_mlp, i]
        )
        target_r2 = r2_score(
            test_actual_mlp.iloc[:min_len_mlp, i], 
            test_predictions_mlp[:min_len_mlp, i]
        )
        
        print(f"   {label:12}: MSE={target_mse:.3f}, MAE={target_mae:.3f}, R²={target_r2:.3f}")

Outlier detection sonrası veri sayısı: 37
Temizlenen outlier sayısı: 5
Fitting 3 folds for each of 2268 candidates, totalling 6804 fits
En iyi parametreler: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (50,), 'learning_rate_init': 0.001, 'max_iter': 500, 'shiftnum': 2, 'solver': 'adam'}
   Magnitude   : MSE=0.001, MAE=0.029, R²=0.691
   Depth       : MSE=3.568, MAE=1.655, R²=-0.370
   Latitude    : MSE=0.109, MAE=0.303, R²=-2.864
   Longitude   : MSE=9.442, MAE=2.947, R²=-2.030


In [13]:
import joblib
import json
joblib.dump(best_model_mlp, 'models/bestresult.pkl')
best_results = {
    'best_result': grid_mlp.best_params_,
}
with open('models/best_model_mlp.json', 'w') as f:
    json.dump(best_results, f, indent=2)