In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

In [2]:
# Directory that holds the two input CSV files. The default is the original
# author's local checkout; set the GEOQUAKES_RESOURCES_DIR environment variable
# to point at another copy of the data instead of editing this cell.
resources_dir = os.environ.get(
    'GEOQUAKES_RESOURCES_DIR',
    r'C:\Users\andr7495\Documents\GitHubDrewOrtego\PredictiveMaps\geoQuakes\static\resources',
)

# Full paths to the two input files.
earthquakes_csv = os.path.join(resources_dir, 'earthquakeTest.csv')
database_csv = os.path.join(resources_dir, 'database.csv')

In [3]:
# Load both CSV files into DataFrames: database.csv becomes the training
# frame and earthquakeTest.csv becomes the test frame.
df_train = pd.read_csv(filepath_or_buffer=database_csv)
df_test = pd.read_csv(filepath_or_buffer=earthquakes_csv)

In [4]:
# Columns of database.csv that are dropped before modelling.
unused_train_columns = [
    'Depth Error', 'Time', 'Depth Seismic Stations', 'Magnitude Error',
    'Magnitude Seismic Stations', 'Root Mean Square', 'Source',
    'Location Source', 'Magnitude Source', 'Status', 'Azimuthal Gap',
    'Horizontal Distance', 'Horizontal Error',
]
# The four columns both frames are reduced to.
MODEL_COLUMNS = ['Latitude', 'Longitude', 'Magnitude', 'Depth']

df_train_loaded = df_train.drop(columns=unused_train_columns)
df_test_loaded = df_test[['time', 'latitude', 'longitude', 'mag', 'depth']]

# Bring both frames onto the same column names.
df_train_loaded = df_train_loaded.rename(columns={"Magnitude Type": "Magnitude_Type"})
df_test_loaded = df_test_loaded.rename(columns={'time': 'Date', 'latitude': 'Latitude', 'longitude': 'Longitude', 'mag': 'Magnitude', 'depth': 'Depth'})

# dropna() returns a new frame and leaves its input untouched, so the result
# must be bound to a name; otherwise rows with missing values stay in the data.
df_testing = df_test_loaded[MODEL_COLUMNS].dropna()
df_training = df_train_loaded[MODEL_COLUMNS].dropna()

# Display the cleaned training frame as the cell output.
df_training

Unnamed: 0,Latitude,Longitude,Magnitude,Depth
0,19.2460,145.6160,6.0,131.60
1,1.8630,127.3520,5.8,80.00
2,-20.5790,-173.9720,6.2,20.00
3,-59.0760,-23.5570,5.8,15.00
4,11.9380,126.4270,5.8,15.00
...,...,...,...,...
23407,38.3917,-118.8941,5.6,12.30
23408,38.3777,-118.8957,5.5,8.80
23409,36.9179,140.4262,5.9,10.00
23410,-9.0283,118.6639,6.3,79.00


In [5]:
# Show the test frame (Latitude, Longitude, Magnitude, Depth) for a visual check.
df_testing

Unnamed: 0,Latitude,Longitude,Magnitude,Depth
0,32.980000,-115.545833,2.68,11.500
1,2.832700,127.578600,5.00,78.930
2,32.973000,-115.550500,2.65,9.400
3,-5.949700,153.898800,4.10,10.000
4,-2.930200,139.432800,4.10,49.250
...,...,...,...,...
19995,-21.459800,168.774000,4.30,10.000
19996,35.239500,-97.745300,2.60,6.364
19997,42.139833,-121.692667,2.58,6.880
19998,67.461600,-158.713600,2.80,6.500


In [6]:
# Show the training frame (Latitude, Longitude, Magnitude, Depth) for a visual check.
df_training

Unnamed: 0,Latitude,Longitude,Magnitude,Depth
0,19.2460,145.6160,6.0,131.60
1,1.8630,127.3520,5.8,80.00
2,-20.5790,-173.9720,6.2,20.00
3,-59.0760,-23.5570,5.8,15.00
4,11.9380,126.4270,5.8,15.00
...,...,...,...,...
23407,38.3917,-118.8941,5.6,12.30
23408,38.3777,-118.8957,5.5,8.80
23409,36.9179,140.4262,5.9,10.00
23410,-9.0283,118.6639,6.3,79.00


In [7]:
# Inputs are the epicentre coordinates; the model predicts two targets at once.
FEATURE_COLUMNS = ['Latitude', 'Longitude']
TARGET_COLUMNS = ['Magnitude', 'Depth']

# Keep the external data set (built from earthquakeTest.csv) under its own
# names. Previously it was stored in X_test / y_test and then immediately
# overwritten by the train_test_split call below, so it was silently lost.
X_external = df_testing[FEATURE_COLUMNS]
y_external = df_testing[TARGET_COLUMNS]

# X_test / y_test are a 20% hold-out taken from the *training* data; the fixed
# random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df_training[FEATURE_COLUMNS],
    df_training[TARGET_COLUMNS],
    test_size=0.2,
    random_state=42,
)

In [8]:
# Baseline: a random forest with default hyperparameters and a fixed seed,
# fitted on the training split to predict Magnitude and Depth together.
model_reg = RandomForestRegressor(random_state=50)
model_reg.fit(X_train, y_train)

# The former bare `model_reg.predict(X_test)` call was removed: its result was
# discarded, so it only cost time.
# For a regressor, .score() returns the coefficient of determination (R^2),
# not a classification accuracy; it is scaled by 100 here.
score = model_reg.score(X_test, y_test) * 100



In [9]:
# Baseline model's hold-out score (already multiplied by 100 where it is computed).
score

86.93540661667079

In [10]:
# Hyperparameter tuning: grid-search the number of trees in the forest.
# No cv or scoring argument is passed, so GridSearchCV uses its defaults.
parameters = dict(n_estimators=[10, 20, 50, 100, 200, 500])
grid_obj = GridSearchCV(estimator=model_reg, param_grid=parameters)
grid_fit = grid_obj.fit(X_train, y_train)

# Take the best estimator found by the search and predict the hold-out rows.
best_fit = grid_fit.best_estimator_
results = best_fit.predict(X_test)





In [11]:
# Predictions of the tuned model for X_test; each row is [Magnitude, Depth],
# matching the column order of y_train.
print(results)

[[  5.922  155.27  ]
 [  5.513   13.102 ]
 [  5.701   75.591 ]
 ...
 [  6.042  207.621 ]
 [  5.999   19.453 ]
 [  5.709   26.9805]]


In [12]:
# Score the tuned model on the same X_test / y_test used for the baseline
# `score`, scaled by 100 so the two numbers are directly comparable.
score_improve = 100 * best_fit.score(X_test, y_test)



In [13]:
# Tuned model's hold-out score (multiplied by 100), to compare against `score`.
score_improve

86.93540661667079

In [14]:
# Predict with the tuned model on X_test.
# NOTE(review): X_test is the hold-out split produced by train_test_split from
# the training data, not the rows read from earthquakeTest.csv, so these are
# not forecasts for a future year. Confirm which data set was intended.
final_results = best_fit.predict(X_test)
# Evaluate the model on the same hold-out split (score multiplied by 100).
final_score = best_fit.score(X_test, y_test) * 100
final_score



86.93540661667079

In [15]:
# Show the predicted [Magnitude, Depth] pairs for X_test.
final_results

array([[  5.922 , 155.27  ],
       [  5.513 ,  13.102 ],
       [  5.701 ,  75.591 ],
       ...,
       [  6.042 , 207.621 ],
       [  5.999 ,  19.453 ],
       [  5.709 ,  26.9805]])

In [16]:
# final_results has one row per X_test row; column 0 is the predicted
# Magnitude and column 1 the predicted Depth (same order as y_train's columns).
# Slice the columns directly instead of looping over every row.
lst_magnitudes = list(final_results[:, 0])
lst_depth = list(final_results[:, 1])

# .assign() returns a new DataFrame, so the prediction columns are never
# written onto a slice of X_test (which can raise SettingWithCopyWarning).
# Score is a single scalar and is repeated on every row.
df_results = X_test[['Latitude', 'Longitude']].assign(
    Magnitude=lst_magnitudes,
    Depth=lst_depth,
    Score=final_score,
)

In [17]:
# Preview the first rows of the results table (coordinates, predictions, score).
df_results.head()

Unnamed: 0,Latitude,Longitude,Magnitude,Depth,Score
3848,3.166,99.015,5.922,155.27,86.935407
14008,43.679,-29.02,5.513,13.102,86.935407
16258,1.142,98.911,5.701,75.591,86.935407
18090,38.649,15.39,5.652,246.629,86.935407
15192,38.457,31.351,5.977,18.49,86.935407
