# NASA Space Apps 2020
## Automated Detection of Hazards  
  
____________________  

Before the prediction we need to get, prepare data.  
This process includes increasing values, feauture preparation, restoring missing values and connection.  

Firstly, we import libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import random as r
import time
import datetime
import impyute as impy
import math
#import zipfile

#from sklearn.model_selection import train_test_split
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

plt.rc("font", size=14)
sns.set(style="white")
sns.set(style="whitegrid", color_codes=True)

Using TensorFlow backend.


Now we can import dataset(s)

In [2]:
!ls

earthquakes.csv  MaxDate.txt   model.hdf5   predict.ipynb
landslides.csv	 model2.ipynb  model.ipynb


In [3]:
#zf = zipfile.ZipFile('ECA_nonblend_tg.zip')

In [4]:
df=pd.read_csv("earthquakes.csv")
df

Unnamed: 0,Date,Time,Latitude,Longitude,Type,Depth,Depth Error,Depth Seismic Stations,Magnitude,Magnitude Type,...,Magnitude Seismic Stations,Azimuthal Gap,Horizontal Distance,Horizontal Error,Root Mean Square,ID,Source,Location Source,Magnitude Source,Status
0,01/02/1965,13:44:18,19.2460,145.6160,Earthquake,131.60,,,6.0,MW,...,,,,,,ISCGEM860706,ISCGEM,ISCGEM,ISCGEM,Automatic
1,01/04/1965,11:29:49,1.8630,127.3520,Earthquake,80.00,,,5.8,MW,...,,,,,,ISCGEM860737,ISCGEM,ISCGEM,ISCGEM,Automatic
2,01/05/1965,18:05:58,-20.5790,-173.9720,Earthquake,20.00,,,6.2,MW,...,,,,,,ISCGEM860762,ISCGEM,ISCGEM,ISCGEM,Automatic
3,01/08/1965,18:49:43,-59.0760,-23.5570,Earthquake,15.00,,,5.8,MW,...,,,,,,ISCGEM860856,ISCGEM,ISCGEM,ISCGEM,Automatic
4,01/09/1965,13:32:50,11.9380,126.4270,Earthquake,15.00,,,5.8,MW,...,,,,,,ISCGEM860890,ISCGEM,ISCGEM,ISCGEM,Automatic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23407,12/28/2016,08:22:12,38.3917,-118.8941,Earthquake,12.30,1.2,40.0,5.6,ML,...,18.0,42.47,0.120,,0.1898,NN00570710,NN,NN,NN,Reviewed
23408,12/28/2016,09:13:47,38.3777,-118.8957,Earthquake,8.80,2.0,33.0,5.5,ML,...,18.0,48.58,0.129,,0.2187,NN00570744,NN,NN,NN,Reviewed
23409,12/28/2016,12:38:51,36.9179,140.4262,Earthquake,10.00,1.8,,5.9,MWW,...,,91.00,0.992,4.8,1.5200,US10007NAF,US,US,US,Reviewed
23410,12/29/2016,22:30:19,-9.0283,118.6639,Earthquake,79.00,1.8,,6.3,MWW,...,,26.00,3.553,6.0,1.4300,US10007NL0,US,US,US,Reviewed


Let's prepare data.  
1. [Replace NaN](#Replace-NaN)  
2. [Cut useless features](#Cut-useless-features)
3. [Encoding](#Encoding)
4. [Augment](#Augment)

### Replace NaN and missing values

In [5]:
df=df.fillna(0)
df[:5]

Unnamed: 0,Date,Time,Latitude,Longitude,Type,Depth,Depth Error,Depth Seismic Stations,Magnitude,Magnitude Type,...,Magnitude Seismic Stations,Azimuthal Gap,Horizontal Distance,Horizontal Error,Root Mean Square,ID,Source,Location Source,Magnitude Source,Status
0,01/02/1965,13:44:18,19.246,145.616,Earthquake,131.6,0.0,0.0,6.0,MW,...,0.0,0.0,0.0,0.0,0.0,ISCGEM860706,ISCGEM,ISCGEM,ISCGEM,Automatic
1,01/04/1965,11:29:49,1.863,127.352,Earthquake,80.0,0.0,0.0,5.8,MW,...,0.0,0.0,0.0,0.0,0.0,ISCGEM860737,ISCGEM,ISCGEM,ISCGEM,Automatic
2,01/05/1965,18:05:58,-20.579,-173.972,Earthquake,20.0,0.0,0.0,6.2,MW,...,0.0,0.0,0.0,0.0,0.0,ISCGEM860762,ISCGEM,ISCGEM,ISCGEM,Automatic
3,01/08/1965,18:49:43,-59.076,-23.557,Earthquake,15.0,0.0,0.0,5.8,MW,...,0.0,0.0,0.0,0.0,0.0,ISCGEM860856,ISCGEM,ISCGEM,ISCGEM,Automatic
4,01/09/1965,13:32:50,11.938,126.427,Earthquake,15.0,0.0,0.0,5.8,MW,...,0.0,0.0,0.0,0.0,0.0,ISCGEM860890,ISCGEM,ISCGEM,ISCGEM,Automatic


### Cut useless features

In [6]:
df=df.drop(columns=[
                    "Time",
                    "Type",
                    "ID",
                    "Source",
                    "Location Source",
                    "Magnitude Source",
                    "Status",
                    "Depth Error"
                ])
df[:5]

Unnamed: 0,Date,Latitude,Longitude,Depth,Depth Seismic Stations,Magnitude,Magnitude Type,Magnitude Error,Magnitude Seismic Stations,Azimuthal Gap,Horizontal Distance,Horizontal Error,Root Mean Square
0,01/02/1965,19.246,145.616,131.6,0.0,6.0,MW,0.0,0.0,0.0,0.0,0.0,0.0
1,01/04/1965,1.863,127.352,80.0,0.0,5.8,MW,0.0,0.0,0.0,0.0,0.0,0.0
2,01/05/1965,-20.579,-173.972,20.0,0.0,6.2,MW,0.0,0.0,0.0,0.0,0.0,0.0
3,01/08/1965,-59.076,-23.557,15.0,0.0,5.8,MW,0.0,0.0,0.0,0.0,0.0,0.0
4,01/09/1965,11.938,126.427,15.0,0.0,5.8,MW,0.0,0.0,0.0,0.0,0.0,0.0


### Encoding

In [14]:
def encode_dates(a):
    try:
        ts=time.mktime(datetime.datetime.strptime(a, "%m/%d/%Y").timetuple())
    except:
        return 0
    return ts
    #return math.sin(ts)

In [15]:
#Encode dates

df = df[df.Date != 0] #Remove rows with 0 value
df['Date']=df['Date'].apply(encode_dates)
MaxDate=df['Date'].max()
df['Date']=df['Date'].apply(lambda x: x/MaxDate)

In [16]:
#Categories

def myonehot_types(x):
    index=list(set(df['Magnitude Type'])).index(x)
    #return [1 if i==index else 0 for i in range(len(set(landslide_types)))]
    return index

df['Magnitude Type']=df['Magnitude Type'].apply(myonehot_types)
df[:5]

Unnamed: 0,Date,Latitude,Longitude,Depth,Depth Seismic Stations,Magnitude,Magnitude Type,Magnitude Error,Magnitude Seismic Stations,Azimuthal Gap,Horizontal Distance,Horizontal Error,Root Mean Square
0,-0.106324,19.246,145.616,131.6,0.0,6.0,3,0.0,0.0,0.0,0.0,0.0,0.0
1,-0.106207,1.863,127.352,80.0,0.0,5.8,3,0.0,0.0,0.0,0.0,0.0,0.0
2,-0.106149,-20.579,-173.972,20.0,0.0,6.2,3,0.0,0.0,0.0,0.0,0.0,0.0
3,-0.105974,-59.076,-23.557,15.0,0.0,5.8,3,0.0,0.0,0.0,0.0,0.0,0.0
4,-0.105916,11.938,126.427,15.0,0.0,5.8,3,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
df=df.dropna()

# Let's make a model

In [18]:
#data

x=df.iloc[:,:3].to_numpy()
y=df.iloc[:,3:].to_numpy()

x.shape, y.shape

((23412, 3), (23412, 10))

In [35]:
#Build and compile model

model = Sequential()
model.add(Dense(3, input_dim=3, kernel_initializer='normal', activation='relu'))
model.add(Dense(8, kernel_initializer='normal', activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(10, kernel_initializer='normal'))
#sgd = optimizers.SGD(lr=0.01,decay=1e-6,momentum=0.9,nesterov=True)

model.compile(loss='mean_squared_error', optimizer="adam", metrics=["accuracy"])

In [36]:
history = model.fit(x=x,y=y,batch_size=4, epochs=100, verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [38]:
model.predict(x=np.array([[1,48,30]]))

array([[ 5.9068985e+01,  3.1234497e+02,  5.8398328e+00,  9.8447723e+00,
        -2.3078272e-02,  1.2428572e+01,  5.0514030e+01,  1.5383412e+00,
         1.5257006e+00,  1.0581424e+00]], dtype=float32)

In [37]:
model.save("./model2.hdf5")