In [1]:
import csv
import pandas as pd
import numpy as np
import time
#creating dataset

#reading csv file
myfile = pd.read_csv("database.csv", sep=",")
#numpy database
database = myfile.to_numpy()

#assigning features
date = database[:,0]
lat = database[:,2]
long = database[:,3]
mag = database[:,8]

#number of items/examples
m = len(date)

#changing date format to yyyymmdd
for i in range(m):  
    date[i] = time.strptime(date[i],"%m-%d-%Y")
    date[i] = int(time.strftime("%Y%m%d",date[i]))

#initialising array of feaures
X=np.zeros((23409,3))

#adding feature vectors into array
X[:,0] = date
X[:,1] = lat
X[:,2] = long

#y is the expected result, earthquake magnitude
y = mag
print(X)

[[ 1.9650102e+07  1.9246000e+01  1.4561600e+02]
 [ 1.9650104e+07  1.8630000e+00  1.2735200e+02]
 [ 1.9650105e+07 -2.0579000e+01 -1.7397200e+02]
 ...
 [ 2.0161228e+07  3.6917900e+01  1.4042620e+02]
 [ 2.0161229e+07 -9.0283000e+00  1.1866390e+02]
 [ 2.0161230e+07  3.7397300e+01  1.4141030e+02]]


In [27]:
#Z score normalisation

#number of columns/features
n = X.shape[1] 
mu = np.mean(X,axis = 0)
print(mu)
sigma = np.std(X,axis=0)
X = (X-mu)/sigma
#Z = (X-mu)/dev
# for i in range(n):
#     a = X[:,i]
#     sum = np.sum(a)
#     mean = sum/m
#     diff = a - mean
#     sqrddiff = np.sum(diff**2)
#     dev = np.sqrt(sqrddiff/n)
#     a = (a-mean)/dev
#     X[:,i] = a
#     print(mu)



[ 3.84009042e-17 -2.19543414e-18  1.14232827e-18]


In [3]:
#cost function

def compute_cost(w,b,X,y):
    m = len(y)
    diff = 0
    for i in range(m):
        f = np.dot(X[i],w) + b
        diff += (f - y[i])**2
    cost = diff/(2*m)
    
    return(cost)

In [4]:
#initialising w and b
w_init = np.array([0,0,0])
b_init = 0
print(w_init.shape)

#compute cost for initial w and b guesses
cost = compute_cost(w_init,b_init,X,y)

(3,)


In [5]:
#computing gradient
def compute_gradient(w,b,X,y):
    m,n = X.shape

    #initialise w and b derivatives
    der_w = np.zeros((n,))
    der_b = 0

    for i in range(m):
        error = np.dot(X[i],w) + b - y[i]
        der_b += error
        for j in range(n):
            der_w[j] += error * X[i,j]

    der_b = der_b/m
    der_w = der_w/m

    return(der_w,der_b)

In [6]:
der_w,der_b = compute_gradient(w_init,b_init,X,y)
print(der_w,der_b)

[ 0.00015353 -0.0001675  -0.00018479] -5.882558417702829


In [13]:
def gradient_descent(w_init,b_init,X,y,compute_cost,compute_gradient,alpha):

    w = w_init
    b = b_init
    iterations = 1000
    J=np.empty(iterations)

    for k in range(iterations):
        der_w,der_b = compute_gradient(w,b,X,y)
        w = w - alpha*der_w
        b = b - alpha*der_b
        J[k] = compute_cost(w,b,X,y)
        print(k)

    return(w,b,J)

In [14]:
w_final,b_final,J_final = gradient_descent(w_init,b_init,X,y,compute_cost,compute_gradient,0.1)

0
1
2
3
4
5
6
7
8
9


In [15]:
print(J_final)

[14.10431623 11.44150045  9.28461966  7.53754622  6.12241673  4.97616184
  4.04769538  3.29563755  2.6864707   2.19304555]


In [10]:
print(w_final)
print(X.shape)
X_predic = np.array([20221006, 52.52,13.4050])
X_predic_norm = (X_predic - mu)/ 
print(X_predic)

[-0.00153415  0.00167359  0.00184643]
(23409, 3)
[2.0221006e+07 5.2520000e+01 1.3405000e+01]


In [11]:
p = np.dot(X_predic,w_final) + b_final
print(p)
print(X)

-31016.156723434964
[[-2.17295921e-02  6.60423396e-03  9.55881810e-03]
 [-2.17294351e-02  6.92621723e-05  7.91149362e-03]
 [-2.17293567e-02 -8.36759204e-03 -1.92664742e-02]
 ...
 [ 1.83815576e-02  1.32478149e-02  9.09072324e-03]
 [ 1.83816361e-02 -4.02521739e-03  7.12786901e-03]
 [ 1.83817146e-02  1.34280407e-02  9.17948430e-03]]
