In [22]:
import numpy as np
import pandas as pd
import math


# Training the model
def training(x1, y1):
    print('TRAINING DATA')
    X_train_prediction = model.predict(x1)
    #Evaluating the trained model
    evaluation(y_train, X_train_prediction)
    
#Testing the model
def testing(x1, y1):
    print('TESTING DATA')
    X_test_prediction = model.predict(x1)
    #Evaluating the tested model
    evaluation(y1, X_test_prediction)
    
# Predicting with the complete data
def complete_prediction(x1, y1):
    print('COMPLETE DATA')
    X_complete_prediction = model.predict(x1)
    evaluation(y1, X_complete_prediction)
    #Convert to dataframe
    pred = pd.DataFrame(X_complete_prediction, columns=['PREDICTED_GEOID_HEIGHT'])
    # Join original data table and pred
    complete_data = pd.DataFrame(pd.concat([data, pred], axis=1))
    #print into excel csv file
    geoid_surface_result = complete_data.to_csv('geoid_surface_LAGOS_result_GRNN.csv')
    


# Model evaluation with Mean Absolute Error, Root Mean Square Error and Root Meas Square Percentage Error
def evaluation(x, y):
    import sys
    np.set_printoptions(precision=None, threshold=sys.maxsize, edgeitems=7)
    mae = np.abs(np.subtract(x, np.asarray(y))).mean()
    print('MAE = ', mae)
    rmse = math.sqrt(np.square(np.subtract(x, y)).mean())
    print('RMSE = ', rmse)
    n = (np.subtract(x, x.mean())*np.subtract(y, y.mean())).sum()
    d = math.sqrt(np.square(np.subtract(x, x.mean())).sum()*np.square(np.subtract(y, y.mean())).sum())
    r2s = np.ceil(np.square(n/d)*100.0)
    print('Rsquared =', r2s, "\n")


## Modelling Multilayer Perceptron ANN 

In [28]:
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import math

# Read the file
data = pd.read_csv("geoid_surface_data_LAGOS_ALL_quintic.csv")

#data = pd.read_csv("geoid_surface_data_LAGOS.csv")

# Modeling with Selected Features
X = data.drop(columns=['C_N', 'N'], axis=1)

y = data['N']

# Splitting data_var into training-test set in ratio 70-30
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7)

model = Pipeline([('scaler', StandardScaler()), ('sgd', MLPRegressor(hidden_layer_sizes=(32,),
                   activation="relu", 
                   solver='adam',
                   learning_rate_init=0.1,
                   random_state=1, 
                   warm_start=True,
                   max_iter=2000))])
   
# Fitting the model
model.fit(X_train, y_train)

# Calling training, testing and complete_prediction functions
training(X_train, y_train)

testing(X_test, y_test)

complete_prediction(X, y)


TRAINING DATA
MAE =  0.10594847909575965
RMSE =  0.12996711761875082
Rsquared = 90.0 

TESTING DATA
MAE =  0.1054023847511698
RMSE =  0.13101845580779894
Rsquared = 90.0 

COMPLETE DATA
MAE =  0.1057844164171275
RMSE =  0.1302838618309373
Rsquared = 90.0 



## Modelling GRNN

In [30]:
import numpy as np
import pandas as pd
import math
from sklearn import datasets
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import  GridSearchCV
from sklearn.metrics import mean_squared_error as MSE
from pyGRNN import GRNN
# Loading the diabetes dataset
# Read the file
#data = pd.read_csv("geoid_surface_data_LAGOS.csv")

data = pd.read_csv("geoid_surface_data_LAGOS_ALL_quintic.csv")

# Modeling with Selected Features
X = data.drop(columns=['C_N', 'N'], axis=1)

y = data['N']

# Splitting data into training and testing and scaling with minmax_scale
X_train, X_test, y_train, y_test = train_test_split(preprocessing.minmax_scale(X),
                                                    preprocessing.minmax_scale(y),
                                                    test_size=0.3)

# Use Isotropic GRNN with a Grid Search Cross validation to select the optimal bandwidth
IGRNN = GRNN()
params_IGRNN = {'kernel':["RBF"],
                'sigma' : list(np.arange(0.1, 4, 0.01)),
                'calibration' : ['None']
                 }
model = GridSearchCV(estimator=IGRNN,
                          param_grid=params_IGRNN,
                          scoring='neg_mean_squared_error',
                          cv=5,
                          verbose=1
                          )
model.fit(X_train, y_train.ravel())
best_model = model.best_estimator_
y_pred = best_model.predict(X_test)
mse_IGRNN = MSE(y_test, y_pred)
print('Root Mean Squared Error (RMSE)', math.sqrt(mse_IGRNN), "\n")

complete_prediction = best_model.predict(X)
evaluation(complete_prediction, y)

# Calling training, testing and complete_prediction functions
training(X_train, y_train)
testing(X_test, y_test)
#complete_prediction(X, y)


Fitting 5 folds for each of 390 candidates, totalling 1950 fits
Root Mean Squared Error (RMSE) 0.015886146815947737 

MAE =  22.92510071244635
RMSE =  22.92847734481247
Rsquared = nan 

TRAINING DATA
MAE =  0.009638310402819325
RMSE =  0.013252988493083782
Rsquared = 100.0 

TESTING DATA
MAE =  0.011425097050104558
RMSE =  0.015886146815947737
Rsquared = 100.0 

