In [1]:
import sys
import os

current_path = os.path.abspath('')
parent_path = os.path.dirname(current_path)

if parent_path not in sys.path:
    sys.path.insert(0, parent_path)

import geopandas as gpd
import pandas as pd
import numpy as np

# for generating simulated data
import csv 
from pointpats import PoissonPointProcess, as_window 
from libpysal.cg import Point, Polygon

# for classic backfitting
import scipy
import statsmodels.api as sm
from copy import deepcopy
from spglm.iwls import _compute_betas_gwr
from mgwr.search import golden_section

# for modified backfitting
from smoother import ConstantTerm, LinearTerm, DistanceWeighting, SpatialWeightSmoother 
from gass import GASS 



# Repeated Simulation: Classic vs. Modified Backfitting

## Functions

In [2]:
def format_sigma_for_filename(sigma):
    """
    Format a sigma value so it can be embedded in a results filename.

    - The sign is dropped (the absolute value is used).
    - The value is rounded to two decimal places.
    - Trailing zeros after the decimal point are removed; the decimal point
      itself is removed when no fractional digits remain.
    - The leading zero is omitted when the absolute value is less than 1.
    - The decimal point is replaced with 'pt'.

    Examples: -0.5 -> 'pt5', -1.5 -> '1pt5', 2 -> '2', 10 -> '10', 0 -> '0'.
    """
    text = '{:.2f}'.format(abs(sigma))

    # Trim on the plain decimal string, before 'pt' is inserted.
    # str.rstrip takes a *set* of characters, so stripping 'pt0' from e.g.
    # '10pt00' would also eat the integer digit '0' and return '1'.
    text = text.rstrip('0').rstrip('.')

    # '0.5' -> '.5'; a bare '0' (sigma rounds to zero) is kept as '0' so the
    # filename component is never empty.
    if text.startswith('0.'):
        text = text[1:]

    return text.replace('.', 'pt')

In [3]:
# Classic Backfitting

def classic_backfit(y, X, w, term_mapping, verbose = False, max_iter = 50, tol = 1e-8):
    """
    Classic backfitting over the columns of X, re-selecting the decay
    parameter of every smoother term on each pass.

    Parameters
    ----------
    y : array reshaped to a column; the response.
    X : 2-D array with one column per term. Smoother columns are overwritten
        in place with the values produced by the selected sigma.
    w : array reshaped to a column and passed to `_compute_betas_gwr` as
        weights.
    term_mapping : mapping from column index j to `(type_name, term_instance)`.
        Terms whose type name is 'LinearTerm' or 'ConstantTerm' are fitted
        as-is; any other term must provide `cal(sigma)`, `lower_bound` and
        `upper_bound`.
    verbose : print the convergence score after every pass.
    max_iter : maximum number of passes over the terms.
    tol : the loop stops once the convergence score drops below this value.

    Returns
    -------
    (params, X, sigs) : coefficients from the last pass, the (mutated) design
    matrix, and the list of selected sigmas.

    Notes
    -----
    - `sigs` has exactly two slots and column j writes to `sigs[j-1]`, so the
      layout assumes one non-smoother term in column 0 followed by two
      smoothers. NOTE(review): confirm before using other layouts.
    - The sigma search scores each candidate by the AIC of an OLS fit of the
      full response `y` (not the partial residual) on the smoothed column.
    - The positional arguments of `golden_section` (0.3879, 1e-2, 50, 50) are
      passed through unchanged; see mgwr.search.golden_section for their
      meaning.
    """
    _, n_terms = X.shape
    w_col = w.reshape((-1, 1))

    # Joint fit of all columns gives the starting contributions and residual.
    betas = _compute_betas_gwr(y, X, w_col)[0]
    XB = np.multiply(betas.T, X)
    err = y.reshape((-1, 1)) - np.dot(X, betas)

    sigs = [-1, -1]
    fixed_term_types = ('LinearTerm', 'ConstantTerm')

    for n_iter in range(1, max_iter + 1):
        new_XB = np.zeros_like(X)
        params = np.zeros_like(betas)

        for j in range(n_terms):
            type_name, term_instance = term_mapping[j]

            # Partial residual for term j: its current contribution plus the
            # running residual.
            partial_y = XB[:, j].reshape((-1, 1)) + err.reshape((-1, 1))

            if type_name in fixed_term_types:
                column = X[:, j].reshape((-1, 1))
            else:
                def aic_for_sigma(x):
                    return sm.OLS(y, np.hstack((term_instance.cal(x)))).fit().aic

                sig = golden_section(term_instance.lower_bound, term_instance.upper_bound, 0.3879, aic_for_sigma, 1e-2, 50, 50)[0]
                sigs[j-1] = sig

                # Refresh the design-matrix column with the newly smoothed values.
                smoothed = term_instance.cal(sig)
                X[:, j] = smoothed.flatten()
                column = smoothed.flatten().reshape((-1, 1))

            beta = _compute_betas_gwr(partial_y, column, w_col)[0]
            fitted = np.dot(column, beta)
            new_XB[:, j] = fitted.flatten()
            err = (partial_y - fitted).reshape((-1, 1))
            params[j, :] = beta[0][0]

        # Score of change: squared change in contributions, relative to the
        # squared size of the previous additive predictor.
        score = np.sum((XB - new_XB)**2) / (1 + np.sum(np.sum(XB, axis=1)**2))
        XB = new_XB

        if verbose:
            print("Current iteration:", n_iter, ",SOC:", np.round(score, 8))
        if score < tol:
            break

    return params, X, sigs

def calibrate_Gaussian(y, X, term_mapping, verbose = False, max_iter = 50, crit_threshold = 1e-8):
    """
    Outer calibration loop for the Gaussian model: repeatedly run
    `classic_backfit` with unit weights until the term contributions stop
    changing or `max_iter` outer iterations have been performed.

    Parameters
    ----------
    y : response array (copied; the caller's array is not modified here).
    X : initial design matrix (copied before being handed to
        `classic_backfit`, which overwrites smoother columns in place).
    term_mapping : forwarded to `classic_backfit`.
    verbose : forwarded to `classic_backfit`.
    max_iter : cap on outer iterations; also forwarded as the inner cap.
    crit_threshold : outer stopping threshold; also forwarded as the inner `tol`.

    Returns
    -------
    (betas, sigmas) from the last `classic_backfit` call.

    Notes
    -----
    NOTE(review): in the denominator, `w` has shape (n,) and is multiplied by
    an (n, 1) column, which broadcasts to (n, n) before summing. Kept as in
    the original; confirm this is intended.
    """
    X = X.copy()
    y = y.copy()

    sigmas = [-1, -1]
    s_old = np.ones_like(X)
    crit = 9999

    for _ in range(max_iter):
        if not crit > crit_threshold:
            break

        w = np.ones(X.shape[0])
        z = y.reshape((-1, 1))
        betas, X, sigmas = classic_backfit(z, X, w, term_mapping = term_mapping, verbose = verbose, max_iter = max_iter, tol = crit_threshold)

        # Per-term contributions under the new fit.
        s_new = np.multiply(betas.T, X)

        change_per_row = np.sum((s_old - s_new)**2, axis=1)
        size_per_row = np.sum((1 + s_old)**2, axis=1).reshape((-1, 1))
        crit = np.sum(w * change_per_row) / np.sum(w * size_per_row)

        s_old = s_new

    return betas, sigmas

In [4]:
def simulate_and_write_results_classic(sigma1, sigma2):
    """
    Run 100 noise-free simulations, fit each one with classic backfitting
    (`calibrate_Gaussian`), and append the estimates to a CSV file.

    For every seed 0..99 a set of 150 stations and 900 POIs is generated in a
    2000 x 2000 square, y is built from a linear term of X1, a smoother of X2
    evaluated at `sigma1` and a smoother of the POIs evaluated at `sigma2`
    (all true coefficients equal to 1, no noise added), and the model is
    re-estimated starting from sigma = -1 for both smoothers.

    Parameters
    ----------
    sigma1 : true decay parameter passed to the X2 smoother's `cal`.
    sigma2 : true decay parameter passed to the POI smoother's `cal`.

    Returns
    -------
    None. One row per simulation is appended to
    ../results/0%Noise_classic_<sigma1>_<sigma2>.csv; the header row is
    written only when the file is empty.
    """
    # The true sigmas are encoded in the output file name.
    sigma1_str = format_sigma_for_filename(sigma1)
    sigma2_str = format_sigma_for_filename(sigma2)
    filename = f"../results/0%Noise_classic_{sigma1_str}_{sigma2_str}.csv"

    header = ["beta_x1", "beta_s_x2", "beta_s_pois", "sigma_s_x2", "sigma_s_pois"]
    true_coefs = [1,1,1]  # true coefficients of X1, s(X2) and s(POIs)

    with open(filename, "a", newline='') as csvfile:
        writer = csv.writer(csvfile)

        # Append mode: only an empty file receives the header row.
        if csvfile.tell() == 0:
            writer.writerow(header)

        for seed in range(100):  # the loop index doubles as the random seed
            # --- Study area -------------------------------------------------
            np.random.seed(seed)
            corners = [Point((0, 0)), Point((0, 2000)), Point((2000, 2000)), Point((2000, 0))]
            window = as_window(Polygon(corners))

            # --- Stations ---------------------------------------------------
            # Draw order after seeding matters: locations, then X1, then X2.
            np.random.seed(seed)
            station_xy = PoissonPointProcess(window, 150, 1, conditioning=False, asPP=False).realizations[0]
            x1_values = np.random.uniform(2, 100, 150)
            x2_values = np.random.uniform(100, 500, 150)

            station_id_width = len(str(150))
            stations = (
                pd.DataFrame(station_xy, columns = ['Lon', 'Lat'])
                .assign(Name = lambda frame: np.arange(frame.shape[0]))
                .assign(Name = lambda frame: frame.Name.astype(str).str.zfill(station_id_width).apply(lambda label: "{}{}".format('R', label)))
                .assign(X1 = x1_values, X2 = x2_values)
            )

            # --- POIs -------------------------------------------------------
            np.random.seed(seed * 2)
            poi_xy = PoissonPointProcess(window, 900, 1, conditioning=False, asPP=False).realizations[0]

            poi_id_width = len(str(900))
            pois = (
                pd.DataFrame(poi_xy, columns = ['Lon', 'Lat'])
                .assign(Name = lambda frame: np.arange(frame.shape[0]))
                .assign(Name = lambda frame: frame.Name.astype(str).str.zfill(poi_id_width).apply(lambda label: "{}{}".format('I', label)))
                .assign(AttributeI = lambda frame: np.ones(frame.shape[0]))
            )

            # --- Inputs for the terms ---------------------------------------
            # Attribute table and point geometries of the stations.
            dta_simul = stations[['Name', 'X1', 'X2']]
            station_locs = stations[['Name', 'Lon', 'Lat']]
            dta_simul_gdf = gpd.GeoDataFrame(station_locs.copy(), geometry=gpd.points_from_xy(station_locs.Lon, station_locs.Lat))

            # Attribute table and point geometries of the POIs, plus a
            # combined map holding both stations and POIs.
            pois_simul_df = pois[['Name', 'AttributeI']]
            poi_locs = pois[['Name', 'Lon', 'Lat']]
            pois_simul_gdf = gpd.GeoDataFrame(poi_locs.copy(), geometry=gpd.points_from_xy(poi_locs.Lon, poi_locs.Lat))
            map_simul_gdf = pd.concat([dta_simul_gdf, pois_simul_gdf])

            # --- Model terms ------------------------------------------------
            lin_simul = LinearTerm(dta_simul, 1, standard = True)  # X1
            sws_simul_X2 = SpatialWeightSmoother(dta_simul, dta_simul_gdf, [0,0,2], standard = True)  # s(X2)
            dw_simul_pois = DistanceWeighting(dta_simul, map_simul_gdf, pois_simul_df, [0,0,0,1], standard = True)  # s(POIs)

            # Smoothed columns at the true decay parameters.
            true_s_x2 = sws_simul_X2.cal(sigma1)
            true_s_pois = dw_simul_pois.cal(sigma2)

            # --- Noise-free response ----------------------------------------
            np.random.seed(seed)
            X_true = np.hstack((lin_simul.X, true_s_x2, true_s_pois))
            y_fitted = np.dot(X_true, true_coefs).reshape(-1,1)

            # --- Classic backfitting ----------------------------------------
            term_mapping = {
                0: (type(lin_simul).__name__, lin_simul),
                1: (type(sws_simul_X2).__name__, sws_simul_X2),
                2: (type(dw_simul_pois).__name__, dw_simul_pois),
            }

            # Both smoothers start from sigma = -1.
            initial_X_simul = np.hstack((lin_simul.X, sws_simul_X2.cal(-1), dw_simul_pois.cal(-1)))

            coefs, sigs = calibrate_Gaussian(y_fitted, initial_X_simul, term_mapping)

            # --- Store results ----------------------------------------------
            estimates = (coefs[0], coefs[1], coefs[2], sigs[0], sigs[1])
            writer.writerow([pd.to_numeric(value.item()) for value in estimates])

In [5]:
def simulate_and_write_results_modified(sigma1, sigma2):
    """
    Run 100 noise-free simulations, fit each one with the modified
    backfitting implemented by `GASS`, and append the estimates to a CSV file.

    For every seed 0..99 a set of 150 stations and 900 POIs is generated in a
    2000 x 2000 square, y is built from a linear term of X1, a smoother of X2
    evaluated at `sigma1` and a smoother of the POIs evaluated at `sigma2`
    (all true coefficients equal to 1, no noise added), and a `GASS` model
    without a constant is fitted with `fit_Gaussian`.

    Parameters
    ----------
    sigma1 : true decay parameter passed to the X2 smoother's `cal`.
    sigma2 : true decay parameter passed to the POI smoother's `cal`.

    Returns
    -------
    None. One row per simulation is appended to
    ../results/0%Noise_<sigma1>_<sigma2>.csv; the header row is written only
    when the file is empty.
    """
    # The true sigmas are encoded in the output file name.
    sigma1_str = format_sigma_for_filename(sigma1)
    sigma2_str = format_sigma_for_filename(sigma2)
    filename = f"../results/0%Noise_{sigma1_str}_{sigma2_str}.csv"

    header = ["beta_x1", "beta_s_x2", "beta_s_pois", "sigma_s_x2", "sigma_s_pois"]
    true_coefs = [1,1,1]  # true coefficients of X1, s(X2) and s(POIs)

    with open(filename, "a", newline='') as csvfile:
        writer = csv.writer(csvfile)

        # Append mode: only an empty file receives the header row.
        if csvfile.tell() == 0:
            writer.writerow(header)

        for seed in range(100):  # the loop index doubles as the random seed
            # --- Study area -------------------------------------------------
            np.random.seed(seed)
            corners = [Point((0, 0)), Point((0, 2000)), Point((2000, 2000)), Point((2000, 0))]
            window = as_window(Polygon(corners))

            # --- Stations ---------------------------------------------------
            # Draw order after seeding matters: locations, then X1, then X2.
            np.random.seed(seed)
            station_xy = PoissonPointProcess(window, 150, 1, conditioning=False, asPP=False).realizations[0]
            x1_values = np.random.uniform(2, 100, 150)
            x2_values = np.random.uniform(100, 500, 150)

            station_id_width = len(str(150))
            stations = (
                pd.DataFrame(station_xy, columns = ['Lon', 'Lat'])
                .assign(Name = lambda frame: np.arange(frame.shape[0]))
                .assign(Name = lambda frame: frame.Name.astype(str).str.zfill(station_id_width).apply(lambda label: "{}{}".format('R', label)))
                .assign(X1 = x1_values, X2 = x2_values)
            )

            # --- POIs -------------------------------------------------------
            np.random.seed(seed * 2)
            poi_xy = PoissonPointProcess(window, 900, 1, conditioning=False, asPP=False).realizations[0]

            poi_id_width = len(str(900))
            pois = (
                pd.DataFrame(poi_xy, columns = ['Lon', 'Lat'])
                .assign(Name = lambda frame: np.arange(frame.shape[0]))
                .assign(Name = lambda frame: frame.Name.astype(str).str.zfill(poi_id_width).apply(lambda label: "{}{}".format('I', label)))
                .assign(AttributeI = lambda frame: np.ones(frame.shape[0]))
            )

            # --- Inputs for the terms ---------------------------------------
            # Attribute table and point geometries of the stations.
            dta_simul = stations[['Name', 'X1', 'X2']]
            station_locs = stations[['Name', 'Lon', 'Lat']]
            dta_simul_gdf = gpd.GeoDataFrame(station_locs.copy(), geometry=gpd.points_from_xy(station_locs.Lon, station_locs.Lat))

            # Attribute table and point geometries of the POIs, plus a
            # combined map holding both stations and POIs.
            pois_simul_df = pois[['Name', 'AttributeI']]
            poi_locs = pois[['Name', 'Lon', 'Lat']]
            pois_simul_gdf = gpd.GeoDataFrame(poi_locs.copy(), geometry=gpd.points_from_xy(poi_locs.Lon, poi_locs.Lat))
            map_simul_gdf = pd.concat([dta_simul_gdf, pois_simul_gdf])

            # --- Model terms ------------------------------------------------
            lin_simul = LinearTerm(dta_simul, 1, standard = True)  # X1
            sws_simul_X2 = SpatialWeightSmoother(dta_simul, dta_simul_gdf, [0,0,2], standard = True)  # s(X2)
            dw_simul_pois = DistanceWeighting(dta_simul, map_simul_gdf, pois_simul_df, [0,0,0,1], standard = True)  # s(POIs)

            # Smoothed columns at the true decay parameters.
            true_s_x2 = sws_simul_X2.cal(sigma1)
            true_s_pois = dw_simul_pois.cal(sigma2)

            # --- Noise-free response ----------------------------------------
            np.random.seed(seed)
            X_true = np.hstack((lin_simul.X, true_s_x2, true_s_pois))
            y_fitted = np.dot(X_true, true_coefs).reshape(-1,1)

            # --- Modified backfitting (GASS) --------------------------------
            gass_simul = GASS(y_fitted, lin_simul, sws_simul_X2, dw_simul_pois, constant = False)
            gass_simul.fit_Gaussian()

            # --- Store results ----------------------------------------------
            estimates = (
                gass_simul.coefficients[0],
                gass_simul.coefficients[1],
                gass_simul.coefficients[2],
                gass_simul.sigmas[0],
                gass_simul.sigmas[1],
            )
            writer.writerow([pd.to_numeric(value.item()) for value in estimates])

In [6]:
# Test the combination of sigma1 = -0.5 & sigma2 = -1.5 (no noise added to y).
# Each call appends 100 rows to a CSV under '../results/':
#   classic backfitting  -> "0%Noise_classic_pt5_1pt5.csv"
#   modified backfitting -> "0%Noise_pt5_1pt5.csv"
# Files are opened in append mode, so re-running this cell adds another 100 rows.

simulate_and_write_results_classic(-0.5, -1.5)
simulate_and_write_results_modified(-0.5, -1.5)

# Repeated Simulations with Noise

In [2]:
# Compute the error term's standard deviation with p*100 % noise levels

def error_term_sdd(y_fitted, p):
    """
    Standard deviation of the error term that makes noise account for a
    share `p` of the total variance.

    With var_error = p * var(y_fitted) / (1 - p), the ratio
    var_error / (var_error + var(y_fitted)) equals p.

    Parameters
    ----------
    y_fitted : array-like, the noise-free (systematic) response.
    p : noise share, 0 <= p < 1 (e.g. 0.05 for 5% noise). Scalars and arrays
        are accepted.

    Returns
    -------
    The standard deviation of the error term (0 when p == 0).

    Raises
    ------
    ValueError
        If any value of `p` is negative or >= 1, which would otherwise
        produce a nan or infinite standard deviation.
    """
    share = np.asarray(p)
    if np.any((share < 0) | (share >= 1)):
        raise ValueError("p must satisfy 0 <= p < 1, got {!r}".format(p))

    # Variance of the systematic part of y
    var_systematic = np.var(y_fitted)

    # Error variance implied by the requested noise share
    var_error = p * var_systematic / (1 - p)

    return np.sqrt(var_error)

## 5% Noise

In [31]:
# 5% noise experiment: for each seed 0..99, simulate stations and POIs, build
# y from known coefficients and decay parameters plus Gaussian noise, fit a
# GASS model, and append the estimates together with the AWCI and RBCI bounds
# of both sigmas to ../results/5%Noise.csv.
import csv
# The file is opened in append mode: re-running this cell adds rows to any
# existing results instead of replacing them.
with open("../results/5%Noise.csv", "a", newline='') as csvfile_5:
    writer = csv.writer(csvfile_5)

    # Write the header row only when the file is empty
    if csvfile_5.tell() == 0:
        writer.writerow(["beta_x1", "beta_s_x2", "beta_s_pois", "sigma_s_x2", "sigma_s_pois",
                         "lb_awci_sigma_s_x2", "ub_awci_sigma_s_x2", "lb_awci_sigma_s_pois", "ub_awci_sigma_s_pois",
                         "lb_rbci_sigma_s_x2", "ub_rbci_sigma_s_x2", "lb_rbci_sigma_s_pois", "ub_rbci_sigma_s_pois"])

    # 1. Implement 100 Simulations
    n_iter = 0 # progress counter, printed after each completed simulation
    for i in range(100): # i is also the seed

        # Spatial system: a 2000 x 2000 square window
        np.random.seed(i)
        square = Polygon([Point((0, 0)), Point((0, 2000)), Point((2000, 2000)), Point((2000, 0))])
        squwin = as_window(square)

        # Stations: re-seed so that locations, X1 and X2 are drawn in a fixed order
        np.random.seed(i)
        squsamples = PoissonPointProcess(squwin, 150, 1, conditioning=False, asPP=False)
        squpts_random = squsamples.realizations[0]

        # DataFrame for Stations; names are 'R' + zero-padded row number
        random_df = pd.DataFrame(squpts_random, columns = ['Lon', 'Lat'])
        random_df = random_df.assign(Name = np.arange(random_df.shape[0]))
        digits = len(str(150))
        random_df.Name = random_df.Name.astype(str).str.zfill(digits).apply(lambda x: "{}{}".format('R', x))

        # Add attributes, X1 and X2, for Stations
        attr1 = np.random.uniform(2, 100, 150)
        attr2 = np.random.uniform(100, 500, 150)

        random_df = random_df.assign(X1 = attr1, X2 = attr2)

        # POIs: drawn with a different seed (2 * i) than the stations
        np.random.seed(i * 2)
        squsamples_pois = PoissonPointProcess(squwin, 900, 1, conditioning=False, asPP=False)
        squpts_pois = squsamples_pois.realizations[0]

        # DataFrame for POIs; names are 'I' + zero-padded row number
        pois_df = pd.DataFrame(squpts_pois, columns = ['Lon', 'Lat'])
        pois_df = pois_df.assign(Name = np.arange(pois_df.shape[0]))
        digits = len(str(900))
        pois_df.Name = pois_df.Name.astype(str).str.zfill(digits).apply(lambda x: "{}{}".format('I', x))

        # Add attribute for POIs (a constant 1 for every POI)
        attrI = np.ones(pois_df.shape[0])
        pois_df = pois_df.assign(AttributeI = attrI)

        # Necessary data for constructing the `LinearTerm` of X1 and the `SpatialWeightSmoother` of X2
        dta_simul = random_df[['Name', 'X1', 'X2']]
        dta_simul_gdf = random_df[['Name', 'Lon', 'Lat']]
        dta_simul_gdf = gpd.GeoDataFrame(dta_simul_gdf.copy(), geometry=gpd.points_from_xy(dta_simul_gdf.Lon, dta_simul_gdf.Lat))

        # Necessary data for constructing the `DistanceWeighting` smoother of POIs
        pois_simul_df = pois_df[['Name', 'AttributeI']]
        pois_simul_gdf = pois_df[['Name', 'Lon', 'Lat']]
        pois_simul_gdf = gpd.GeoDataFrame(pois_simul_gdf.copy(), geometry=gpd.points_from_xy(pois_simul_gdf.Lon, pois_simul_gdf.Lat))
        map_simul_gdf = pd.concat([dta_simul_gdf, pois_simul_gdf]) # A map includes both the stations and POIs

        # Construct the linear term for X1 and the spatial smoothers for X2 and POIs
        lin_simul = LinearTerm(dta_simul, 1, standard = True) # X1
        sws_simul_X2 = SpatialWeightSmoother(dta_simul, dta_simul_gdf, [0,0,2], standard = True) # smoothing X2
        dw_simul_pois = DistanceWeighting(dta_simul, map_simul_gdf, pois_simul_df, [0,0,0,1], standard = True)# smoothing POI

        # Set the true values of distance decay parameters for s(X2) and s(POIs)
        s_X2_simul = sws_simul_X2.cal(-0.5) #s(X2)
        s_POIs_simul = dw_simul_pois.cal(-1.5) #s(POIs)

        # Set the true values of coefficients for X1, s(X2), and s(POIs)
        coefs_simul = [1,1,1]

        # Generate y by adding 5% noise; re-seeding makes the noise draw reproducible per simulation
        np.random.seed(i)
        X_simul = np.hstack((lin_simul.X, s_X2_simul, s_POIs_simul))
        y_fitted = np.dot(X_simul, coefs_simul).reshape(-1,1)
        y_simul = y_fitted + np.random.normal(0, error_term_sdd(y_fitted, 0.05), (dta_simul.shape[0],1)).reshape(-1,1) # 0.05: 5%

        # Fit GASS model, then run inference and compute both kinds of sigma intervals
        gass_simul = GASS(y_simul, lin_simul, sws_simul_X2, dw_simul_pois, constant = False)
        gass_simul.fit_Gaussian()
        gass_simul.inference_Gaussian()
        gass_simul.calculate_AWCI_sigmas()
        gass_simul.calculate_RBCI_sigmas(max_iter = 100)

        # Store results: point estimates of the three coefficients and two sigmas
        beta_x1 = pd.to_numeric(gass_simul.coefficients[0][0])
        beta_s_x2 = pd.to_numeric(gass_simul.coefficients[1][0])
        beta_s_pois = pd.to_numeric(gass_simul.coefficients[2][0])
        sigma_s_x2 = pd.to_numeric(gass_simul.sigmas[0])
        sigma_s_pois = pd.to_numeric(gass_simul.sigmas[1])

        # AWCI bounds: index 0 is written as the lower bound, index 1 as the upper bound
        lb_awci_sigma_s_x2 = pd.to_numeric(gass_simul.AWCI_sigmas[0][0])
        ub_awci_sigma_s_x2 = pd.to_numeric(gass_simul.AWCI_sigmas[0][1])
        lb_awci_sigma_s_pois = pd.to_numeric(gass_simul.AWCI_sigmas[1][0])
        ub_awci_sigma_s_pois = pd.to_numeric(gass_simul.AWCI_sigmas[1][1])

        # RBCI bounds, same layout as the AWCI bounds
        lb_rbci_sigma_s_x2 = pd.to_numeric(gass_simul.RBCI_sigmas[0][0])
        ub_rbci_sigma_s_x2 = pd.to_numeric(gass_simul.RBCI_sigmas[0][1])
        lb_rbci_sigma_s_pois = pd.to_numeric(gass_simul.RBCI_sigmas[1][0])
        ub_rbci_sigma_s_pois = pd.to_numeric(gass_simul.RBCI_sigmas[1][1])

        # Write results to the CSV file (column order matches the header above)
        writer.writerow([beta_x1, beta_s_x2, beta_s_pois, sigma_s_x2, sigma_s_pois,
                         lb_awci_sigma_s_x2, ub_awci_sigma_s_x2, lb_awci_sigma_s_pois, ub_awci_sigma_s_pois,
                         lb_rbci_sigma_s_x2, ub_rbci_sigma_s_x2, lb_rbci_sigma_s_pois, ub_rbci_sigma_s_pois])

        # Track iteration progress
        print(n_iter)
        n_iter+=1

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


## 10% Noise

In [32]:
# 10% noise experiment: for each seed 0..99, simulate stations and POIs, build
# y from known coefficients and decay parameters plus Gaussian noise, fit a
# GASS model, and append the estimates together with the AWCI and RBCI bounds
# of both sigmas to ../results/10%Noise.csv.
# The file is opened in append mode: re-running this cell adds rows to any
# existing results instead of replacing them.
with open("../results/10%Noise.csv", "a", newline='') as csvfile_10:
    writer = csv.writer(csvfile_10)

    # Write the header row only when the file is empty
    if csvfile_10.tell() == 0:
        writer.writerow(["beta_x1", "beta_s_x2", "beta_s_pois", "sigma_s_x2", "sigma_s_pois",
                         "lb_awci_sigma_s_x2", "ub_awci_sigma_s_x2", "lb_awci_sigma_s_pois", "ub_awci_sigma_s_pois",
                         "lb_rbci_sigma_s_x2", "ub_rbci_sigma_s_x2", "lb_rbci_sigma_s_pois", "ub_rbci_sigma_s_pois"])

    # 1. Implement 100 Simulations
    n_iter = 0 # progress counter, printed after each completed simulation
    for i in range(100): # i is also the seed

        # Spatial system: a 2000 x 2000 square window
        np.random.seed(i)
        square = Polygon([Point((0, 0)), Point((0, 2000)), Point((2000, 2000)), Point((2000, 0))])
        squwin = as_window(square)

        # Stations: re-seed so that locations, X1 and X2 are drawn in a fixed order
        np.random.seed(i)
        squsamples = PoissonPointProcess(squwin, 150, 1, conditioning=False, asPP=False)
        squpts_random = squsamples.realizations[0]

        # DataFrame for Stations; names are 'R' + zero-padded row number
        random_df = pd.DataFrame(squpts_random, columns = ['Lon', 'Lat'])
        random_df = random_df.assign(Name = np.arange(random_df.shape[0]))
        digits = len(str(150))
        random_df.Name = random_df.Name.astype(str).str.zfill(digits).apply(lambda x: "{}{}".format('R', x))

        # Add attributes, X1 and X2, for Stations
        attr1 = np.random.uniform(2, 100, 150)
        attr2 = np.random.uniform(100, 500, 150)

        random_df = random_df.assign(X1 = attr1, X2 = attr2)

        # POIs: drawn with a different seed (2 * i) than the stations
        np.random.seed(i * 2)
        squsamples_pois = PoissonPointProcess(squwin, 900, 1, conditioning=False, asPP=False)
        squpts_pois = squsamples_pois.realizations[0]

        # DataFrame for POIs; names are 'I' + zero-padded row number
        pois_df = pd.DataFrame(squpts_pois, columns = ['Lon', 'Lat'])
        pois_df = pois_df.assign(Name = np.arange(pois_df.shape[0]))
        digits = len(str(900))
        pois_df.Name = pois_df.Name.astype(str).str.zfill(digits).apply(lambda x: "{}{}".format('I', x))

        # Add attribute for POIs (a constant 1 for every POI)
        attrI = np.ones(pois_df.shape[0])
        pois_df = pois_df.assign(AttributeI = attrI)

        # Necessary data for constructing the `LinearTerm` of X1 and the `SpatialWeightSmoother` of X2
        dta_simul = random_df[['Name', 'X1', 'X2']]
        dta_simul_gdf = random_df[['Name', 'Lon', 'Lat']]
        dta_simul_gdf = gpd.GeoDataFrame(dta_simul_gdf.copy(), geometry=gpd.points_from_xy(dta_simul_gdf.Lon, dta_simul_gdf.Lat))

        # Necessary data for constructing the `DistanceWeighting` smoother of POIs
        pois_simul_df = pois_df[['Name', 'AttributeI']]
        pois_simul_gdf = pois_df[['Name', 'Lon', 'Lat']]
        pois_simul_gdf = gpd.GeoDataFrame(pois_simul_gdf.copy(), geometry=gpd.points_from_xy(pois_simul_gdf.Lon, pois_simul_gdf.Lat))
        map_simul_gdf = pd.concat([dta_simul_gdf, pois_simul_gdf]) # A map includes both the stations and POIs

        # Construct the linear term for X1 and the spatial smoothers for X2 and POIs
        lin_simul = LinearTerm(dta_simul, 1, standard = True) # X1
        sws_simul_X2 = SpatialWeightSmoother(dta_simul, dta_simul_gdf, [0,0,2], standard = True) # smoothing X2
        dw_simul_pois = DistanceWeighting(dta_simul, map_simul_gdf, pois_simul_df, [0,0,0,1], standard = True)# smoothing POI

        # Set the true values of distance decay parameters for s(X2) and s(POIs)
        s_X2_simul = sws_simul_X2.cal(-0.5) #s(X2)
        s_POIs_simul = dw_simul_pois.cal(-1.5) #s(POIs)

        # Set the true values of coefficients for X1, s(X2), and s(POIs)
        coefs_simul = [1,1,1]

        # Generate y by adding 10% noise; re-seeding makes the noise draw reproducible per simulation
        np.random.seed(i)
        X_simul = np.hstack((lin_simul.X, s_X2_simul, s_POIs_simul))
        y_fitted = np.dot(X_simul, coefs_simul).reshape(-1,1)
        y_simul = y_fitted + np.random.normal(0, error_term_sdd(y_fitted, 0.10), (dta_simul.shape[0],1)).reshape(-1,1) # 0.10: 10%

        # Fit GASS model, then run inference and compute both kinds of sigma intervals
        gass_simul = GASS(y_simul, lin_simul, sws_simul_X2, dw_simul_pois, constant = False)
        gass_simul.fit_Gaussian()
        gass_simul.inference_Gaussian()
        gass_simul.calculate_AWCI_sigmas()
        gass_simul.calculate_RBCI_sigmas(max_iter = 100)

        # Store results: point estimates of the three coefficients and two sigmas
        beta_x1 = pd.to_numeric(gass_simul.coefficients[0][0])
        beta_s_x2 = pd.to_numeric(gass_simul.coefficients[1][0])
        beta_s_pois = pd.to_numeric(gass_simul.coefficients[2][0])
        sigma_s_x2 = pd.to_numeric(gass_simul.sigmas[0])
        sigma_s_pois = pd.to_numeric(gass_simul.sigmas[1])

        # AWCI bounds: index 0 is written as the lower bound, index 1 as the upper bound
        lb_awci_sigma_s_x2 = pd.to_numeric(gass_simul.AWCI_sigmas[0][0])
        ub_awci_sigma_s_x2 = pd.to_numeric(gass_simul.AWCI_sigmas[0][1])
        lb_awci_sigma_s_pois = pd.to_numeric(gass_simul.AWCI_sigmas[1][0])
        ub_awci_sigma_s_pois = pd.to_numeric(gass_simul.AWCI_sigmas[1][1])

        # RBCI bounds, same layout as the AWCI bounds
        lb_rbci_sigma_s_x2 = pd.to_numeric(gass_simul.RBCI_sigmas[0][0])
        ub_rbci_sigma_s_x2 = pd.to_numeric(gass_simul.RBCI_sigmas[0][1])
        lb_rbci_sigma_s_pois = pd.to_numeric(gass_simul.RBCI_sigmas[1][0])
        ub_rbci_sigma_s_pois = pd.to_numeric(gass_simul.RBCI_sigmas[1][1])

        # Write results to the CSV file (column order matches the header above)
        writer.writerow([beta_x1, beta_s_x2, beta_s_pois, sigma_s_x2, sigma_s_pois,
                         lb_awci_sigma_s_x2, ub_awci_sigma_s_x2, lb_awci_sigma_s_pois, ub_awci_sigma_s_pois,
                         lb_rbci_sigma_s_x2, ub_rbci_sigma_s_x2, lb_rbci_sigma_s_pois, ub_rbci_sigma_s_pois])

        # Track iteration progress
        print(n_iter)
        n_iter+=1

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


## 25% Noise

In [33]:
# 25% noise experiment: for each seed 0..99, simulate stations and POIs, build
# y from known coefficients and decay parameters plus Gaussian noise, fit a
# GASS model, and append the estimates together with the AWCI and RBCI bounds
# of both sigmas to ../results/25%Noise.csv.
# The file is opened in append mode: re-running this cell adds rows to any
# existing results instead of replacing them.
with open("../results/25%Noise.csv", "a", newline='') as csvfile_25:
    writer = csv.writer(csvfile_25)

    # Write the header row only when the file is empty
    if csvfile_25.tell() == 0:
        writer.writerow(["beta_x1", "beta_s_x2", "beta_s_pois", "sigma_s_x2", "sigma_s_pois",
                         "lb_awci_sigma_s_x2", "ub_awci_sigma_s_x2", "lb_awci_sigma_s_pois", "ub_awci_sigma_s_pois",
                         "lb_rbci_sigma_s_x2", "ub_rbci_sigma_s_x2", "lb_rbci_sigma_s_pois", "ub_rbci_sigma_s_pois"])

    # 1. Implement 100 Simulations
    # The loop bound is 100, matching the 5% and 10% cells and the recorded
    # output (0..99); a bound of 2 would only produce two result rows.
    n_iter = 0 # progress counter, printed after each completed simulation
    for i in range(100): # i is also the seed

        # Spatial system: a 2000 x 2000 square window
        np.random.seed(i)
        square = Polygon([Point((0, 0)), Point((0, 2000)), Point((2000, 2000)), Point((2000, 0))])
        squwin = as_window(square)

        # Stations: re-seed so that locations, X1 and X2 are drawn in a fixed order
        np.random.seed(i)
        squsamples = PoissonPointProcess(squwin, 150, 1, conditioning=False, asPP=False)
        squpts_random = squsamples.realizations[0]

        # DataFrame for Stations; names are 'R' + zero-padded row number
        random_df = pd.DataFrame(squpts_random, columns = ['Lon', 'Lat'])
        random_df = random_df.assign(Name = np.arange(random_df.shape[0]))
        digits = len(str(150))
        random_df.Name = random_df.Name.astype(str).str.zfill(digits).apply(lambda x: "{}{}".format('R', x))

        # Add attributes, X1 and X2, for Stations
        attr1 = np.random.uniform(2, 100, 150)
        attr2 = np.random.uniform(100, 500, 150)

        random_df = random_df.assign(X1 = attr1, X2 = attr2)

        # POIs: drawn with a different seed (2 * i) than the stations
        np.random.seed(i * 2)
        squsamples_pois = PoissonPointProcess(squwin, 900, 1, conditioning=False, asPP=False)
        squpts_pois = squsamples_pois.realizations[0]

        # DataFrame for POIs; names are 'I' + zero-padded row number
        pois_df = pd.DataFrame(squpts_pois, columns = ['Lon', 'Lat'])
        pois_df = pois_df.assign(Name = np.arange(pois_df.shape[0]))
        digits = len(str(900))
        pois_df.Name = pois_df.Name.astype(str).str.zfill(digits).apply(lambda x: "{}{}".format('I', x))

        # Add attribute for POIs (a constant 1 for every POI)
        attrI = np.ones(pois_df.shape[0])
        pois_df = pois_df.assign(AttributeI = attrI)

        # Necessary data for constructing the `LinearTerm` of X1 and the `SpatialWeightSmoother` of X2
        dta_simul = random_df[['Name', 'X1', 'X2']]
        dta_simul_gdf = random_df[['Name', 'Lon', 'Lat']]
        dta_simul_gdf = gpd.GeoDataFrame(dta_simul_gdf.copy(), geometry=gpd.points_from_xy(dta_simul_gdf.Lon, dta_simul_gdf.Lat))

        # Necessary data for constructing the `DistanceWeighting` smoother of POIs
        pois_simul_df = pois_df[['Name', 'AttributeI']]
        pois_simul_gdf = pois_df[['Name', 'Lon', 'Lat']]
        pois_simul_gdf = gpd.GeoDataFrame(pois_simul_gdf.copy(), geometry=gpd.points_from_xy(pois_simul_gdf.Lon, pois_simul_gdf.Lat))
        map_simul_gdf = pd.concat([dta_simul_gdf, pois_simul_gdf]) # A map includes both the stations and POIs

        # Construct the linear term for X1 and the spatial smoothers for X2 and POIs
        lin_simul = LinearTerm(dta_simul, 1, standard = True) #X1
        sws_simul_X2 = SpatialWeightSmoother(dta_simul, dta_simul_gdf, [0,0,2], standard = True) # smoothing X2
        dw_simul_pois = DistanceWeighting(dta_simul, map_simul_gdf, pois_simul_df, [0,0,0,1], standard = True)# smoothing POI

        # Set the true values of distance decay parameters for s(X2) and s(POIs)
        s_X2_simul = sws_simul_X2.cal(-0.5) # s(X2)
        s_POIs_simul = dw_simul_pois.cal(-1.5) # s(POIs)

        # Set the true values of coefficients for X1, s(X2), and s(POIs)
        coefs_simul = [1,1,1]

        # Generate y by adding 25% noise; re-seeding makes the noise draw reproducible per simulation
        np.random.seed(i)
        X_simul = np.hstack((lin_simul.X, s_X2_simul, s_POIs_simul))
        y_fitted = np.dot(X_simul, coefs_simul).reshape(-1,1)
        y_simul = y_fitted + np.random.normal(0, error_term_sdd(y_fitted, 0.25), (dta_simul.shape[0],1)).reshape(-1,1) # 0.25: 25%

        # Fit GASS model, then run inference and compute both kinds of sigma intervals
        gass_simul = GASS(y_simul, lin_simul, sws_simul_X2, dw_simul_pois, constant = False)
        gass_simul.fit_Gaussian()
        gass_simul.inference_Gaussian()
        gass_simul.calculate_AWCI_sigmas()
        gass_simul.calculate_RBCI_sigmas(max_iter = 100)

        # Store results: point estimates of the three coefficients and two sigmas
        beta_x1 = pd.to_numeric(gass_simul.coefficients[0][0])
        beta_s_x2 = pd.to_numeric(gass_simul.coefficients[1][0])
        beta_s_pois = pd.to_numeric(gass_simul.coefficients[2][0])
        sigma_s_x2 = pd.to_numeric(gass_simul.sigmas[0])
        sigma_s_pois = pd.to_numeric(gass_simul.sigmas[1])

        # AWCI bounds: index 0 is written as the lower bound, index 1 as the upper bound
        lb_awci_sigma_s_x2 = pd.to_numeric(gass_simul.AWCI_sigmas[0][0])
        ub_awci_sigma_s_x2 = pd.to_numeric(gass_simul.AWCI_sigmas[0][1])
        lb_awci_sigma_s_pois = pd.to_numeric(gass_simul.AWCI_sigmas[1][0])
        ub_awci_sigma_s_pois = pd.to_numeric(gass_simul.AWCI_sigmas[1][1])

        # RBCI bounds, same layout as the AWCI bounds
        lb_rbci_sigma_s_x2 = pd.to_numeric(gass_simul.RBCI_sigmas[0][0])
        ub_rbci_sigma_s_x2 = pd.to_numeric(gass_simul.RBCI_sigmas[0][1])
        lb_rbci_sigma_s_pois = pd.to_numeric(gass_simul.RBCI_sigmas[1][0])
        ub_rbci_sigma_s_pois = pd.to_numeric(gass_simul.RBCI_sigmas[1][1])

        # Write results to the CSV file (column order matches the header above)
        writer.writerow([beta_x1, beta_s_x2, beta_s_pois, sigma_s_x2, sigma_s_pois,
                         lb_awci_sigma_s_x2, ub_awci_sigma_s_x2, lb_awci_sigma_s_pois, ub_awci_sigma_s_pois,
                         lb_rbci_sigma_s_x2, ub_rbci_sigma_s_x2, lb_rbci_sigma_s_pois, ub_rbci_sigma_s_pois])

        # Track iteration progress
        print(n_iter)
        n_iter+=1

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
