In [100]:
import numpy as np
from copy import deepcopy
from spglm.iwls import iwls, _compute_betas_gwr
from spglm.family import *
from mgwr.search import golden_section
import pandas as pd
import statsmodels.api as sm
import geopandas as gp
import matplotlib.pyplot as plt
import libpysal.weights as sw

## Spatial Smooth

In [101]:
def SpatialSmooth(m, X_df, pid, geomap, sigma):
    
    '''
    Compute the accessibility term (Aij) of the competing-destinations model for every flow in X_df.

    m: the positional index of the X_df column storing the quality of the destination
    X_df: the dataframe containing the predictor columns; 
          this function reads the following columns by name:
            `Destination`: the name or code of the destination
            `Code`: `Origin` + `Destination`, used to join the accessibility back onto the flows
          the quality column is addressed by position (m), so its name does not matter
    
    pid: the positional index of the geomap column storing the name/code of each place
         (e.g. NUTS_ID, column 1 in austria_shp)
    geomap: the geodataframe with the coordinate information used to compute distances
    sigma: sigma in the formula of accessibility in CDM (passed to DistanceBand as `alpha`)

    Returns an (n_flows, 1) float array aligned with the rows of X_df.
    Flows whose `Code` cannot be formed from two distinct places of geomap get NaN (left join).
    '''
    # quality of every destination (mk), one row per destination, sorted by destination code
    # NOTE(review): this sorted order is assumed to match the row order of geomap -- confirm
    dest_df = X_df.drop_duplicates(subset=['Destination'])
    dest_df = dest_df.set_index(dest_df['Destination']).sort_index()
    mk = dest_df.iloc[:, m].values.reshape((-1, 1))

    # every place known to the map
    all_places = geomap.iloc[:, pid].values
    num_j = len(all_places)

    # flow codes for every ordered pair of distinct places, origin-major, in geomap order;
    # this is the same order in which the off-diagonal cells of W are read below
    codes = [origin + dest
             for origin in all_places
             for dest in all_places
             if dest != origin]

    # distance-decay weights between places (binary=False, alpha=sigma)
    W = sw.distance.DistanceBand.from_dataframe(geomap, threshold=99999, binary=False, alpha=sigma)
    W = W.full()[0]
    # off-diagonal cells; the reshape below requires exactly num_j - 1 non-zero weights per row,
    # i.e. every pair of distinct places must lie within the threshold at a non-zero distance
    mask = W != 0

    # NOTE(review): mk is a column vector, so row r of W is scaled by mk[r] before the row sum;
    # flow (i, j) therefore receives mk[j] * sum_k W[j, k]. Confirm this is the intended
    # accessibility (the usual CDM form weights each term by the quality of place k).
    weighted = (W * mk)[mask].reshape((num_j, num_j - 1))
    row_sums = np.sum(weighted, axis=1)
    # cell (i, j) holds the value of destination j; keep the off-diagonal cells row by row
    access = np.tile(row_sums, num_j).reshape((num_j, num_j))[mask]

    # attach the accessibility to the flows through their code
    access_df = pd.DataFrame({'Code': codes, 'Aij': access})
    merged = pd.merge(X_df, access_df, how='left', on=['Code'])

    return merged['Aij'].to_numpy(dtype=float).reshape((-1, 1))

## CompeteDestination

In [108]:
def CompeteDestination(y, v1, v2, v3, X_df, pid, geomap, is_Spatial=True, verbose=False):
    
    '''
    Calibrate a competing-destinations flow model by local scoring (outer loop)
    with backfitting (inner loop) on the log-transformed predictors.

    y: the array of the response variable
    v1: the index of the Oi column
    v2: the index of the Wj column
    v3: the index of the Dij column
    X_df: the dataframe containing the predictor columns; 
          by default, we assume X_df has the columns with those names:
            `Origin`: the name or code of origin
            `Destination`: the name or code of destination
            `Code`: `Origin` + `Destination` 
            `Oi`, 'Wj', 'Dij': No need to have the same column name
            `Dij`: In the future, users do not need to provide it, the distance between origin and destination in a flow will be calculated by using geomap automatically
            
    geomap: the file with coordinate info of origin and destination
            must have a column storing the name or code of origin and destination
    pid: the index of the must-have column in geomap
    is_Spatial: whether we need to create a spatial smooth (accessibility term), by default it is true
    verbose: whether to print the backfitting score of change at every inner iteration

    Returns (betas, eta, sigma):
        betas: (k, 1) array with the coefficient of each log-predictor from the last backfit
        eta: (n, 1) array with the final linear predictor
        sigma: the sigma selected for the accessibility term, np.inf when is_Spatial is false
    '''
    
    # build X
    def build_X(v1, v2, v3, X_df, pid, geomap, is_Spatial):
        
        Oi = X_df.iloc[:,v1]
        Wj = X_df.iloc[:,v2]
        Dij = X_df.iloc[:,v3]
        # force a float matrix: backfit() allocates its buffers with np.zeros_like(X), so an
        # integer X (all-integer predictors, no accessibility term) would truncate fitted values
        X = np.column_stack((Oi, Wj, Dij)).astype(float)
        
        if is_Spatial:
            #initialize the accessibility term with sigma set as -1
            init_Aij = SpatialSmooth(v2, X_df, pid, geomap, -1)
            X = np.hstack((X,init_Aij)) 
        return X
    
    # backfitting - inner loop
    def backfit(y, X, w, v2, X_df, pid, geomap, is_Spatial, verbose):
        k = X.shape[1]
        w = w.reshape((-1, 1))
        log_X = np.log(X)
        betas = _compute_betas_gwr(y, log_X, w)[0]
        # per-term contributions and residual of the joint weighted fit
        XB = np.multiply(betas.T, log_X)
        err = y.reshape((-1, 1)) - np.dot(log_X, betas)
        tol = 1e-8
        max_iter = 50
        sig = np.inf
        
        for iters in range(1, max_iter + 1):
            new_XB = np.zeros_like(X)
            params = np.zeros_like(betas)

            for j in range(k):
                # partial residual for term j
                temp_y = XB[:, j].reshape((-1, 1)) + err
                temp_X = np.log(X[:, j]).reshape((-1, 1))

                if is_Spatial and j == k-1:
                    # the last column is the accessibility term: pick the sigma whose
                    # accessibility gives the lowest OLS AIC against the partial residual,
                    # then refresh that column of X (in place) with the new accessibility
                    def aic_of_sigma(x):
                        return sm.OLS(temp_y, np.log(SpatialSmooth(v2, X_df, pid, geomap, x))).fit().aic
                    sig  = golden_section(-6, 6, 0.38197, aic_of_sigma, 1e-2, 50)[0]
                    Aij = SpatialSmooth(v2, X_df, pid, geomap, sig)
                    X[:, j] = Aij.flatten()
                    temp_X = np.log(Aij).reshape((-1, 1))

                beta = _compute_betas_gwr(temp_y, temp_X, w)[0]
                yhat = np.dot(temp_X, beta)
                new_XB[:, j] = yhat.flatten()
                err = (temp_y - yhat).reshape((-1, 1))
                params[j, :] = beta[0][0]

            # score of change between two successive sets of term contributions
            num = np.sum((XB-new_XB)**2)
            den = 1 + np.sum(np.sum(XB, axis=1)**2)
            score = (num / den)
            XB = new_XB

            if verbose:
                print("Current iteration:", iters, ",SOC:", np.round(score, 8))
            if score < tol:
                break

        return params, X, sig
    
    X = build_X(v1, v2, v3, X_df, pid, geomap, is_Spatial)

    y_col = y.reshape((-1, 1))
    # start from a constant linear predictor equal to the log of the mean response
    eta = np.array(np.log(np.mean(y))).reshape((-1, 1))
    s_old = np.zeros_like(X)
    crit = 9999
    
    #local scoring - outer loop 
    while crit > 1e-8:
        # log link: working weights w = mu and working response z
        mu = np.exp(eta).reshape((-1, 1))
        w = mu.reshape((-1,1))
        z = eta + ((y_col - mu) / mu)
        betas, X, sigma = backfit(z, X, w, v2, X_df, pid, geomap,is_Spatial, verbose)
        s_new = np.multiply(betas.T, np.log(X))
        # weighted relative change of the term contributions
        inner = np.sum((s_old - s_new)**2, axis=1)
        num = np.sum(w*inner)
        den = np.sum(w*np.sum((1 + s_old)**2, axis=1).reshape((-1, 1)))
        crit = num / den
        eta = np.sum(s_new, axis=1).reshape((-1, 1))
        s_old = s_new
        
    return betas, eta, sigma

## Test

In [109]:
# Region geometries and the flow table; Dja keeps the `Dj` column of the unfiltered table.
austria_shp = gp.read_file('Data/austria.shp')
austria = pd.read_csv('Data/austria.csv')
Dja = austria['Dj'].to_numpy()

In [110]:
# Keep only flows between two different places, then pull the model columns out as arrays.
austria = austria.loc[austria['Origin'] != austria['Destination']]
flows, Oi, Wj, Dij = (austria[column].to_numpy() for column in ('Data', 'Oi', 'Dj', 'Dij'))

In [111]:
# Flow table in the column layout the model functions address by position:
# 0 Code, 1 Origin, 2 Destination, 3 Data, 4 Oi, 5 Dj, 6 Dij
X_df = (
    austria
    .assign(Code=austria['Origin'] + austria['Destination'])
    .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
    [['Code', 'Origin', 'Destination', 'Data', 'Oi', 'Dj', 'Dij']]
)

In [112]:
# Simulate flows from a known sigma (-1.35) and check that CompeteDestination recovers it.
Aij = SpatialSmooth(5, X_df, 1, austria_shp, -1.35)
X = np.column_stack((Oi, Wj, Dij, Aij))
# one weight/offset per flow (sized from the data instead of a hard-coded count)
w = np.ones(len(flows)).reshape((-1,1))
betas = iwls(flows.reshape((-1,1)), np.log(X), family=Poisson(), offset=w.reshape((-1, 1)), y_fix=None, wi=w.reshape((-1, 1)))[0]
# seeded noise so that a fresh kernel reproduces the same synthetic flows
noise = np.random.default_rng(42).normal(0, .0001, (len(flows), 1))
f = np.exp(np.dot(np.log(X), betas) + noise).reshape((-1,1))
betas_cd, eta_cd, sigma_cd = CompeteDestination(f, 4, 5, 6, X_df, 1, austria_shp, is_Spatial=True, verbose=False)


In [113]:
# Show the IWLS reference coefficients next to the competing-destinations estimates.
print('\n'.join(map(str, ('betas_iwls:', betas, 'betas_cd:', betas_cd, 'sigma_cd:', sigma_cd))))

betas_iwls:
[[ 0.71369558]
 [ 0.72409961]
 [-1.0584376 ]
 [-0.08370744]]
betas_cd:
[[ 0.7137392 ]
 [ 0.7241024 ]
 [-1.05849506]
 [-0.08372059]]
sigma_cd:
-1.35
