# NDVI PDP Local Estimation

In [1]:
# IPython magic: display the current working directory before changing it.
%pwd

'/mnt/d/15_Article/03_RStudio/PyCode_v241111'

In [2]:
# IPython magic: move to the parent directory; the relative '03_Results/...'
# paths used by the cells below are resolved from there.
%cd ..

/mnt/d/15_Article/03_RStudio


## Import Packages

In [3]:
from glob import glob
from joblib import dump, load
import joblib
import numpy as np
import pandas as pd
import random
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, KFold

## Load Data

### Connection NDVI

In [9]:
# Load the ten NDVI PDP files and derive, for every row, the increment level
# it belongs to and the change in prediction relative to the baseline block.
#
# Each file is treated as N_STEPS equally sized blocks stacked vertically:
# block k is labelled with increment k * STEP_SIZE, and the first block
# (increment 0) doubles as the baseline ("original") rows.
N_STEPS = 41      # number of stacked increment blocks per file
STEP_SIZE = 0.05  # spacing between consecutive increment levels

ori_df_list = []
pdp_df_list = []

for file_idx in range(1, 11):
    X_pdp = pd.read_parquet(f'03_Results/PdpLocal/PDP_Local_NDVI_{file_idx}.parquet')

    n_rows = X_pdp.shape[0]
    if n_rows % N_STEPS != 0:
        # Fail early with a clear message instead of silently truncating the
        # block length and hitting a length-mismatch error further down.
        raise ValueError(
            f'PDP file {file_idx} has {n_rows} rows, '
            f'which is not a multiple of {N_STEPS}'
        )
    length = n_rows // N_STEPS
    print(length)

    # Baseline rows (first block), taken before the helper columns are added.
    ori_df_list.append(X_pdp.iloc[:length, :])

    # Baseline prediction = last column of the first block, repeated once per
    # increment block so it lines up row-by-row with the whole file.
    # NOTE(review): the last column is presumably 'Pred_y' -- confirm.
    y_ori_pred = np.tile(X_pdp.iloc[:length, -1].to_numpy(dtype=float), N_STEPS)

    # Increment label per row: 0.0 for the first block, 0.05 for the next, ...
    X_pdp['incre'] = np.repeat(np.arange(N_STEPS) * STEP_SIZE, length)

    X_pdp['Pred_y_change'] = X_pdp['Pred_y'].to_numpy() - y_ori_pred

    pdp_df_list.append(X_pdp)

38318
38318
38318
38317
38317
38317
38317
38317
38317
38317


In [10]:
# Stack the per-file frames: `ori_df` holds only the baseline rows, `pdp_df`
# holds every increment block together with the derived columns.
ori_df = pd.concat(objs=ori_df_list)
pdp_df = pd.concat(objs=pdp_df_list)

In [11]:
# Sanity check: (rows, columns) of the stacked PDP frame, including the
# added 'incre' and 'Pred_y_change' columns.
pdp_df.shape

(15710093, 36)

In [12]:
# Sanity check: (rows, columns) of the stacked baseline rows.
ori_df.shape

(383173, 34)

### Build Local Connection

In [41]:
# Local PDP slope: for every baseline sample, collect all PDP rows whose
# coordinates fall strictly inside that sample's bounding box, average
# 'Pred_y_change' per increment level, and fit a straight line through the
# averages. Each result row is [lat, lon, slope, intercept, R^2].
line_result_df = []

# Pull the four columns used on every iteration out of the DataFrame once.
# Boolean-indexing the full frame per sample would copy every column of the
# matched rows even though only these are needed.
pdp_lat = pdp_df['lat'].to_numpy()
pdp_lon = pdp_df['lon'].to_numpy()
pdp_incre = pdp_df['incre'].to_numpy()
pdp_change = pdp_df['Pred_y_change'].to_numpy()

# Positional columns of ori_df: 29-32 are compared against lon/lat as window
# bounds, 1-2 are written out as the sample's own coordinates.
# NOTE(review): positions are kept from the original code -- confirm them
# against the parquet schema.
sample_rows = zip(
    ori_df.iloc[:, 29].to_numpy(),  # left  (lower bound on 'lon')
    ori_df.iloc[:, 30].to_numpy(),  # right (upper bound on 'lon')
    ori_df.iloc[:, 31].to_numpy(),  # down  (lower bound on 'lat')
    ori_df.iloc[:, 32].to_numpy(),  # up    (upper bound on 'lat')
    ori_df.iloc[:, 1].to_numpy(),   # reported as 'lat'
    ori_df.iloc[:, 2].to_numpy(),   # reported as 'lon'
)

for left, right, down, up, lat, lon in sample_rows:
    in_window = (
        (pdp_lat > down) & (pdp_lat < up)
        & (pdp_lon > left) & (pdp_lon < right)
    )

    if not in_window.any():
        # Nothing inside the window: LinearRegression.fit raises on an empty
        # sample, which would abort the whole run. Record NaN instead so the
        # output stays aligned with ori_df row-for-row.
        line_result_df.append([lat, lon, np.nan, np.nan, np.nan])
        continue

    # Mean prediction change per increment level (index = increment value,
    # sorted ascending by groupby).
    pdp_local = pd.Series(pdp_change[in_window]).groupby(pdp_incre[in_window]).mean()

    X = pdp_local.index.to_numpy().reshape(-1, 1)
    y = pdp_local.to_numpy()

    model = LinearRegression()
    model.fit(X, y)

    # In-sample R^2 of the straight-line fit to the averaged curve.
    r2 = r2_score(y, model.predict(X))

    line_result_df.append([lat, lon, model.coef_[0], model.intercept_, r2])

In [42]:
# One row per baseline sample: coordinates plus the fitted line's slope,
# intercept and R^2.
result_df = pd.DataFrame(
    data=line_result_df,
    columns=['lat', 'lon', 'coeff', 'intercept', 'r2'],
)

In [43]:
# Write the NDVI local-connection coefficients to an Excel workbook.
result_df.to_excel(
    excel_writer='03_Results/PdpConnectionCoef/PdpConnection_NDVI.xlsx',
)

### Connection NTL

In [44]:
# Load the ten NTL PDP files and derive, for every row, the increment level
# it belongs to and the change in prediction relative to the baseline block.
#
# Each file is treated as N_STEPS equally sized blocks stacked vertically:
# block k is labelled with increment k * STEP_SIZE, and the first block
# (increment 0) doubles as the baseline ("original") rows.
N_STEPS = 41      # number of stacked increment blocks per file
STEP_SIZE = 0.05  # spacing between consecutive increment levels

ori_df_list = []
pdp_df_list = []

for file_idx in range(1, 11):
    X_pdp = pd.read_parquet(f'03_Results/PdpLocal/PDP_Local_NTL_{file_idx}.parquet')

    n_rows = X_pdp.shape[0]
    if n_rows % N_STEPS != 0:
        # Fail early with a clear message instead of silently truncating the
        # block length and hitting a length-mismatch error further down.
        raise ValueError(
            f'PDP file {file_idx} has {n_rows} rows, '
            f'which is not a multiple of {N_STEPS}'
        )
    length = n_rows // N_STEPS
    print(length)

    # Baseline rows (first block), taken before the helper columns are added.
    ori_df_list.append(X_pdp.iloc[:length, :])

    # Baseline prediction = last column of the first block, repeated once per
    # increment block so it lines up row-by-row with the whole file.
    # NOTE(review): the last column is presumably 'Pred_y' -- confirm.
    y_ori_pred = np.tile(X_pdp.iloc[:length, -1].to_numpy(dtype=float), N_STEPS)

    # Increment label per row: 0.0 for the first block, 0.05 for the next, ...
    X_pdp['incre'] = np.repeat(np.arange(N_STEPS) * STEP_SIZE, length)

    X_pdp['Pred_y_change'] = X_pdp['Pred_y'].to_numpy() - y_ori_pred

    pdp_df_list.append(X_pdp)

38318
38318
38318
38317
38317
38317
38317
38317
38317
38317


In [45]:
# Stack the per-file frames: `ori_df` holds only the baseline rows, `pdp_df`
# holds every increment block together with the derived columns.
ori_df = pd.concat(objs=ori_df_list)
pdp_df = pd.concat(objs=pdp_df_list)

In [46]:
# Sanity check: (rows, columns) of the stacked PDP frame, including the
# added 'incre' and 'Pred_y_change' columns.
pdp_df.shape

(15710093, 36)

In [47]:
# Sanity check: (rows, columns) of the stacked baseline rows.
ori_df.shape

(383173, 34)

### Build Local Connection

In [48]:
# Local PDP slope: for every baseline sample, collect all PDP rows whose
# coordinates fall strictly inside that sample's bounding box, average
# 'Pred_y_change' per increment level, and fit a straight line through the
# averages. Each result row is [lat, lon, slope, intercept, R^2].
line_result_df = []

# Pull the four columns used on every iteration out of the DataFrame once.
# Boolean-indexing the full frame per sample would copy every column of the
# matched rows even though only these are needed.
pdp_lat = pdp_df['lat'].to_numpy()
pdp_lon = pdp_df['lon'].to_numpy()
pdp_incre = pdp_df['incre'].to_numpy()
pdp_change = pdp_df['Pred_y_change'].to_numpy()

# Positional columns of ori_df: 29-32 are compared against lon/lat as window
# bounds, 1-2 are written out as the sample's own coordinates.
# NOTE(review): positions are kept from the original code -- confirm them
# against the parquet schema.
sample_rows = zip(
    ori_df.iloc[:, 29].to_numpy(),  # left  (lower bound on 'lon')
    ori_df.iloc[:, 30].to_numpy(),  # right (upper bound on 'lon')
    ori_df.iloc[:, 31].to_numpy(),  # down  (lower bound on 'lat')
    ori_df.iloc[:, 32].to_numpy(),  # up    (upper bound on 'lat')
    ori_df.iloc[:, 1].to_numpy(),   # reported as 'lat'
    ori_df.iloc[:, 2].to_numpy(),   # reported as 'lon'
)

for left, right, down, up, lat, lon in sample_rows:
    in_window = (
        (pdp_lat > down) & (pdp_lat < up)
        & (pdp_lon > left) & (pdp_lon < right)
    )

    if not in_window.any():
        # Nothing inside the window: LinearRegression.fit raises on an empty
        # sample, which would abort the whole run. Record NaN instead so the
        # output stays aligned with ori_df row-for-row.
        line_result_df.append([lat, lon, np.nan, np.nan, np.nan])
        continue

    # Mean prediction change per increment level (index = increment value,
    # sorted ascending by groupby).
    pdp_local = pd.Series(pdp_change[in_window]).groupby(pdp_incre[in_window]).mean()

    X = pdp_local.index.to_numpy().reshape(-1, 1)
    y = pdp_local.to_numpy()

    model = LinearRegression()
    model.fit(X, y)

    # In-sample R^2 of the straight-line fit to the averaged curve.
    r2 = r2_score(y, model.predict(X))

    line_result_df.append([lat, lon, model.coef_[0], model.intercept_, r2])

In [49]:
# One row per baseline sample: coordinates plus the fitted line's slope,
# intercept and R^2.
result_df = pd.DataFrame(
    data=line_result_df,
    columns=['lat', 'lon', 'coeff', 'intercept', 'r2'],
)

In [50]:
# Write the NTL local-connection coefficients to an Excel workbook.
result_df.to_excel(
    excel_writer='03_Results/PdpConnectionCoef/PdpConnection_NTL.xlsx',
)

### Connection income

In [51]:
# Load the ten income_indiv PDP files and derive, for every row, the increment
# level it belongs to and the change in prediction relative to the baseline
# block.
#
# Each file is treated as N_STEPS equally sized blocks stacked vertically:
# block k is labelled with increment k * STEP_SIZE, and the first block
# (increment 0) doubles as the baseline ("original") rows.
N_STEPS = 41      # number of stacked increment blocks per file
STEP_SIZE = 0.05  # spacing between consecutive increment levels

ori_df_list = []
pdp_df_list = []

for file_idx in range(1, 11):
    X_pdp = pd.read_parquet(f'03_Results/PdpLocal/PDP_Local_income_indiv_{file_idx}.parquet')

    n_rows = X_pdp.shape[0]
    if n_rows % N_STEPS != 0:
        # Fail early with a clear message instead of silently truncating the
        # block length and hitting a length-mismatch error further down.
        raise ValueError(
            f'PDP file {file_idx} has {n_rows} rows, '
            f'which is not a multiple of {N_STEPS}'
        )
    length = n_rows // N_STEPS
    print(length)

    # Baseline rows (first block), taken before the helper columns are added.
    ori_df_list.append(X_pdp.iloc[:length, :])

    # Baseline prediction = last column of the first block, repeated once per
    # increment block so it lines up row-by-row with the whole file.
    # NOTE(review): the last column is presumably 'Pred_y' -- confirm.
    y_ori_pred = np.tile(X_pdp.iloc[:length, -1].to_numpy(dtype=float), N_STEPS)

    # Increment label per row: 0.0 for the first block, 0.05 for the next, ...
    X_pdp['incre'] = np.repeat(np.arange(N_STEPS) * STEP_SIZE, length)

    X_pdp['Pred_y_change'] = X_pdp['Pred_y'].to_numpy() - y_ori_pred

    pdp_df_list.append(X_pdp)

38318
38318
38318
38317
38317
38317
38317
38317
38317
38317


In [52]:
# Stack the per-file frames: `ori_df` holds only the baseline rows, `pdp_df`
# holds every increment block together with the derived columns.
ori_df = pd.concat(objs=ori_df_list)
pdp_df = pd.concat(objs=pdp_df_list)

In [53]:
# Sanity check: (rows, columns) of the stacked PDP frame, including the
# added 'incre' and 'Pred_y_change' columns.
pdp_df.shape

(15710093, 36)

In [54]:
# Sanity check: (rows, columns) of the stacked baseline rows.
ori_df.shape

(383173, 34)

### Build Local Connection

In [55]:
# Local PDP slope: for every baseline sample, collect all PDP rows whose
# coordinates fall strictly inside that sample's bounding box, average
# 'Pred_y_change' per increment level, and fit a straight line through the
# averages. Each result row is [lat, lon, slope, intercept, R^2].
line_result_df = []

# Pull the four columns used on every iteration out of the DataFrame once.
# Boolean-indexing the full frame per sample would copy every column of the
# matched rows even though only these are needed.
pdp_lat = pdp_df['lat'].to_numpy()
pdp_lon = pdp_df['lon'].to_numpy()
pdp_incre = pdp_df['incre'].to_numpy()
pdp_change = pdp_df['Pred_y_change'].to_numpy()

# Positional columns of ori_df: 29-32 are compared against lon/lat as window
# bounds, 1-2 are written out as the sample's own coordinates.
# NOTE(review): positions are kept from the original code -- confirm them
# against the parquet schema.
sample_rows = zip(
    ori_df.iloc[:, 29].to_numpy(),  # left  (lower bound on 'lon')
    ori_df.iloc[:, 30].to_numpy(),  # right (upper bound on 'lon')
    ori_df.iloc[:, 31].to_numpy(),  # down  (lower bound on 'lat')
    ori_df.iloc[:, 32].to_numpy(),  # up    (upper bound on 'lat')
    ori_df.iloc[:, 1].to_numpy(),   # reported as 'lat'
    ori_df.iloc[:, 2].to_numpy(),   # reported as 'lon'
)

for left, right, down, up, lat, lon in sample_rows:
    in_window = (
        (pdp_lat > down) & (pdp_lat < up)
        & (pdp_lon > left) & (pdp_lon < right)
    )

    if not in_window.any():
        # Nothing inside the window: LinearRegression.fit raises on an empty
        # sample, which would abort the whole run. Record NaN instead so the
        # output stays aligned with ori_df row-for-row.
        line_result_df.append([lat, lon, np.nan, np.nan, np.nan])
        continue

    # Mean prediction change per increment level (index = increment value,
    # sorted ascending by groupby).
    pdp_local = pd.Series(pdp_change[in_window]).groupby(pdp_incre[in_window]).mean()

    X = pdp_local.index.to_numpy().reshape(-1, 1)
    y = pdp_local.to_numpy()

    model = LinearRegression()
    model.fit(X, y)

    # In-sample R^2 of the straight-line fit to the averaged curve.
    r2 = r2_score(y, model.predict(X))

    line_result_df.append([lat, lon, model.coef_[0], model.intercept_, r2])

In [56]:
# One row per baseline sample: coordinates plus the fitted line's slope,
# intercept and R^2.
result_df = pd.DataFrame(
    data=line_result_df,
    columns=['lat', 'lon', 'coeff', 'intercept', 'r2'],
)

In [57]:
# Write the income_indiv local-connection coefficients to an Excel workbook.
result_df.to_excel(
    excel_writer='03_Results/PdpConnectionCoef/PdpConnection_income_indiv.xlsx',
)