In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
import pandas as pd
import numpy as np
import matplotlib as mpl

import matplotlib.pyplot as plt
import platform
# Font family is chosen per OS: 'simhei' on Windows, 'Heiti TC' everywhere else.
plt.rcParams['font.family'] = ['simhei',] if platform.system() == "Windows" else ['Heiti TC',]

plt.rcParams.update({
    'axes.unicode_minus': False,  # draw minus signs with the ASCII hyphen
    'figure.dpi': 200,
})

import geopandas as gpd
import shapely
from shapely.geometry import Point,Polygon,LineString,MultiLineString,MultiPoint,MultiPolygon

import pickle
from tqdm import tqdm
from glob import glob
import joblib

In [2]:
import scipy.stats as stats

In [3]:
import os

import platform
# Switch the working directory so the relative './data_access_geojson' and
# shapefile paths used below resolve.
# NOTE(review): the Windows path ends in the project folder while the other
# path stops at 'DR.MENG' -- confirm both point at the same project root.
os.chdir(
    r'H:\BaiduSyncdisk\DR.MENG-Full\Y2024-002-DT-NANJING-ACCESSIBILITY-MAUP'
    if platform.system() == "Windows"
    else r'/Volumes/SANDISK/DR.MENG'
)

# SAMPLE

## pt

In [4]:
# Read the 'pt' buildings and grid250 layers for both thresholds (1800 and 900),
# in the same order as the target names on the left.
pt_buildings_1800, pt_buildings_900, pt_grid250_1800, pt_grid250_900 = (
    gpd.read_file(path)
    for path in (
        './data_access_geojson/od_pt_buildings_1800.geojson',
        './data_access_geojson/od_pt_buildings_900.geojson',
        './data_access_geojson/od_pt_grid250_od_1800.geojson',
        './data_access_geojson/od_pt_grid250_od_900.geojson',
    )
)

In [5]:
# Read the 'pt' grid500 layers for both thresholds.
# These previously pointed at the grid250 files (copy-paste slip), so the
# grid500 variables silently held grid250 data.
pt_grid500_1800 = gpd.read_file('./data_access_geojson/od_pt_grid500_od_1800.geojson')
pt_grid500_900 = gpd.read_file('./data_access_geojson/od_pt_grid500_od_900.geojson')

In [6]:
# Read the 'pt' grid1000 and community layers for the 1800 threshold.
pt_grid1000_1800, pt_community_1800 = (
    gpd.read_file(path)
    for path in (
        './data_access_geojson/od_pt_grid1000_od_1800.geojson',
        './data_access_geojson/od_pt_community_od_1800.geojson',
    )
)

In [7]:
def get_mannwhitneyu(file_name):
    """Run a two-sided Mann-Whitney U test of south vs. north ``access_val``.

    The file is read with geopandas and its rows are split on the ``Id``
    column into the ``'south'`` and ``'north'`` groups.  For building files
    the geometries are first replaced by representative points and spatially
    joined (``within``) to ``./shp/central/nanjing_central.shp``; grid and
    community files are used as read.

    Parameters
    ----------
    file_name : str
        Path to a GeoJSON file.  The path must contain ``'buildings'``,
        ``'grid'`` or ``'community'``.

    Returns
    -------
    pandas.DataFrame or None
        A single-row frame with the columns ``name`` (file base name),
        ``South_median``, ``North_median`` (rounded to 2 decimals), ``u``
        (rounded to 2 decimals), ``p_value`` (rounded to 4 decimals) and
        ``diff`` (``'YES'`` when p < 0.05, else ``'NO'``).  ``None`` is
        returned, after printing ``'error...'``, when the path matches none
        of the recognised kinds.
    """
    sample = gpd.read_file(file_name)

    if 'buildings' in file_name:
        sample['geometry'] = sample['geometry'].representative_point()
        # Forward slashes resolve on both Windows and POSIX; the previous
        # backslash path only worked on Windows.
        # NOTE(review): presumably the 'Id' south/north label used below comes
        # from this shapefile for building files -- confirm.
        center = gpd.read_file('./shp/central/nanjing_central.shp')
        sample = gpd.sjoin(sample, center, predicate='within')
    elif 'grid' in file_name or 'community' in file_name:
        pass
    else:
        print('error...')
        return None

    # The 99th-percentile cut-off is taken over all rows (non-positive values
    # included) before filtering, as in the original analysis.
    sample_99 = sample['access_val'].quantile(0.99)
    values = sample['access_val']
    sample = sample[(values > 0) & (values < sample_99)]

    sample_a = sample[sample['Id'] == 'south']['access_val'].values
    sample_b = sample[sample['Id'] == 'north']['access_val'].values

    u_raw, p_raw = stats.mannwhitneyu(sample_a, sample_b, alternative='two-sided')

    # Decide significance on the unrounded p-value: rounding first turns
    # p in [0.04995, 0.05) into 0.05 and mislabels it as not significant.
    diff = 'YES' if p_raw < 0.05 else 'NO'

    row = {
        'name': os.path.basename(file_name),
        'South_median': round(np.median(sample_a), 2),
        'North_median': round(np.median(sample_b), 2),
        'u': round(u_raw, 2),
        'p_value': round(p_raw, 4),
        'diff': diff,
    }

    return pd.DataFrame([row])

In [8]:
# Spot-check the test on one file (pt / grid250 / 1800) before the batch run.
file_name = './data_access_geojson/od_pt_grid250_od_1800.geojson'
get_mannwhitneyu(file_name)

Unnamed: 0,name,South_median,North_median,u,p_value,diff
0,od_pt_grid250_od_1800.geojson,3.64,9.2,3280134.0,0.0,YES


In [9]:
# Spot-check on a community-level file (pt / community / 900).
file_name = './data_access_geojson/od_pt_community_od_900.geojson'
get_mannwhitneyu(file_name)

Unnamed: 0,name,South_median,North_median,u,p_value,diff
0,od_pt_community_od_900.geojson,3.39,4.33,21.0,0.9565,NO


In [10]:
# Spot-check on a buildings file (pt / buildings / 1800); this is the branch
# that goes through the representative-point spatial join.
file_name = './data_access_geojson/od_pt_buildings_1800.geojson'
get_mannwhitneyu(file_name)

Unnamed: 0,name,South_median,North_median,u,p_value,diff
0,od_pt_buildings_1800.geojson,3.56,2.92,385763895.5,0.0,YES


# ALL DATA

In [11]:
# glob() returns matches in arbitrary, filesystem-dependent order; sort them so
# the row order of the results table is the same on every machine and run.
files = sorted(glob('./data_access_geojson/*.geojson'))

In [12]:
# Sanity check on the number of GeoJSON files picked up for the batch run.
len(files)

72

In [13]:
# Run the test on every file and stack the single-row summaries into one table
# with a fresh 0..n-1 index. pd.concat silently drops the None that
# get_mannwhitneyu returns for unrecognised file names.
dfa = pd.concat(list(map(get_mannwhitneyu, files)), ignore_index=True)

In [14]:
# Preview the first rows of the combined results.
dfa.head()

Unnamed: 0,name,South_median,North_median,u,p_value,diff
0,od_cycle_buildings_1800.geojson,9.41,6.9,922306924.0,0.0,YES
1,od_cycle_buildings_900.geojson,3.95,3.12,438109027.0,0.0,YES
2,od_cycle_community_od_1800.geojson,6.16,6.93,3763.0,0.649,NO
3,od_cycle_community_od_900.geojson,3.3,7.8,1208.0,0.0988,NO
4,od_cycle_grid1000_od_1800.geojson,6.23,7.88,75451.0,0.6422,NO


In [15]:
# Split the file name once on '_' and derive the descriptive columns from it:
# token 1 -> transit, token 2 -> grid_type, last token minus the extension ->
# threshold (kept as a string).
name_parts = dfa['name'].str.split('_')
dfa['transit'] = name_parts.str.get(1)
dfa['grid_type'] = name_parts.str.get(2)
# regex=False: strip the literal '.geojson' suffix. Without it the result
# depends on the pandas version's default, and under regex matching the '.'
# is a wildcard.
dfa['threshold'] = name_parts.str.get(-1).str.replace('.geojson', '', regex=False)
dfa.head()

Unnamed: 0,name,South_median,North_median,u,p_value,diff,transit,grid_type,threshold
0,od_cycle_buildings_1800.geojson,9.41,6.9,922306924.0,0.0,YES,cycle,buildings,1800
1,od_cycle_buildings_900.geojson,3.95,3.12,438109027.0,0.0,YES,cycle,buildings,900
2,od_cycle_community_od_1800.geojson,6.16,6.93,3763.0,0.649,NO,cycle,community,1800
3,od_cycle_community_od_900.geojson,3.3,7.8,1208.0,0.0988,NO,cycle,community,900
4,od_cycle_grid1000_od_1800.geojson,6.23,7.88,75451.0,0.6422,NO,cycle,grid1000,1800


In [16]:
# Drop the raw file name now that it has been split into columns.
# Rebinding (instead of inplace=True) with errors='ignore' keeps this cell
# idempotent: re-running it no longer raises KeyError.
dfa = dfa.drop(columns='name', errors='ignore')

In [17]:
# Display the results table after the 'name' column has been dropped.
dfa

Unnamed: 0,South_median,North_median,u,p_value,diff,transit,grid_type,threshold
0,9.41,6.90,922306924.0,0.0000,YES,cycle,buildings,1800
1,3.95,3.12,438109027.0,0.0000,YES,cycle,buildings,900
2,6.16,6.93,3763.0,0.6490,NO,cycle,community,1800
3,3.30,7.80,1208.0,0.0988,NO,cycle,community,900
4,6.23,7.88,75451.0,0.6422,NO,cycle,grid1000,1800
...,...,...,...,...,...,...,...,...
67,7.93,62.65,198467.0,0.0000,YES,walk,grid250,900
68,6.33,6.02,210.0,0.8958,NO,walk,grid3000,1800
69,4.72,6.02,30.0,0.7629,NO,walk,grid3000,900
70,4.62,17.25,152712.0,0.0000,YES,walk,grid500,1800


In [18]:
def get_pivot(sample):
    """Pivot p-values into a threshold-by-spatial-unit table.

    Parameters
    ----------
    sample : pandas.DataFrame
        Must contain the columns ``p_value``, ``threshold`` and ``grid_type``,
        with at most one row per (threshold, grid_type) pair.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by ``threshold``, one column per ``grid_type`` label,
        values are ``p_value`` rounded to 3 decimals.

    Notes
    -----
    ``sample`` is not modified.  The earlier version overwrote its
    ``grid_type`` column in place, so calling the function a second time on
    the same frame mapped the already-relabelled values to NaN.
    """
    # Zero-pad the short grid names so the columns sort by cell size
    # (grid0250 < grid0500 < grid1000 ...). Every other label, including ones
    # not listed here, passes through unchanged instead of becoming NaN.
    padded_labels = {
        'grid250': 'grid0250',
        'grid500': 'grid0500',
    }
    relabelled = sample['grid_type'].map(lambda unit: padded_labels.get(unit, unit))

    # .assign builds a new frame, leaving the caller's data untouched.
    tidy = sample[['p_value', 'threshold']].assign(grid_type=relabelled)

    result = tidy.pivot(index='threshold', columns='grid_type', values='p_value')
    return result.round(3)

In [19]:
# Build one threshold x grid_type p-value table per transit value; apply()
# stacks them under a (transit, threshold) MultiIndex.
result = dfa.groupby('transit').apply(get_pivot)

In [22]:
# Show the table in descending index order. 'threshold' is a string column, so
# within each transit value '900' sorts after '1800' (lexicographic order) and
# is listed first here.
result.sort_index(ascending=False)

Unnamed: 0_level_0,grid_type,buildings,community,grid0250,grid0500,grid1000,grid1500,grid2000,grid2500,grid3000
transit,threshold,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
walk,900,0.0,0.329,0.0,0.0,0.0,0.001,0.425,0.09,0.763
walk,1800,0.0,0.156,0.0,0.0,0.0,0.038,0.108,0.123,0.896
pt,900,0.0,0.956,0.0,0.0,0.041,0.071,0.21,0.111,0.333
pt,1800,0.0,0.102,0.0,0.0,0.0,0.004,0.553,0.104,0.928
nav,900,0.0,0.009,0.0,0.0,0.0,0.056,0.407,0.115,0.105
nav,1800,0.0,0.712,0.0,0.023,0.775,0.825,0.501,0.929,0.514
cycle,900,0.0,0.099,0.0,0.0,0.0,0.023,0.316,0.097,0.808
cycle,1800,0.0,0.649,0.0,0.045,0.642,0.977,0.793,0.747,0.816


In [None]:
"""
The .py file can be found in the py folder: N01-mannwhitney-U-test.py
"""