In [10]:
import alphashape
from descartes import PolygonPatch
import folium
import geopandas as gpd
from geopy.geocoders import Nominatim
from ipywidgets import interact, fixed, widgets
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import numpy as np
import osmnx as ox
import pandas as pd
from shapely import geometry
from pyproj import CRS
import shapely
from shapely.ops import unary_union
from shapely.geometry import Point, Polygon
from itertools import chain, combinations
import sys
import os

import warnings
warnings.filterwarnings("ignore")

In [20]:
# "__file__" is a quoted literal here, not the module attribute, so abspath()
# resolves it against the working directory; stripping the last component
# leaves that directory in `current`.
current = os.path.split(os.path.abspath("__file__"))[0]
# Data folders are looked up one level above `current`.
parent_dir = os.path.dirname(current)

In [18]:
# Boundary data of Pingtung County
ping = gpd.read_file(os.path.join(parent_dir, "Taiwan_county", "COUNTY_MOI_1090820.shp"), encoding='utf-8')
ping = ping[ping['COUNTYNAME']=='屏東縣']

# Boundary data of the villages of Pingtung County
gdf_vill = gpd.read_file(os.path.join(parent_dir, "PINGVILL", "VILLAGE_MOI_1101214.shp"),encoding='utf-8')
vill_sanhe = gpd.read_file(os.path.join(parent_dir, "PINGVILL", "Village_Sanhe.shp"),encoding='utf-8')
# Villages from the national layer plus the separately supplied Sanhe layer,
# reduced to the columns used below.
gdf_vill_ping = pd.concat(
    [gdf_vill[gdf_vill['COUNTYNAME']=='屏東縣'], vill_sanhe],
    ignore_index=True,
)[['TOWNNAME','VILLNAME','geometry']]
# Rows of the township '琉球鄉' are excluded from the sampling frame; the index
# is renumbered so that positions and labels coincide from here on.
gdf_vill_ping = gdf_vill_ping[gdf_vill_ping['TOWNNAME']!='琉球鄉'].reset_index(drop=True)

# Population data of Pingtung County
ping_pop_no = pd.read_excel(os.path.join(parent_dir, "test_patient_data", "屏東人口數.xls"), index_col=0)

# Matching population to village's name
map_vill_name = gdf_vill_ping['VILLNAME'].tolist()
# NOTE(review): the previous code addressed rows of ping_pop_no by the labels
# 0..n-1; reading the columns positionally gives the same values under that
# assumption -- confirm the sheet's first column really is such a counter.
excel_vil_name = ping_pop_no['鄉村名'].tolist()

# Name -> population, keeping the FIRST row for a repeated name (the same row
# that excel_vil_name.index(name) would select).
# NOTE(review): matching uses the village name only, so equally named villages
# in different townships all receive that first row's population -- confirm
# this is acceptable or match on township as well.
pop_by_name = {}
for vill_name, vill_pop in zip(excel_vil_name, ping_pop_no['人口數']):
    pop_by_name.setdefault(vill_name, vill_pop)

# Villages without a match in the spreadsheet keep a population of 0.
gdf_vill_ping['pop'] = [pop_by_name.get(vill_name, 0) for vill_name in map_vill_name]

# Sampling weights 1: share of the total population.
gdf_vill_ping['sam_p_1'] = gdf_vill_ping['pop'] / sum(gdf_vill_ping['pop'])
vill_sampling_1 = gdf_vill_ping['sam_p_1'].tolist()

# Sampling weights 2: population per unit area, normalised to sum to 1.
# The area is taken in the layer's own coordinate units (no reprojection is
# done here), so it only serves as a relative measure.
gdf_vill_ping['sam_p_2'] = gdf_vill_ping['pop'] / gdf_vill_ping['geometry'].area
# Normaliser computed once instead of once per village.
density_total = sum(gdf_vill_ping['sam_p_2'])
vill_sampling_2 = [density / density_total for density in gdf_vill_ping['sam_p_2']]

In [None]:
def pat_data_generator(number_of_data, data_type, iter_no):
    """Generate random patient locations inside the county polygon.

    Candidate points are drawn by rejection sampling: a point is drawn
    uniformly inside a bounding box and kept only if it lies within the
    county geometry held in the module-level ``ping`` frame.

    Parameters
    ----------
    number_of_data : int
        Number of accepted points to return.
    data_type : int
        Sampling scheme.
        1 -- uniform over the county bounding box (western bound overridden).
        2 -- a village is picked with the weights ``vill_sampling_1``
             (population share) and the point is drawn in its bounding box.
        3 -- as 2, but with the weights ``vill_sampling_2``
             (normalised population per area).
    iter_no : int
        Seed passed to ``np.random.seed`` (the global NumPy generator).

    Returns
    -------
    pandas.DataFrame
        One row per point with columns ``'loc'`` (the point geometry) and
        ``'stroke_p'`` (a uniform random number in [0, 1)).

    Raises
    ------
    ValueError
        If ``data_type`` is not 1, 2 or 3.
    """
    if data_type not in (1, 2, 3):
        raise ValueError("data_type must be 1, 2 or 3, got {!r}".format(data_type))

    # set random seed
    np.random.seed(iter_no)

    # `ping` is already filtered to the county, so take its geometry by
    # position instead of relying on a row label from one shapefile revision.
    county_shape = ping['geometry'].iloc[0]

    if data_type == 1:
        x_min, y_min, x_max, y_max = ping.total_bounds
        # NOTE(review): hard-coded western bound replacing the computed one;
        # presumably trims the part of the box west of the main area -- confirm.
        x_min = 120.42
        county_box = (x_min, y_min, x_max, y_max)
    else:
        village_ids = list(gdf_vill_ping.index)
        village_weights = vill_sampling_1 if data_type == 2 else vill_sampling_2

    # Accepted points are collected in a plain list; the draws happen in the
    # same order as before (village choice, x, y), so a given seed consumes
    # the random stream identically.
    accepted = []
    while len(accepted) < number_of_data:
        if data_type == 1:
            box = county_box
        else:
            chosen = np.random.choice(village_ids, p=village_weights)
            box = gdf_vill_ping['geometry'][chosen].bounds
        lo_x, lo_y, hi_x, hi_y = box
        x = np.random.uniform(lo_x, hi_x)
        y = np.random.uniform(lo_y, hi_y)
        candidate = Point(x, y)
        # NOTE(review): for data_type 2 and 3 the point is only required to be
        # inside the county, not inside the sampled village's own polygon.
        if candidate.within(county_shape):
            accepted.append(candidate)

    pat_data_info = pd.DataFrame({'loc': gpd.GeoSeries(accepted)})
    # One uniform draw per patient, taken after all locations are fixed.
    pat_data_info['stroke_p'] = np.random.random(len(accepted))

    return pat_data_info

In [4]:
# 50 replicates of 1000 uniformly sampled patients (data_type 1); the loop
# counter doubles as the random seed of each replicate.
data1=[]
for random_seed in range(50):
    pat_data_info_1 = pat_data_generator(1000,1,random_seed)
    data1.append(pat_data_info_1)

# One worksheet per replicate ("Sheet1" .. "Sheet50"). The context manager
# saves and closes the workbook even if a sheet fails to write;
# ExcelWriter.save() no longer exists in pandas 2.x.
with pd.ExcelWriter("data_generate_1.xlsx",engine="xlsxwriter") as Excelwriter:
    for i, df in enumerate(data1):
        df.to_excel(Excelwriter, sheet_name="Sheet" + str(i+1),index=False)

In [5]:
# 50 replicates of 1000 patients sampled with data_type 2 (village picked by
# population share); the loop counter doubles as the random seed.
data2=[]
for random_seed in range(50):
    pat_data_info_2 = pat_data_generator(1000,2,random_seed)
    data2.append(pat_data_info_2)

# One worksheet per replicate ("Sheet1" .. "Sheet50"). The context manager
# saves and closes the workbook even if a sheet fails to write;
# ExcelWriter.save() no longer exists in pandas 2.x.
with pd.ExcelWriter("data_generate_2.xlsx",engine="xlsxwriter") as Excelwriter:
    for i, df in enumerate(data2):
        df.to_excel(Excelwriter, sheet_name="Sheet" + str(i+1),index=False)

In [6]:
# 50 replicates of 1000 patients sampled with data_type 3 (village picked by
# population density); the loop counter doubles as the random seed.
data3=[]
for random_seed in range(50):
    pat_data_info_3 = pat_data_generator(1000,3,random_seed)
    data3.append(pat_data_info_3)

# One worksheet per replicate ("Sheet1" .. "Sheet50"). The context manager
# saves and closes the workbook even if a sheet fails to write;
# ExcelWriter.save() no longer exists in pandas 2.x.
with pd.ExcelWriter("data_generate_3.xlsx",engine="xlsxwriter") as Excelwriter:
    for i, df in enumerate(data3):
        df.to_excel(Excelwriter, sheet_name="Sheet" + str(i+1),index=False)