In [3]:
import pandas as pd
import geopandas as gpd
import numpy as np

import os
import pickle

In [4]:
# Make the parent of the current working directory the project root.
# NOTE: the path is relative to the *current* directory, so re-running this
# cell in the same kernel moves one more level up each time; run it exactly
# once per kernel session.
os.chdir("../")
root_path = os.getcwd()

# <root>/data/original_data/{raw_data, processed_data}
data_folder_path = os.path.join(root_path, 'data')
original_file_path = os.path.join(data_folder_path, 'original_data')
original_raw_file_path = os.path.join(original_file_path,'raw_data')
original_processed_file_path = os.path.join(original_file_path,'processed_data')

# Entries directly under the raw-data folder.
# NOTE: os.listdir returns entries in arbitrary order, and load_data() picks
# raw_file_folders[0]; which folder that is may differ between machines.
raw_file_folders = os.listdir(original_raw_file_path)

# <root>/data/external_data/{raw_data, processed_data}
external_file_path = os.path.join(data_folder_path,'external_data')
external_raw_file_path = os.path.join(external_file_path,'raw_data')
external_processed_file_path = os.path.join(external_file_path,'processed_data')

In [44]:
def load_data():
    """Read every input needed to build the convenience-index matrix.

    All files except the shapefile are read from ``external_processed_file_path``.

    Returns:
        A 6-tuple, in this order:

        * ``geo_df`` -- ``gpd.read_file`` result for '4개지역_행정동.SHP', looked up
          in the first entry of ``raw_file_folders`` under
          ``original_raw_file_path``.
        * ``bus_station_num_by_dong`` -- 'bus_station_num_by_dong.csv', read
          with the default integer index.
        * ``bus_route_num_dtd`` -- 'bus_route_num_dtd.csv', first column as index.
        * ``subway_route_num_dtd`` -- 'subway_route_num_dtd.csv', first column
          as index.
        * ``real_distance`` -- 'distance_center_dong_to_dong.csv', first column
          as index.
        * ``total_route_num`` -- the object unpickled from
          'total_route_num.pickle'.
    """
    def processed(file_name):
        # Full path of a file inside the processed external-data folder.
        return os.path.join(external_processed_file_path, file_name)

    def read_indexed_csv(file_name):
        # CSV whose first column holds the row labels.
        return pd.read_csv(processed(file_name), index_col=[0])

    shape_file = os.path.join(
        original_raw_file_path, raw_file_folders[0], '4개지역_행정동.SHP'
    )
    geo_df = gpd.read_file(shape_file)

    bus_station_num_by_dong = pd.read_csv(processed('bus_station_num_by_dong.csv'))
    bus_route_num_dtd = read_indexed_csv('bus_route_num_dtd.csv')
    subway_route_num_dtd = read_indexed_csv('subway_route_num_dtd.csv')
    real_distance = read_indexed_csv('distance_center_dong_to_dong.csv')

    # pickle.load executes whatever the file tells it to; only use with a
    # pickle produced by this project.
    with open(processed('total_route_num.pickle'), 'rb') as handle:
        total_route_num = pickle.load(handle)

    return (
        geo_df,
        bus_station_num_by_dong,
        bus_route_num_dtd,
        subway_route_num_dtd,
        real_distance,
        total_route_num,
    )

In [63]:
def Conv_Index_Matrix(distance=3000, geo_row_offset=35, area_scale=10000):
    """Build the dong-by-dong convenience-index matrix ``Conv_In + Conv_Out``.

    * ``Conv_In`` is a diagonal matrix whose i-th entry is
      ``bus_station_num / AREA * area_scale`` for dong i.
    * ``Conv_Out`` is, element-wise, a distance weight
      ``1 - (d / distance) ** 2`` (0 where ``d >= distance``) multiplied by
      ``sqrt(bus_routes / total_bus_routes) + sqrt(subway_routes / total_subway_lines)``.

    Rows and columns of the result follow the column order of the
    ``real_distance`` table returned by ``load_data()``.

    Args:
        distance: Cut-off used by the distance weight; pairs whose distance is
            not below it get weight 0. Must be positive. Same unit as the
            values of ``real_distance`` (unit not visible here -- TODO confirm).
        geo_row_offset: Number of leading rows of the shapefile table to skip
            before matching dongs (the original code hard-coded 35; presumably
            those rows belong to areas outside the study -- TODO confirm).
        area_scale: Multiplier applied to stations-per-area (originally the
            literal 10000).

    Returns:
        pandas.DataFrame indexed and labelled by dong name.

    Raises:
        ValueError: if ``distance`` is not positive, or (from the final
            addition) if the number of dongs in ``Conv_In`` and ``Conv_Out``
            differs.
    """
    if distance <= 0:
        raise ValueError('distance must be positive')

    (geo_df, bus_station_num_by_dong, bus_route_num_dtd,
     subway_route_num_dtd, real_distance, total_route_num) = load_data()

    # Canonical dong order shared by every matrix built below.
    dong_order = real_distance.columns.tolist()

    def double_power_distance_weight(distance, df=real_distance):
        """Return ``1 - (d / distance) ** 2`` where ``d < distance``, else 0.

        The result is labelled with ``df``'s column names on both axes, so
        ``df`` must be square.
        """
        names = df.columns.tolist()
        dist = df.values
        # Vectorised form of the per-element rule; NaN distances compare as
        # "not below the cut-off" and therefore get weight 0.
        weights = np.where(dist < distance, 1 - (dist / distance) ** 2, 0)
        return pd.DataFrame(weights, index=names, columns=names)

    def Conv_In():
        """Diagonal matrix of bus-station density per dong."""
        # Rename on a copy so the frame returned by load_data() is untouched.
        stations = bus_station_num_by_dong.copy()
        stations.columns = ['HDONG_NM', 'bus_station_num']

        merged = (
            geo_df[['HDONG_NM', 'AREA']]
            .iloc[geo_row_offset:]
            .reset_index(drop=True)
            .merge(stations)
        )
        density = pd.Series(
            (merged['bus_station_num'] / merged['AREA'] * area_scale).values,
            index=merged['HDONG_NM'].values,
        )

        # The merge keeps shapefile order, whereas Conv_Out follows
        # ``dong_order``. The final sum is positional, so put the densities
        # into ``dong_order`` whenever the dong names allow an unambiguous
        # mapping; otherwise keep the merge order (the original behaviour).
        names_match = (
            density.index.is_unique
            and len(density) == len(dong_order)
            and set(density.index) == set(dong_order)
        )
        if names_match:
            density = density.reindex(dong_order)

        return np.diag(density.values)

    def Conv_Out():
        """Distance-weighted bus + subway connectivity between dongs."""
        # Keys are Korean: '총버스노선수' = total number of bus routes,
        # '총지하철호선수' = total number of subway lines.
        total_bus_route = total_route_num.get('총버스노선수')
        conv_bus = np.sqrt(bus_route_num_dtd / total_bus_route)
        conv_bus = conv_bus.loc[dong_order, dong_order]

        total_subway_route = total_route_num.get('총지하철호선수')
        conv_subway = np.sqrt(subway_route_num_dtd / total_subway_route)
        conv_subway = conv_subway.loc[dong_order, dong_order]

        # Both operands carry ``dong_order`` on rows and columns, so the
        # label-aligned product is the plain element-wise product.
        return double_power_distance_weight(distance) * (conv_bus + conv_subway)

    # ndarray + DataFrame is positional and keeps Conv_Out's labels.
    return Conv_In() + Conv_Out()

$Conv\_In_{n,n} = \begin{bmatrix}
    c_{1} & & \\
    & \ddots & \\
    & & c_{n}
  \end{bmatrix}\ \ \text{s.t.}\ \ c_i=\frac{\#\ \text{bus stations in}\ h_i}{AREA(h_i)} \times 10^{4}$