### 0. Library

In [69]:
import numpy as np
import pandas as pd
from haversine import haversine
from pyproj import Proj, Transformer, transform

### 1. Data

In [None]:
# Base directory for every input and output file. The trailing separator is
# part of the string so file names can be appended by plain concatenation.
path = "C:\\Users\\SOHYEUN\\Desktop\\BOAZ_광진구\\광진구_최종\\"

# center.csv is the only CSV read with the pandas default encoding.
center = pd.read_csv(path + "center.csv")

# The other three CSV files are decoded as cp949.
park, bus, tow = (
    pd.read_csv(path + csv_name, encoding='cp949')
    for csv_name in ("park.csv", "bus.csv", "tow.csv")
)

# Subway data is stored as an Excel workbook rather than a CSV.
subway = pd.read_excel(path + "subway.xlsx")

### 2. Data Preprocessing

In [None]:
# Represent each hexagon by the midpoint of its bounding box:
# X halfway between left and right, Y halfway between bottom and top.
center['X'] = center['left'].add(center['right']).div(2)
center['Y'] = center['bottom'].add(center['top']).div(2)

# Columns that are not needed once the midpoint has been derived.
unused_columns = [
    'left', 'right', 'bottom', 'top',
    '최종_레이블_결과(2)_행정동',
    '최종_레이블_결과(2)_클러스터링',
    '최종_레이블_결과(2)_유동인구',
    '최종_레이블_결과(2)_견인횟수',
    '최종_레이블_결과(2)_유동인구_레이블',
    '최종_레이블_결과(2)_견인횟수_레이블',
]

# Drop the unused columns, discard rows that still contain a missing value,
# and renumber the remaining rows from 0.
center = (
    center.drop(columns=unused_columns)
    .dropna()
    .reset_index(drop=True)
)

In [None]:
# Coordinate transformation (UTM-K, EPSG:5179 -> WGS84, EPSG:4326).
# A single Transformer is built once and reused. ``always_xy=True`` pins the
# axis order to (x, y) on input and (lon, lat) on output, which is the order
# the earlier ``Proj(init=...)`` based conversion produced.
original_crs = "EPSG:5179"  # UTM-K
target_crs = "EPSG:4326"  # WGS84
_transformer = Transformer.from_crs(original_crs, target_crs, always_xy=True)


def convert_coordinates(x, y):
    """Convert UTM-K (EPSG:5179) coordinates to WGS84 (EPSG:4326).

    Args:
        x: X coordinate(s) in EPSG:5179; a scalar or an array-like.
        y: Y coordinate(s) in EPSG:5179; same shape as ``x``.

    Returns:
        A ``(lat, lon)`` tuple whose members are scalars or arrays matching
        the shape of the input.
    """
    lon, lat = _transformer.transform(x, y)
    return lat, lon


# One vectorized call for the whole frame instead of one transform per row.
# '위도' (latitude) is added before '경도' (longitude) so the resulting column
# order matches what the previous assign-then-rename sequence produced.
center['위도'], center['경도'] = convert_coordinates(
    center['X'].to_numpy(), center['Y'].to_numpy()
)
center.drop(columns=['row_index', 'col_index', 'X', 'Y'], inplace=True)
center.rename(columns={'최종_레이블_결과(2)_유동인구+견인횟수': '유동인구+견인횟수'}, inplace=True)

### 3. Data Analysis

In [None]:
def _latlon_pairs(frame):
    """Return one [위도, 경도] pair per row of ``frame`` as a NumPy array."""
    return np.array([[lat, lon] for lat, lon in zip(frame['위도'], frame['경도'])])


# (위도, 경도) pairs for the candidate centers and for each facility type.
center_points = _latlon_pairs(center)
bus_points = _latlon_pairs(bus)
subway_points = _latlon_pairs(subway)
park_points = _latlon_pairs(park)
tow_points = _latlon_pairs(tow)

# Pool the coordinates of all four facility types (centers are not included).
# NOTE: unlike the arrays above, `points` is ordered (경도, 위도).
X = [*bus['경도'], *subway['경도'], *park['경도'], *tow['경도']]
Y = [*bus['위도'], *subway['위도'], *park['위도'], *tow['위도']]
points = np.array([[x_value, y_value] for x_value, y_value in zip(X, Y)])

In [None]:
# Row counts: the pooled facility points first, then each facility type.
total_w = points.shape[0]
bus_w, subway_w, park_w, tow_w = (
    group.shape[0]
    for group in (bus_points, subway_points, park_points, tow_points)
)

# A type's weight is the share of pooled points that belong to the *other*
# types, so the more rows a type has, the smaller its weight.
m1, m2, m3, m4 = (
    (total_w - count) / total_w
    for count in (bus_w, subway_w, park_w, tow_w)
)

In [None]:
# p-median style scoring: every facility hands its type weight to its nearest center.
def _distance_frame(center_points, points, center_ids, names):
    """Build a facility-by-center table of haversine distances.

    Rows are labelled with ``names`` (one per entry of ``points``) and columns
    with ``center_ids`` (one per entry of ``center_points``). Each value is
    ``haversine(center, point) * 1000``.
    """
    rows = [[haversine(c, p) * 1000 for p in points] for c in center_points]
    # reshape keeps the array two-dimensional even when one side is empty.
    matrix = np.asarray(rows, dtype=float).reshape(len(center_points), len(points))
    return pd.DataFrame(matrix.T, index=names, columns=center_ids)


def _weight_nearest(distances, weight):
    """Return a frame with ``weight`` at each row's minimum and 0 elsewhere.

    Ties are kept: every column that equals the row minimum gets ``weight``.
    """
    values = distances.to_numpy()
    # Row-wise minimum as a column vector so it broadcasts across the columns.
    nearest = distances.min(axis=1).to_numpy().reshape(-1, 1)
    return pd.DataFrame(
        np.where(values == nearest, float(weight), 0.0),
        index=distances.index,
        columns=distances.columns,
    )


def pmedian(center_points, bus_points, subway_points, park_points, tow_points):
    """Assign each facility's type weight to its nearest center.

    For every facility type a distance table (facilities x centers) is built,
    then each facility row is replaced by its type weight in the column(s) of
    the closest center and 0 everywhere else. The four weighted tables are
    stacked vertically.

    Besides its arguments the function reads these module-level names:
    ``center['id']`` for the column labels, ``bus['정류소명']``,
    ``subway['역명']``, ``park['주차장명']`` and ``tow['주소']`` for the row
    labels, and ``m1``..``m4`` for the bus, subway, park and tow weights.

    Args:
        center_points: Sequence of (위도, 경도) pairs, one per center.
        bus_points: Sequence of (위도, 경도) pairs, one per bus row.
        subway_points: Sequence of (위도, 경도) pairs, one per subway row.
        park_points: Sequence of (위도, 경도) pairs, one per park row.
        tow_points: Sequence of (위도, 경도) pairs, one per tow row.

    Returns:
        A DataFrame with one row per facility and one column per center id.
        Facilities that share a name or address keep separate rows, so row
        labels may repeat.
    """
    center_ids = list(center['id'])
    groups = [
        (bus_points, list(bus['정류소명']), m1),
        (subway_points, list(subway['역명']), m2),
        (park_points, list(park['주차장명']), m3),
        (tow_points, list(tow['주소']), m4),
    ]

    # Tables are built straight from the distance matrix rather than through
    # name-keyed dicts, so facilities with identical labels are not merged.
    weighted = [
        _weight_nearest(
            _distance_frame(center_points, facility_points, center_ids, names),
            weight,
        )
        for facility_points, names, weight in groups
    ]

    # Combine all DataFrames
    return pd.concat(weighted)

In [81]:
# Score every center against the bus, subway, park and tow points.
D_final = pmedian(
    center_points=center_points,
    bus_points=bus_points,
    subway_points=subway_points,
    park_points=park_points,
    tow_points=tow_points,
)

In [None]:
# Sum D_final down each column, order the totals from largest to smallest,
# and turn the result into a frame whose former index is a regular column
# next to the 'Weight' totals.
result_df = (
    D_final.sum()
    .sort_values(ascending=False)
    .to_frame(name='Weight')
    .reset_index()
)

In [None]:
# Keep the first 30 rows of the ranking and write them, without the row
# index, into the same directory the inputs were read from.
result_df_30 = result_df.iloc[:30]
result_df_30.to_csv(path + "P-median_30.csv", index=False)