### 0. Library

In [200]:
import math

import numpy as np
import pandas as pd
from pyproj import Proj, Transformer, transform

### 1. Data

In [None]:
# Directory that holds the input CSV files; the trailing separator lets file
# names be appended directly.
path = "C:\\Users\\SOHYEUN\\Desktop\\BOAZ_광진구\\광진구_최종\\"

center_csv = path + "center.csv"
park_csv = path + "park.csv"

# center.csv is read with the pandas default encoding, park.csv as cp949.
center = pd.read_csv(center_csv)
park = pd.read_csv(park_csv, encoding='cp949')

### 2. Data Preprocessing

In [None]:
# Calculate the center of the hexagon by averaging the left-right and bottom-top values
center['X'] = center['left'].add(center['right']).div(2)
center['Y'] = center['bottom'].add(center['top']).div(2)

# Bounding-box edges and label columns that are removed before the analysis.
columns_to_remove = [
    'left', 'right', 'bottom', 'top',
    '최종_레이블_결과(2)_행정동', '최종_레이블_결과(2)_클러스터링',
    '최종_레이블_결과(2)_유동인구', '최종_레이블_결과(2)_견인횟수',
    '최종_레이블_결과(2)_유동인구_레이블', '최종_레이블_결과(2)_견인횟수_레이블',
]

# Drop those columns, discard every row that still has a missing value and
# renumber the remaining rows from 0.
center = (
    center
    .drop(columns=columns_to_remove)
    .dropna()
    .reset_index(drop=True)
)
center.head()

In [None]:
# Coordinate transformation (UTM-K to WGS84)
original_crs = "EPSG:5179"  # UTM-K
target_crs = "EPSG:4326"  # WGS84

# Build the transformer once and reuse it for every point. always_xy=True pins
# the axis order to (x, y) = (easting / longitude, northing / latitude), which
# is the order the deprecated Proj(init=...) + transform() pair used.
coord_transformer = Transformer.from_crs(original_crs, target_crs, always_xy=True)


def convert_coordinates(x, y):
    """Convert UTM-K (EPSG:5179) coordinates to WGS84 latitude/longitude.

    Args:
        x: Easting value(s); a scalar or an array-like of values.
        y: Northing value(s); same shape as ``x``.

    Returns:
        A ``(lat, lon)`` tuple with the same shape as the inputs.
    """
    lon, lat = coord_transformer.transform(x, y)
    return lat, lon


# Transform all rows in one vectorized call and store each result directly
# under its correct name: '위도' is latitude, '경도' is longitude.
center['위도'], center['경도'] = convert_coordinates(
    center['X'].to_numpy(), center['Y'].to_numpy()
)
center.drop(columns=['row_index', 'col_index', 'X', 'Y'], inplace=True)
center.rename(columns={'최종_레이블_결과(2)_유동인구+견인횟수': '유동인구+견인횟수'}, inplace=True)
center.head()

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  lon, lat = transform(original_crs, target_crs, x, y)


Unnamed: 0,id,유동인구+견인횟수,위도,경도
0,3110,4.0,37.545402,127.10971
1,3111,4.0,37.5445,127.109715
2,3112,4.0,37.543599,127.109719
3,2940,4.0,37.54404,127.106776
4,2941,4.0,37.543139,127.106781


### 3. Data Analysis

In [None]:
# Function to calculate the Haversine distance between two points (latitude, longitude)
def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Return the great-circle distance in meters between two points.

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        lon1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.

    Returns:
        The Haversine distance in meters, computed on a sphere of radius
        6371 km. NaN inputs produce a NaN result.
    """
    lat1, lon1, lat2, lon2 = map(math.radians, (lat1, lon1, lat2, lon2))

    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    )
    # Floating-point rounding can push `a` marginally above 1 for (near-)
    # antipodal points, which would make sqrt(1 - a) raise ValueError.
    # A plain comparison is used (not min/max) so that NaN stays NaN.
    if a > 1.0:
        a = 1.0
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    r = 6371  # sphere radius in kilometers

    # Kilometers -> meters.
    return r * c * 1000

# Function to calculate the distances from a given center to all parks
def calculate_distances(center_lat, center_lon, park_df):
    """Return the distance in meters from one center point to every park.

    Args:
        center_lat: Latitude of the center point.
        center_lon: Longitude of the center point.
        park_df: DataFrame with one row per park and the coordinate columns
            '위도' (latitude) and '경도' (longitude).

    Returns:
        A list of Haversine distances, one per row of ``park_df`` and in the
        same row order.
    """
    park_coordinates = zip(park_df['위도'], park_df['경도'])
    return [
        haversine_distance(center_lat, center_lon, lat, lon)
        for lat, lon in park_coordinates
    ]

# For every center row, store the list of distances (m) to all parks.
center['park_distances'] = pd.Series(
    [
        calculate_distances(lat, lon, park)
        for lat, lon in zip(center['위도'], center['경도'])
    ],
    index=center.index,
)

In [None]:
# Function to count how many distances are within 200 meters
def count_distances_within_200m(distances):
    """Return how many values in ``distances`` are strictly below 200.

    Args:
        distances: Iterable of distances in meters.

    Returns:
        The number of entries smaller than 200; a value of exactly 200 is
        not counted.
    """
    limit_m = 200
    return sum(1 for value in distances if value < limit_m)

# Reduce each row's list of park distances to the number of parks within 200 m.
center['count_within_200m'] = center['park_distances'].map(count_distances_within_200m)

In [None]:
# Frequency table of 'count_within_200m': how many centers have each park
# count, listed in increasing order of the count.
count_frequencies = center['count_within_200m'].value_counts()
count_frequencies.sort_index()

0     647
1     205
2     105
3      86
4      54
5      58
6      23
7      29
8      23
9      27
10      9
11      2
12      3
13      6
14      1
15      2
16      3
17      1
18      1
20      1
21      1
Name: count_within_200m, dtype: int64

In [None]:
# Priority mapping based on the number of parks within 200 meters.
# The mapping is derived from the data instead of being typed in by hand:
# the largest observed count gets priority 1, the next largest 2, and so on
# (a dense rank in descending order). A hand-written table only covers the
# counts seen in one particular run; any other count would silently map to
# NaN. For counts 0-18, 20 and 21 this yields 21 -> 1, 20 -> 2, 18 -> 3,
# ..., 0 -> 21.
distinct_counts = sorted(center['count_within_200m'].dropna().unique(), reverse=True)
priority_mapping = {
    int(count): rank for rank, count in enumerate(distinct_counts, start=1)
}

center['priority'] = center['count_within_200m'].map(priority_mapping)

In [None]:
# Sort the 'center' DataFrame by the 'priority' column in ascending order,
# so that rows with the best (lowest) priority number come first.
center_sorted = center.sort_values(by='priority', ascending=True)

# Pull the coordinates out once; indexing the DataFrame with .iloc inside the
# nested loop would rebuild a row Series on every single comparison.
sorted_lats = center_sorted['위도'].tolist()
sorted_lons = center_sorted['경도'].tolist()

# A row is suppressed (priority set to 99) when any row that precedes it in
# the sorted order lies less than 200 meters away. any() stops at the first
# such neighbour.
# NOTE(review): the comparison runs against every earlier row, including
# rows that are themselves suppressed - confirm this is the intended rule.
suppressed_labels = [
    label
    for i, label in enumerate(center_sorted.index)
    if any(
        haversine_distance(sorted_lats[i], sorted_lons[i], sorted_lats[j], sorted_lons[j]) < 200
        for j in range(i)
    )
]
center_sorted.loc[suppressed_labels, 'priority'] = 99

# Re-sort so that the suppressed rows (99) end up at the bottom, then show how
# many rows remain at each priority.
center_sorted = center_sorted.sort_values(by='priority', ascending=True)
center_sorted['priority'].value_counts().sort_index()

1        1
8        1
11       2
12       2
13       1
15       2
16       3
17       2
19       2
20       7
21      16
99    1248
Name: priority, dtype: int64

### 4. Result

In [None]:
# Write both result tables next to the input files, without the index column.
for result_frame, result_name in (
    (center, "maximize_demand_within_radius.csv"),
    (center_sorted, "maximize_demand_within_radius_2.csv"),
):
    result_frame.to_csv(path + result_name, index=False)