In [1]:
from geopy.distance import geodesic
from sklearn.metrics.pairwise import haversine_distances

# One row per distinct (name, latitude, longitude) combination found in df_KIC.
# NOTE: df_KIC must already exist (e.g. loaded in an earlier cell); running
# this cell on its own raises NameError.
center_locations = (
    df_KIC[['center_name', 'center_lat', 'center_lon']]
    .drop_duplicates()
    .reset_index(drop=True)
)

import re
def normalize_center_name(name):
    """Return *name* trimmed of surrounding whitespace and with all spaces removed.

    Both the ASCII space (U+0020) and the ideographic space (U+3000) are
    dropped wherever they occur; other interior whitespace is kept.
    """
    trimmed = name.strip()
    # Mapping a code point to None deletes it in a single pass.
    return trimmed.translate({ord(' '): None, ord('\u3000'): None})

# Normalize the join key in every frame so the merge below and the later
# per-center subway lookup compare identically formatted names.
center_locations['center_name'] = center_locations['center_name'].apply(normalize_center_name)
df_subway['center_name'] = df_subway['center_name'].apply(normalize_center_name)
df_road['center_name'] = df_road['center_name'].apply(normalize_center_name)

# Left-join the road data onto the centers (centers without a match get NaN)
# and expose its score under the column name read by calc_infra_scores.
center_locations = (
    center_locations
    .merge(df_road, on='center_name', how='left')
    .rename(columns={'road_score': 'road_score_100'})
)

def subway_score(distances, cutoff):
    """Return an inverse-distance-weighted proximity score for subway stations.

    Every distance ``d`` with ``d <= cutoff`` contributes a linearly decaying
    value ``(cutoff - d) / 20``. The values are averaged with weights
    ``1 / d``, so closer stations dominate the result.

    Args:
        distances: Iterable of distances, in the same unit as ``cutoff``.
        cutoff: Largest distance that still counts toward the score.

    Returns:
        The weighted average; ``0`` when no distance is within ``cutoff``;
        ``cutoff / 20`` when any distance is exactly zero.
    """
    filtered = [d for d in distances if d <= cutoff]
    if not filtered:
        return 0
    # A zero distance has an unbounded weight (1 / 0). The weighted average
    # converges to that station's own value, so return it directly instead of
    # raising ZeroDivisionError (or producing NaN for NumPy floats).
    if any(d == 0 for d in filtered):
        return cutoff / 20
    values = [(cutoff - d) / 20 for d in filtered]
    weights = [1 / d for d in filtered]
    return sum(v * w for v, w in zip(values, weights)) / sum(weights)

def convenience_score(distances, cutoff):
    """Return an inverse-distance-weighted proximity score for convenience stores.

    Every distance ``d`` with ``d <= cutoff`` contributes a linearly decaying
    value ``(cutoff - d) / 50``. The values are averaged with weights
    ``1 / d``, so closer stores dominate the result.

    Args:
        distances: Iterable of distances, in the same unit as ``cutoff``.
        cutoff: Largest distance that still counts toward the score.

    Returns:
        The weighted average; ``0`` when no distance is within ``cutoff``;
        ``cutoff / 50`` when any distance is exactly zero.
    """
    filtered = [d for d in distances if d <= cutoff]
    if not filtered:
        return 0
    # A zero distance has an unbounded weight (1 / 0). The weighted average
    # converges to that store's own value, so return it directly instead of
    # raising ZeroDivisionError (or producing NaN for NumPy floats).
    if any(d == 0 for d in filtered):
        return cutoff / 50
    values = [(cutoff - d) / 50 for d in filtered]
    weights = [1 / d for d in filtered]
    return sum(v * w for v, w in zip(values, weights)) / sum(weights)

def bank_score(distances, cutoff):
    """Return 20 if at least one distance is within ``cutoff``, otherwise 0."""
    for dist in distances:
        if dist <= cutoff:
            # One bank in range is enough; the score is all-or-nothing.
            return 20
    return 0

def calc_infra_scores(center_df, subway_df, max_neighbors=50, cutoff=2000):
    """Compute subway, bank and road infrastructure scores for every center.

    Bank locations are read from the module-level ``df_bank`` frame
    (latitude / longitude columns '위도' / '경도'). Scoring is delegated to
    ``subway_score`` (threshold 1500) and ``bank_score`` (threshold 1000).

    Args:
        center_df: Frame with 'center_name', 'center_lat' and 'center_lon'
            columns and, optionally, a 'road_score_100' column.
        subway_df: Frame with 'center_name' and 'distance' columns
            (presumably metres, matching the thresholds -- TODO confirm).
        max_neighbors: Unused; kept so existing callers keep working.
        cutoff: Unused; kept so existing callers keep working.

    Returns:
        DataFrame with one row per row of ``center_df`` and the columns
        'center_name', 'center_lat', 'center_lon', 'subway_score',
        'bank_score', 'road_score' and 'infra_score' (sum of the three scores).
    """
    columns = [
        'center_name', 'center_lat', 'center_lon',
        'subway_score', 'bank_score', 'road_score', 'infra_score',
    ]

    # Loop-invariant inputs are prepared once instead of once per center.
    bank_coords_rad = np.radians(df_bank[['위도', '경도']].dropna().values)
    has_banks = len(bank_coords_rad) > 0
    subway_by_center = {
        center: dists.tolist()
        for center, dists in subway_df.groupby('center_name', sort=False)['distance']
    }

    # NOTE: the convenience-store score is deliberately not part of the total
    # (it was disabled in the original code), so those distances are no
    # longer computed.
    results = []
    for _, row in center_df.iterrows():
        name = row['center_name']
        lat, lon = row['center_lat'], row['center_lon']

        if has_banks:
            center_coord_rad = np.radians([[lat, lon]])
            # haversine_distances works on a unit sphere; scale by the Earth
            # radius in metres.
            b_dists = haversine_distances(center_coord_rad, bank_coords_rad)[0] * 6371000
        else:
            # haversine_distances rejects empty inputs; no banks means no hits.
            b_dists = []

        s_score = subway_score(subway_by_center.get(name, []), 1500)
        b_score = bank_score(b_dists, 1000)

        # A left merge leaves NaN for centers without road data. NaN is
        # truthy, so a plain `or 0` would let it poison the total.
        road = row.get('road_score_100', 0)
        r_score = 0 if pd.isna(road) else (road or 0)

        results.append({
            'center_name': name,
            'center_lat': lat,
            'center_lon': lon,
            'subway_score': s_score,
            'bank_score': b_score,
            'road_score': r_score,
            'infra_score': s_score + b_score + r_score,
        })

    # Explicit columns keep the schema stable even when center_df is empty.
    return pd.DataFrame(results, columns=columns)

# Score every center, then collapse rows that share a (normalized) name by
# averaging their remaining columns.
df_infra_score = (
    calc_infra_scores(center_locations, df_subway)
    .groupby('center_name', as_index=False)
    .mean()
)

# utf-8-sig prepends a byte-order mark to the written file.
df_infra_score.to_csv("./infra_score.csv", index=False, encoding="utf-8-sig")

NameError: name 'df_KIC' is not defined