In [15]:
import math
import pandas as pd

In [16]:
# haversine 공식 함수
def haversine_distance(lat1, lon1, lat2, lon2):
  # 위도와 경도를 라디안으로 변환
  lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])

  # Haversine 공식 계산
  dlon = lon2 - lon1
  dlat = lat2 - lat1
  a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
  c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
  # 지구 반지름
  radius = 6371
  # 두 지점 간의 거리 계산
  distance = radius * c

  return distance

# 아파트와 병원 간의 거리 계산 후 데이터프레임에 추가하는 함수
def calculate_distances(apartRow, medicalDf):
    distances = []
    for _, medicalRow in medicalDf.iterrows():
        distance = haversine_distance(apartRow['위도'], apartRow['경도'], medicalRow['위도'], medicalRow['경도'])
        distances.append(distance)
    return pd.Series(distances, index=medicalDf['이름'])

In [17]:
def upload_location_data():
    elementary = pd.read_csv('../preprocessed/교육시설/초등학교.csv')
    middle = pd.read_csv('../preprocessed/교육시설/중학교.csv')
    high = pd.read_csv('../preprocessed/교육시설/고등학교.csv')
    kinder = pd.read_csv('../preprocessed/교육시설/유치원.csv')
    daycare = pd.read_csv('../preprocessed/교육시설/어린이집.csv')
    academy = pd.read_csv('../preprocessed/교육시설/학원.csv')

    bus = pd.read_csv('../preprocessed/교통시설/버스정류장.csv')
    subway = pd.read_csv('../preprocessed/교통시설/역.csv')
    
    mart = pd.read_csv('../preprocessed/상업시설/마트.csv')

    public = pd.read_csv('../preprocessed/의료시설/보건병원.csv')
    nursing = pd.read_csv('../preprocessed/의료시설/요양병원.csv')
    hospital = pd.read_csv('../preprocessed/의료시설/일반병원.csv')
    general = pd.read_csv('../preprocessed/의료시설/종합병원.csv')
    clinic = pd.read_csv('../preprocessed/의료시설/의원.csv')
    oriental = pd.read_csv('../preprocessed/의료시설/한의원.csv')
    dentistry = pd.read_csv('../preprocessed/의료시설/치과.csv')
    pharmacy = pd.read_csv('../preprocessed/의료시설/약국.csv')

    park = pd.read_csv('../preprocessed/편의시설/공원.csv')
    library = pd.read_csv('../preprocessed/편의시설/도서관.csv')
    cafe = pd.read_csv('../preprocessed/편의시설/카페.csv')
    convenience = pd.read_csv('../preprocessed/편의시설/편의점.csv')

    return (elementary, middle, high, kinder, daycare, academy, bus, subway, mart, public, nursing, hospital, general, clinic, oriental, dentistry, pharmacy, park, library, cafe, convenience)



In [18]:
def get_distance(apart):
  # 1개의 아파트와 n개의 입지시설 데이터를 매칭시키고 거리를 구함.
  # 구해진 거리 중 최소의 값만 추출할 것.
  # 의료시설
  pharmacy_dt = apart.apply(lambda row: calculate_distances(row, pharmacy), axis=1)
  pharmacy_minDt = pharmacy_dt.apply(lambda row : row.min(), axis = 1)

  clinic_dt = apart.apply(lambda row: calculate_distances(row, clinic), axis=1)
  clinic_minDt = clinic_dt.apply(lambda row : row.min(), axis = 1)

  hospital_dt = apart.apply(lambda row: calculate_distances(row, hospital), axis=1)
  hospital_minDt = hospital_dt.apply(lambda row : row.min(), axis = 1)

  general_dt = apart.apply(lambda row: calculate_distances(row, general), axis=1)
  general_minDt = general_dt.apply(lambda row : row.min(), axis = 1)
  
  nursing_dt = apart.apply(lambda row: calculate_distances(row, nursing), axis=1)
  nursing_minDt = nursing_dt.apply(lambda row : row.min(), axis = 1)
  
  public_dt = apart.apply(lambda row: calculate_distances(row, public), axis=1)
  public_minDt = public_dt.apply(lambda row : row.min(), axis = 1)

  dentistry_dt = apart.apply(lambda row: calculate_distances(row, dentistry), axis=1)
  dentistry_minDt = dentistry_dt.apply(lambda row : row.min(), axis = 1)

  oriental_dt = apart.apply(lambda row: calculate_distances(row, oriental), axis=1)
  oriental_minDt = oriental_dt.apply(lambda row : row.min(), axis = 1)


  # 상업시설
  mart_dt = apart.apply(lambda row: calculate_distances(row, mart), axis=1)
  mart_minDt = mart_dt.apply(lambda row : row.min(), axis = 1)


  # 편의시설
  park_dt = apart.apply(lambda row: calculate_distances(row, park), axis=1)
  park_minDt = park_dt.apply(lambda row : row.min(), axis = 1)

  library_dt = apart.apply(lambda row: calculate_distances(row, library), axis=1)
  library_minDt = library_dt.apply(lambda row : row.min(), axis = 1)

  cafe_dt = apart.apply(lambda row: calculate_distances(row, cafe), axis=1)
  cafe_minDt = cafe_dt.apply(lambda row : row.min(), axis = 1)

  convenience_dt = apart.apply(lambda row: calculate_distances(row, convenience), axis=1)
  convenience_minDt = convenience_dt.apply(lambda row : row.min(), axis = 1)


  # 교육시설
  kinder_dt = apart.apply(lambda row: calculate_distances(row, kinder), axis=1)
  kinder_minDt = kinder_dt.apply(lambda row : row.min(), axis = 1)

  elementary_dt = apart.apply(lambda row: calculate_distances(row, elementary), axis=1)
  elementary_minDt = elementary_dt.apply(lambda row : row.min(), axis = 1)

  middle_dt = apart.apply(lambda row: calculate_distances(row, middle), axis=1)
  middle_minDt = middle_dt.apply(lambda row : row.min(), axis = 1)

  high_dt = apart.apply(lambda row: calculate_distances(row, high), axis=1)
  high_minDt = high_dt.apply(lambda row : row.min(), axis = 1)

  academy_dt = apart.apply(lambda row: calculate_distances(row, academy), axis=1)
  academy_minDt = academy_dt.apply(lambda row : row.min(), axis = 1)

  daycare_dt = apart.apply(lambda row: calculate_distances(row, daycare), axis=1)
  daycare_minDt = daycare_dt.apply(lambda row : row.min(), axis = 1)


  # 교통시설
  subway_dt = apart.apply(lambda row: calculate_distances(row, subway), axis=1)
  subway_minDt = subway_dt.apply(lambda row : row.min(), axis = 1)

  bus_dt = apart.apply(lambda row: calculate_distances(row, bus), axis=1)
  bus_minDt = bus_dt.apply(lambda row : row.min(), axis = 1)


  # 모든 아파트로부터 입지 간 최소 거리 데이터 하나로 합침
  location = pd.concat(
    [pharmacy_minDt, clinic_minDt, hospital_minDt, general_minDt, nursing_minDt, public_minDt, oriental_minDt, dentistry_minDt, mart_minDt, park_minDt, library_minDt, cafe_minDt, convenience_minDt, daycare_minDt ,kinder_minDt, elementary_minDt, middle_minDt, high_minDt, academy_minDt, subway_minDt, bus_minDt],
    axis=1,
    keys=['약국', '의원', '일반병원', '종합병원', '요양병원', '보건병원', '한방병원', '치과', '대형상권', '공원', '도서관', '카페', '편의점', '유치원', '어린이집', '초등학교', '중학교', '고등학교', '학원', '지하철', '버스정류장']
    )

  return location

In [20]:
apart = pd.read_csv('../preprocessed/아파트거래정보/아파트.csv')
elementary, middle, high, kinder, daycare, academy, bus, subway, mart, public, nursing, hospital, general, clinic, oriental, dentistry, pharmacy, park, library, cafe, convenience = upload_location_data()

location = get_distance(apart)

In [21]:
location

Unnamed: 0,약국,의원,병원,종합병원,요양병원,보건병원,한방병원,치과,대형상권,공원,...,카페,편의점,유치원,어린이집,초등학교,중학교,고등학교,학원,지하철,버스정류장
0,0.130256,0.454854,,0.291043,0.679355,0.707622,0.469056,0.484308,0.628393,0.116912,...,0.084001,0.152504,0.033228,0.332967,0.328262,0.623375,0.215546,0.099229,3.028092,0.083207
1,0.721088,1.258362,,2.711167,3.071503,3.361473,1.198929,1.258362,3.136678,0.213333,...,0.111722,0.273108,0.039809,0.350827,0.360251,1.191155,2.866694,0.105415,5.602925,0.067051
2,0.143320,0.143320,,1.217039,0.842160,0.952475,0.143320,0.205523,0.881730,0.079776,...,0.156677,0.178030,0.216343,0.228673,0.469912,0.206943,0.247931,0.170982,1.597263,0.068985
3,0.423571,0.423571,,0.899408,1.724261,1.632069,0.660496,0.555569,1.600529,0.174151,...,0.269183,0.591760,0.064406,0.218806,0.245773,0.953547,1.190197,0.064406,3.941658,0.183002
4,0.166394,0.166394,,0.517475,0.376609,0.481063,0.166394,0.227715,0.327894,0.110791,...,0.128020,0.116686,0.054395,0.183632,0.164991,0.882960,0.242664,0.122964,2.743277,0.178459
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11816,0.194992,0.160569,,1.072278,0.933046,1.118312,0.142270,0.233355,0.692581,0.185065,...,0.076305,0.098657,0.197714,0.070444,0.141312,0.795269,0.545639,0.104001,0.532769,0.041589
11817,0.186667,0.075061,,3.009552,1.030643,1.316948,0.470192,0.408563,1.865011,0.232229,...,0.172667,0.029006,0.203691,0.212836,0.422670,0.521126,0.529095,0.275826,0.303048,0.096952
11818,0.115017,0.138698,,3.289887,1.308601,1.575671,0.376609,0.190867,2.141763,0.192393,...,0.000000,0.000000,0.238545,0.241582,0.418986,0.632656,0.644900,0.263998,0.378780,0.062472
11819,0.405643,0.407097,,3.184476,1.306136,1.207937,0.409136,0.405643,1.843682,0.124290,...,0.169283,0.099925,0.031811,0.197850,0.194667,0.235065,0.808604,0.031811,0.852373,0.043606


In [41]:
data = pd.concat([apart, location], axis=1)
data

Unnamed: 0,단지명,주소,층,계약년월,전용면적(㎡),건축년도,거래금액(만원),위도,경도,약국,...,카페,편의점,유치원,어린이집,초등학교,중학교,고등학교,학원,지하철,버스정류장
0,신명나리,경기도 평택시 비전동 881,14,202408,59.97,1995,17800,36.993127,127.119439,0.130256,...,0.084001,0.152504,0.033228,0.332967,0.328262,0.623375,0.215546,0.099229,3.028092,0.083207
1,e편한세상평택용이2단지,경기도 평택시 용이동 738,20,202408,84.91,2017,33000,37.003199,127.146569,0.721088,...,0.111722,0.273108,0.039809,0.350827,0.360251,1.191155,2.866694,0.105415,5.602925,0.067051
2,외기노조,경기도 평택시 비전동 산76-17,2,202408,58.86,1985,11500,36.995202,127.102515,0.143320,...,0.156677,0.178030,0.216343,0.228673,0.469912,0.206943,0.247931,0.170982,1.597263,0.068985
3,평택효성해링턴플레이스2단지,경기도 평택시 소사동 697,9,202408,72.65,2019,34700,36.987405,127.129700,0.423571,...,0.269183,0.591760,0.064406,0.218806,0.245773,0.953547,1.190197,0.064406,3.941658,0.183002
4,비전현대,경기도 평택시 비전동 879-5,8,202408,59.72,1994,20000,36.993776,127.116120,0.166394,...,0.128020,0.116686,0.054395,0.183632,0.164991,0.882960,0.242664,0.122964,2.743277,0.178459
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11816,휴먼파크리움,경기도 평택시 서정동 263-1,12,202208,22.28,2018,10000,37.057824,127.058599,0.194992,...,0.076305,0.098657,0.197714,0.070444,0.141312,0.795269,0.545639,0.104001,0.532769,0.041589
11817,이안힐시티타워2차아파트,경기도 평택시 지산동 851-10,11,202208,56.48,2017,20000,37.075324,127.057812,0.186667,...,0.172667,0.029006,0.203691,0.212836,0.422670,0.521126,0.529095,0.275826,0.303048,0.096952
11818,이한렉스빌플러스,경기도 평택시 지산동 781-9,5,202208,27.04,2017,11000,37.077851,127.057330,0.115017,...,0.000000,0.000000,0.238545,0.241582,0.418986,0.632656,0.644900,0.263998,0.378780,0.062472
11819,건영(지산),경기도 평택시 지산동 1094,8,202208,53.58,1993,16500,37.076366,127.063924,0.405643,...,0.169283,0.099925,0.031811,0.197850,0.194667,0.235065,0.808604,0.031811,0.852373,0.043606


In [42]:
data.to_csv('../preprocessed/data.csv', index=False)