In [1]:
import math
import pandas as pd

In [2]:
# haversine 공식 함수
def haversine_distance(lat1, lon1, lat2, lon2):
  # 위도와 경도를 라디안으로 변환
  lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])

  # Haversine 공식 계산
  dlon = lon2 - lon1
  dlat = lat2 - lat1
  a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
  c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
  # 지구 반지름
  radius = 6371
  # 두 지점 간의 거리 계산
  distance = radius * c

  return distance

# 아파트와 병원 간의 거리 계산 후 데이터프레임에 추가하는 함수
def calculate_distances(apartRow, medicalDf):
    distances = []
    for _, medicalRow in medicalDf.iterrows():
        distance = haversine_distance(apartRow['위도'], apartRow['경도'], medicalRow['위도'], medicalRow['경도'])
        distances.append(distance)
    return pd.Series(distances, index=medicalDf['이름'])

In [3]:
def upload_location_data():
    elementary = pd.read_csv('../preprocessed/교육시설/초등학교.csv')
    middle = pd.read_csv('../preprocessed/교육시설/중학교.csv')
    high = pd.read_csv('../preprocessed/교육시설/고등학교.csv')
    kinder = pd.read_csv('../preprocessed/교육시설/유치원.csv')
    daycare = pd.read_csv('../preprocessed/교육시설/어린이집.csv')
    academy = pd.read_csv('../preprocessed/교육시설/학원.csv')

    bus = pd.read_csv('../preprocessed/교통시설/버스정류장.csv')
    subway = pd.read_csv('../preprocessed/교통시설/역.csv')
    
    mart = pd.read_csv('../preprocessed/상업시설/마트.csv')

    public = pd.read_csv('../preprocessed/의료시설/보건병원.csv')
    nursing = pd.read_csv('../preprocessed/의료시설/요양병원.csv')
    hospital = pd.read_csv('../preprocessed/의료시설/일반병원.csv')
    general = pd.read_csv('../preprocessed/의료시설/종합병원.csv')
    clinic = pd.read_csv('../preprocessed/의료시설/의원.csv')
    oriental = pd.read_csv('../preprocessed/의료시설/한의원.csv')
    dentistry = pd.read_csv('../preprocessed/의료시설/치과.csv')
    pharmacy = pd.read_csv('../preprocessed/의료시설/약국.csv')

    park = pd.read_csv('../preprocessed/편의시설/공원.csv')
    library = pd.read_csv('../preprocessed/편의시설/도서관.csv')
    cafe = pd.read_csv('../preprocessed/편의시설/카페.csv')
    convenience = pd.read_csv('../preprocessed/편의시설/편의점.csv')

    return (elementary, middle, high, kinder, daycare, academy, bus, subway, mart, public, nursing, hospital, general, clinic, oriental, dentistry, pharmacy, park, library, cafe, convenience)



In [4]:
def get_distance(apart):
  # 1개의 아파트와 n개의 입지시설 데이터를 매칭시키고 거리를 구함.
  # 구해진 거리 중 최소의 값만 추출할 것.
  # 의료시설
  pharmacy_dt = apart.apply(lambda row: calculate_distances(row, pharmacy), axis=1)
  pharmacy_minDt = pharmacy_dt.apply(lambda row : row.min(), axis = 1)

  clinic_dt = apart.apply(lambda row: calculate_distances(row, clinic), axis=1)
  clinic_minDt = clinic_dt.apply(lambda row : row.min(), axis = 1)

  hospital_dt = apart.apply(lambda row: calculate_distances(row, hospital), axis=1)
  hospital_minDt = hospital_dt.apply(lambda row : row.min(), axis = 1)

  general_dt = apart.apply(lambda row: calculate_distances(row, general), axis=1)
  general_minDt = general_dt.apply(lambda row : row.min(), axis = 1)
  
  nursing_dt = apart.apply(lambda row: calculate_distances(row, nursing), axis=1)
  nursing_minDt = nursing_dt.apply(lambda row : row.min(), axis = 1)
  
  public_dt = apart.apply(lambda row: calculate_distances(row, public), axis=1)
  public_minDt = public_dt.apply(lambda row : row.min(), axis = 1)

  dentistry_dt = apart.apply(lambda row: calculate_distances(row, dentistry), axis=1)
  dentistry_minDt = dentistry_dt.apply(lambda row : row.min(), axis = 1)

  oriental_dt = apart.apply(lambda row: calculate_distances(row, oriental), axis=1)
  oriental_minDt = oriental_dt.apply(lambda row : row.min(), axis = 1)


  # 상업시설
  mart_dt = apart.apply(lambda row: calculate_distances(row, mart), axis=1)
  mart_minDt = mart_dt.apply(lambda row : row.min(), axis = 1)


  # 편의시설
  park_dt = apart.apply(lambda row: calculate_distances(row, park), axis=1)
  park_minDt = park_dt.apply(lambda row : row.min(), axis = 1)

  library_dt = apart.apply(lambda row: calculate_distances(row, library), axis=1)
  library_minDt = library_dt.apply(lambda row : row.min(), axis = 1)

  cafe_dt = apart.apply(lambda row: calculate_distances(row, cafe), axis=1)
  cafe_minDt = cafe_dt.apply(lambda row : row.min(), axis = 1)

  convenience_dt = apart.apply(lambda row: calculate_distances(row, convenience), axis=1)
  convenience_minDt = convenience_dt.apply(lambda row : row.min(), axis = 1)


  # 교육시설
  kinder_dt = apart.apply(lambda row: calculate_distances(row, kinder), axis=1)
  kinder_minDt = kinder_dt.apply(lambda row : row.min(), axis = 1)

  elementary_dt = apart.apply(lambda row: calculate_distances(row, elementary), axis=1)
  elementary_minDt = elementary_dt.apply(lambda row : row.min(), axis = 1)

  middle_dt = apart.apply(lambda row: calculate_distances(row, middle), axis=1)
  middle_minDt = middle_dt.apply(lambda row : row.min(), axis = 1)

  high_dt = apart.apply(lambda row: calculate_distances(row, high), axis=1)
  high_minDt = high_dt.apply(lambda row : row.min(), axis = 1)

  academy_dt = apart.apply(lambda row: calculate_distances(row, academy), axis=1)
  academy_minDt = academy_dt.apply(lambda row : row.min(), axis = 1)

  daycare_dt = apart.apply(lambda row: calculate_distances(row, daycare), axis=1)
  daycare_minDt = daycare_dt.apply(lambda row : row.min(), axis = 1)


  # 교통시설
  subway_dt = apart.apply(lambda row: calculate_distances(row, subway), axis=1)
  subway_minDt = subway_dt.apply(lambda row : row.min(), axis = 1)

  bus_dt = apart.apply(lambda row: calculate_distances(row, bus), axis=1)
  bus_minDt = bus_dt.apply(lambda row : row.min(), axis = 1)


  # 모든 아파트로부터 입지 간 최소 거리 데이터 하나로 합침
  location = pd.concat(
    [pharmacy_minDt, clinic_minDt, hospital_minDt, general_minDt, nursing_minDt, public_minDt, oriental_minDt, dentistry_minDt, mart_minDt, park_minDt, library_minDt, cafe_minDt, convenience_minDt, daycare_minDt ,kinder_minDt, elementary_minDt, middle_minDt, high_minDt, academy_minDt, subway_minDt, bus_minDt],
    axis=1,
    keys=['약국', '의원', '일반병원', '종합병원', '요양병원', '보건병원', '한방병원', '치과', '대형상권', '공원', '도서관', '카페', '편의점', '유치원', '어린이집', '초등학교', '중학교', '고등학교', '학원', '지하철', '버스정류장']
    )

  return location

In [5]:
apart = pd.read_csv('../preprocessed/아파트거래정보/아파트.csv')
elementary, middle, high, kinder, daycare, academy, bus, subway, mart, public, nursing, hospital, general, clinic, oriental, dentistry, pharmacy, park, library, cafe, convenience = upload_location_data()

location = get_distance(apart)

In [6]:
location

Unnamed: 0,약국,의원,일반병원,종합병원,요양병원,보건병원,한방병원,치과,대형상권,공원,...,카페,편의점,유치원,어린이집,초등학교,중학교,고등학교,학원,지하철,버스정류장
0,0.130256,0.454854,0.542032,0.291043,0.679355,0.707622,0.469056,0.484308,0.628393,0.116912,...,0.084001,0.152504,0.033228,0.332967,0.328262,0.623375,0.215546,0.099229,3.028092,0.083207
1,0.721088,1.258362,2.835217,2.711167,3.071503,3.361473,1.198929,1.258362,3.136678,0.213333,...,0.111722,0.273108,0.039809,0.350827,0.360251,1.191155,2.866694,0.105415,5.602925,0.067051
2,0.143320,0.143320,0.629816,1.217039,0.842160,0.952475,0.143320,0.205523,0.881730,0.079776,...,0.156677,0.178030,0.216343,0.228673,0.469912,0.206943,0.247931,0.170982,1.597263,0.068985
3,0.423571,0.423571,1.388439,0.899408,1.724261,1.632069,0.660496,0.555569,1.600529,0.174151,...,0.269183,0.591760,0.064406,0.218806,0.245773,0.953547,1.190197,0.064406,3.941658,0.183002
4,0.166394,0.166394,0.415085,0.517475,0.376609,0.481063,0.166394,0.227715,0.327894,0.110791,...,0.128020,0.116686,0.054395,0.183632,0.164991,0.882960,0.242664,0.122964,2.743277,0.178459
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18718,0.306613,0.306613,2.529721,3.381084,2.664595,3.728887,0.306613,0.273670,2.771110,0.162814,...,0.134132,0.000000,0.072416,0.185278,0.160754,0.553682,1.961931,0.253618,3.298519,0.132168
18719,0.695487,0.695487,3.476062,3.616592,2.437567,4.646605,0.695487,0.706854,2.641652,0.140752,...,0.276984,0.134697,0.021981,0.398723,0.841367,1.428941,2.408930,0.135541,3.199122,0.157108
18720,0.492188,0.633400,1.246105,4.129745,1.401621,2.597206,0.659045,0.642252,3.133757,0.152098,...,0.079623,0.000000,0.242387,0.972496,1.071163,1.250350,1.263084,0.801470,1.138497,0.052010
18721,0.229807,0.229807,0.766202,0.496637,2.005204,2.056608,0.320461,0.229807,0.647811,0.123452,...,0.207378,0.000000,0.156655,0.308612,0.331496,1.125586,1.116376,0.078086,0.376599,0.196818


In [7]:
data = pd.concat([apart, location], axis=1)
data

Unnamed: 0,단지명,주소,층,계약년월,전용면적(㎡),건축년도,거래금액(만원),위도,경도,약국,...,카페,편의점,유치원,어린이집,초등학교,중학교,고등학교,학원,지하철,버스정류장
0,신명나리,경기도 평택시 비전동 881,14,202408,59.9700,1995,17800,36.993127,127.119439,0.130256,...,0.084001,0.152504,0.033228,0.332967,0.328262,0.623375,0.215546,0.099229,3.028092,0.083207
1,e편한세상평택용이2단지,경기도 평택시 용이동 738,20,202408,84.9100,2017,33000,37.003199,127.146569,0.721088,...,0.111722,0.273108,0.039809,0.350827,0.360251,1.191155,2.866694,0.105415,5.602925,0.067051
2,외기노조,경기도 평택시 비전동 산76-17,2,202408,58.8600,1985,11500,36.995202,127.102515,0.143320,...,0.156677,0.178030,0.216343,0.228673,0.469912,0.206943,0.247931,0.170982,1.597263,0.068985
3,평택효성해링턴플레이스2단지,경기도 평택시 소사동 697,9,202408,72.6500,2019,34700,36.987405,127.129700,0.423571,...,0.269183,0.591760,0.064406,0.218806,0.245773,0.953547,1.190197,0.064406,3.941658,0.183002
4,비전현대,경기도 평택시 비전동 879-5,8,202408,59.7200,1994,20000,36.993776,127.116120,0.166394,...,0.128020,0.116686,0.054395,0.183632,0.164991,0.882960,0.242664,0.122964,2.743277,0.178459
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18718,평택지제역동문굿모닝힐맘시티4단지,경기도 평택시 칠원동 595,6,202108,73.4148,2019,48000,37.024345,127.106774,0.306613,...,0.134132,0.000000,0.072416,0.185278,0.160754,0.553682,1.961931,0.253618,3.298519,0.132168
18719,동광,경기도 평택시 칠원동 499,4,202108,84.9290,2003,19800,37.032128,127.102220,0.695487,...,0.276984,0.134697,0.021981,0.398723,0.841367,1.428941,2.408930,0.135541,3.199122,0.157108
18720,미도,경기도 평택시 신장동 363-8,3,202108,46.5300,1986,6800,37.084974,127.049929,0.492188,...,0.079623,0.000000,0.242387,0.972496,1.071163,1.250350,1.263084,0.801470,1.138497,0.052010
18721,롯데인벤스스카이,경기도 평택시 평택동 316,17,202108,141.2890,2008,52500,36.988598,127.088953,0.229807,...,0.207378,0.000000,0.156655,0.308612,0.331496,1.125586,1.116376,0.078086,0.376599,0.196818


In [8]:
data.to_csv('../preprocessed/data.csv', index=False)