# 1. Load and clean data

In [1]:
import pandas as pd
from tqdm import tqdm

# Facility table ("friendly facilities"): one CSV read into `df`
facility_csv_path = "datas/base_data_final.csv"
df = pd.read_csv(facility_csv_path, encoding="utf-8")

# House table: one CSV read into `df2`
house_csv_path = "datas/seoul_pri_2020_ll.csv"
df2 = pd.read_csv(house_csv_path, encoding="utf-8")


In [2]:
# Data cleaning: remove facility rows that are missing a coordinate.
# A row is dropped when its longitude or its latitude is null. The frame is
# modified in place and the surviving rows keep their original index labels.
df.dropna(subset=['longitude', 'latitude'], inplace=True)


In [3]:
# Data cleaning: convert longitude from str to float so it can be used
# in the numeric distance calculations below.
df['longitude'] = df['longitude'].astype(float)


In [4]:
# Summarise the cleaned facility table: column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 17641 entries, 0 to 17828
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   명칭            17637 non-null  object 
 1   latitude      17641 non-null  float64
 2   longitude     17641 non-null  float64
 3   Station_line  738 non-null    object 
 4   구분            17641 non-null  object 
 5   Area(m^2)     411 non-null    float64
dtypes: float64(3), object(3)
memory usage: 964.7+ KB


In [5]:
# Summarise the house table: column dtypes and non-null counts.
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 163821 entries, 0 to 163820
Data columns (total 7 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   주소         163821 non-null  object 
 1   건물주용도      163821 non-null  object 
 2   건물명        163821 non-null  object 
 3   건물면적       163821 non-null  float64
 4   물건금액       163821 non-null  int64  
 5   latitude   163821 non-null  float64
 6   longitude  163821 non-null  float64
dtypes: float64(3), int64(1), object(3)
memory usage: 8.7+ MB


## 1.1 Prepare empty df

In [6]:
# Empty result frame; the columns are declared group by group, in output order.

# Nearest-facility distance, one column per facility category
min_distance_columns = ['공원최단거리', '도서관최단거리',
                        '초등학교최단거리', '중학교최단거리', '고등학교최단거리',
                        '지진옥외대피소최단거리', '응급실최단거리']

# Number of facilities within a given radius, one column per (radius, category)
count_columns = ['0.5km지하철', '1.0km지하철', '1.5km지하철',
                 '1.0km병원', '2.5km병원',
                 #'0.1km편의점', '0.3km편의점', '0.5km편의점',
                 '0.5km버스정류장', '1.0km버스정류장', '1.5km버스정류장']

# Attributes taken over from the house table
house_columns = ['건물면적', '건물금액', '건물주용도']

df_complete = pd.DataFrame(columns=min_distance_columns + count_columns + house_columns)

# 2. Define functions

In [7]:
from scipy.spatial import distance

In [8]:
# Trial calculation: Euclidean distance between the house at index 0 and the
# facility at index 1, each given as a (longitude, latitude) pair.
house_point = (df2['longitude'][0], df2['latitude'][0])
facility_point = (df['longitude'][1], df['latitude'][1])
distance.euclidean(house_point, facility_point)

0.1279450611492633

In [9]:
# The Euclidean distance computed on (longitude, latitude) pairs is not in km.
def make_km(x):
    """Scale a raw coordinate-space distance to the notebook's km figure.

    The input is floored to a whole number of 0.00001-sized steps and that
    count is divided by 1000, so the result has about three decimal places.

    NOTE(review): this amounts to treating one coordinate degree as roughly
    100 km, which looks like an approximation — confirm it is accurate enough.
    """
    step_count = x // 0.00001  # floor division: partial steps are discarded
    return step_count / 1000

## 2.1 최단거리 구하기

In [None]:
# Input: the house is a single data point; the friendly facilities are data columns
def least_distance_friendly(house_lon, house_lat, friendly_lons, friendly_lats):
    """Return the distance from one house to its nearest facility.

    Each distance is the Euclidean distance between (longitude, latitude)
    pairs, converted with ``make_km``.

    Parameters
    ----------
    house_lon, house_lat
        Coordinates of the house.
    friendly_lons, friendly_lats
        Parallel iterables of facility coordinates. They are read
        positionally, so lists, arrays and pandas Series (whatever their
        index labels) all work.

    Returns
    -------
    The smallest converted distance, or the sentinel -999 when no facility
    yields a usable distance (for example when the inputs are empty).
    """
    house_point = (house_lon, house_lat)
    dist = []
    for lon, lat in zip(friendly_lons, friendly_lats):
        try:
            km = make_km(distance.euclidean(house_point, (lon, lat)))
        except (TypeError, ValueError):
            # Unusable coordinate (e.g. None or non-numeric text): skip this
            # facility instead of letting a sentinel win the minimum.
            continue
        if km != km:
            # NaN distance (a NaN coordinate); it cannot take part in min()
            continue
        dist.append(km)
    return min(dist, default=-999)

## 2.2 범위 내 개수 구하기

In [None]:
# Count the friendly facilities that lie inside a radius
# Input: list of distances, radius threshold [km]

def count_in_distance(dist_list, range_km):
    """Return how many entries of ``dist_list`` are strictly below ``range_km``."""
    return sum(1 for d in dist_list if d < range_km)

In [None]:
# Input: the house is a single data point; the friendly facilities are data columns
def num_by_distance(house_lon, house_lat, friendly_lons, friendly_lats, range_list):
    """Count the facilities around one house for each radius in ``range_list``.

    Each distance is the Euclidean distance between (longitude, latitude)
    pairs, converted with ``make_km``.

    Parameters
    ----------
    house_lon, house_lat
        Coordinates of the house.
    friendly_lons, friendly_lats
        Parallel iterables of facility coordinates. They are read
        positionally, so lists, arrays and pandas Series (whatever their
        index labels) all work.
    range_list
        Radii to count within.

    Returns
    -------
    list
        One count per radius, in the order of ``range_list``; a facility is
        counted when its distance is strictly below the radius.
    """
    house_point = (house_lon, house_lat)
    dist = [
        make_km(distance.euclidean(house_point, (lon, lat)))
        for lon, lat in zip(friendly_lons, friendly_lats)
    ]
    return [count_in_distance(dist, ran) for ran in range_list]

In [13]:
# selection = ['지하철', '병원', '버스정류장'
#             #, '편의점'
#             ]
# selection_range = [[0.5, 1, 1.5],
#                    [0.5, 1],
#                    [0.5, 1, 1.5]
#                    #,[0.1, 0.3, 0.5]
#                   ]

# for i in tqdm(range(len(selection)), desc='Progress..', mininterval=0.1):
#     filt = df['구분'] == selection[i]

#     for j in range(len(df['longitude'])):
#         ret = num_by_distance(df['longitude'][j], df['latitude'][j], df[filt]['longitude'].values, df[filt]['latitude'].values, selection_range[i])

In [37]:
# Facility categories for which the nearest-facility distance is recorded
selection = ['공원', '도서관', '초등학교', '중학교', '고등학교', '지진옥외대피소', '응급실']
# Facility categories that are counted within each radius of selection_range
selection2 = ['지하철', '병원', '버스정류장'
            #, '편의점'
             ]
# Radii [km]: one list per entry of selection2, in the same order
selection_range = [[0.5, 1, 1.5],
                   [1, 2.5],
                   [0.5, 1, 1.5]
                   #,[0.1, 0.3, 0.5]
                  ]

################################################################################################

# The three trailing columns of df_complete are house attributes copied from
# df2 at the end of this cell; every column before them is computed here.
feature_columns = list(df_complete.columns[:-3])

# Fail fast on a column/value mismatch rather than at the end of a long run.
expected_width = len(selection) + sum(len(ranges) for ranges in selection_range)
assert len(feature_columns) == expected_width, \
    "df_complete feature columns do not match the values computed per house"


def _facility_coords(category):
    """Return (longitudes, latitudes) arrays of the facilities whose '구분' equals category."""
    facilities = df[df['구분'] == category]
    return facilities['longitude'].values, facilities['latitude'].values


# The facility coordinates do not depend on the house, so extract them once
# instead of re-filtering df for every house.
min_dist_coords = [_facility_coords(category) for category in selection]
count_coords = [_facility_coords(category) for category in selection2]

house_lons = df2['longitude'].values
house_lats = df2['latitude'].values

# Collect one list of feature values per house and build the frame once at
# the end; growing a DataFrame row by row is quadratic, and DataFrame.append
# no longer exists in pandas 2.x.
rows = []
for j in tqdm(range(len(house_lons)), desc="Progress", mininterval=0.1):
    ret = []

    # nearest-facility distance per category
    for lons, lats in min_dist_coords:
        ret.append(least_distance_friendly(house_lons[j], house_lats[j], lons, lats))

    # facility counts within each radius
    for (lons, lats), ranges in zip(count_coords, selection_range):
        ret.extend(num_by_distance(house_lons[j], house_lats[j], lons, lats, ranges))

    rows.append(ret)

    # periodic checkpoint of the rows computed so far (feature columns only)
    if (j%10000==0):
        pd.DataFrame(rows, columns=feature_columns).to_csv("min_distance_columns.csv", encoding="utf-8")

df_complete = pd.DataFrame(rows, columns=feature_columns)

# append house_price and etc..
# (target column in df_complete, source column in df2); rows are in df2 order
for target_col, source_col in [('건물면적', '건물면적'), ('건물금액', '물건금액'), ('건물주용도', '건물주용도')]:
    df_complete[target_col] = df2[source_col].values


df_complete.to_csv("min_distance_columns.csv", encoding="utf-8")

Progress:   0%|          | 25/163821 [00:05<10:16:04,  4.43it/s]


KeyboardInterrupt: 

In [13]:
# Copy the '건물면적' and '건물주용도' columns from the house table into the result frame.
# NOTE(review): '건물금액' is not filled in this cell — confirm that is intended.
for shared_col in ['건물면적', '건물주용도']:
    df_complete[shared_col] = df2[shared_col]

In [14]:
# Display the two columns copied from the house table as a sanity check.
df_complete[['건물면적', '건물주용도']]

Unnamed: 0,건물면적,건물주용도
0,59.92,아파트
1,84.77,아파트
2,36.90,아파트
3,59.55,아파트
4,64.43,아파트
...,...,...
163816,84.98,아파트
163817,57.02,연립주택
163818,56.48,연립주택
163819,98.88,아파트
