In [1]:
import numpy as np
import pandas as pd
from math import radians, sin, cos, sqrt, atan2

# Folder that holds the input CSV files. File names are appended to this
# string directly, so the trailing slash is required.
path = './data/'

# Real estate data: only the first 10,000 rows of the file are read.
real_estate_data = pd.read_csv(
    path + 'data_APT.csv',
    nrows=10000,
)
# Subway station data, decoded as UTF-8.
subway_station_data = pd.read_csv(
    path + 'Subway_Stations.csv',
    encoding='utf-8',
)

# Function to calculate Haversine distance
def haversine_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in meters between two points.

    The haversine formula is evaluated on a sphere of radius 6,371,000 m.

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        lon1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.

    Returns:
        The distance along the sphere's surface, in meters. NaN inputs
        propagate to a NaN result.
    """
    R = 6371000  # Radius of Earth in meters
    phi1 = radians(lat1)
    phi2 = radians(lat2)

    delta_phi = radians(lat2 - lat1)
    delta_lambda = radians(lon2 - lon1)

    half_phi_sin = sin(delta_phi / 2)
    half_lambda_sin = sin(delta_lambda / 2)
    a = half_phi_sin * half_phi_sin + cos(phi1) * cos(phi2) * half_lambda_sin * half_lambda_sin

    # For (near-)antipodal points rounding can leave `a` a hair above 1, which
    # would make sqrt(1 - a) raise ValueError. Only the upper bound is clamped,
    # and with a comparison rather than min(), so a NaN `a` stays NaN.
    if a > 1.0:
        a = 1.0

    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return R * c  # Distance in meters

# Distance from each real estate row to its nearest subway station.
# '위도' is the latitude column and '경도' the longitude column.
#
# The station coordinates are pulled out of the DataFrame once. Iterating the
# station table with iterrows() inside the outer loop would rebuild a Series
# for every station on every real estate row without changing the result.
subway_station_coords = list(
    zip(subway_station_data['위도'], subway_station_data['경도'])
)

shortest_distances = []

for real_estate_lat, real_estate_lon in zip(real_estate_data['위도'], real_estate_data['경도']):
    # min() raises ValueError if the station table is empty.
    shortest_distance = min(
        haversine_distance(real_estate_lat, real_estate_lon, subway_station_lat, subway_station_lon)
        for subway_station_lat, subway_station_lon in subway_station_coords
    )
    shortest_distances.append(shortest_distance)

# Add the shortest distances to subway stations to the real estate data
real_estate_data['Shortest_Distance_to_Subway'] = shortest_distances

# Write the augmented table to 'data_APT.csv' in the current working
# directory (note: not under `path`), without the index column.
real_estate_data.to_csv('data_APT.csv', index=False)

data_APT = real_estate_data

data_APT

Unnamed: 0,Building_Age,JS_Price,JS_BA,Population,UR,LC_index,CA_index,TC_index,SDT_index,HSP_index,Sell_Price,Crime_Rates,위도,경도,IR,Region_Name,Building_Use,YearMonth,Shortest_Distance_to_Subway
0,15,18000,59.97,299533,3.8,72.0,78.2,74.2,114.099327,81.2,31400.00,0.930480,37.550252,127.029502,2.75,성동구,아파트,2011-01-01,459.389192
1,32,30000,84.43,557563,3.8,72.0,78.2,74.2,113.009872,81.2,100404.17,0.842083,37.497418,127.065327,2.75,강남구,아파트,2011-01-01,540.778887
2,14,22000,59.82,368260,3.8,72.0,78.2,74.2,113.009872,81.2,56836.67,0.836749,37.537397,127.097621,2.75,광진구,아파트,2011-01-01,351.857009
3,25,18000,47.94,494724,3.8,72.0,78.2,74.2,114.099106,81.2,82850.00,0.943467,37.534642,126.884816,2.75,양천구,아파트,2011-01-01,1304.834707
4,12,23500,149.97,420803,3.8,72.0,78.2,74.2,114.270256,81.2,34030.00,0.854310,37.495264,126.862502,2.75,구로구,아파트,2011-01-01,1787.847611
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,21,9000,41.30,338041,3.7,73.9,80.5,77.9,113.840279,77.1,17987.50,0.861823,37.624709,127.047114,3.25,강북구,아파트,2012-03-01,935.781672
9996,20,15000,49.50,561431,3.7,73.9,80.5,77.9,113.790062,77.1,24366.67,1.462903,37.563229,126.859294,3.25,강서구,아파트,2012-03-01,2037.958053
9997,3,19000,84.98,422946,3.7,73.9,80.5,77.9,113.809578,77.1,32000.00,1.463401,37.486951,126.829451,3.25,구로구,아파트,2012-03-01,1139.915870
9998,0,38000,87.05,669058,3.7,73.9,80.5,77.9,114.266663,77.1,57850.00,1.149124,37.502677,127.120006,3.25,송파구,아파트,2012-03-01,790.022872


In [2]:
import numpy as np
import pandas as pd
from math import radians, sin, cos, sqrt, atan2

# Load the university data (the file is decoded as EUC-KR).
university_data = pd.read_csv(
    path + 'University.csv',
    encoding='euc-kr',
)

# Function to calculate Haversine distance
def haversine_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in meters between two points.

    The haversine formula is evaluated on a sphere of radius 6,371,000 m.

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        lon1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.

    Returns:
        The distance along the sphere's surface, in meters. NaN inputs
        propagate to a NaN result.
    """
    R = 6371000  # Radius of Earth in meters
    phi1 = radians(lat1)
    phi2 = radians(lat2)

    delta_phi = radians(lat2 - lat1)
    delta_lambda = radians(lon2 - lon1)

    half_phi_sin = sin(delta_phi / 2)
    half_lambda_sin = sin(delta_lambda / 2)
    a = half_phi_sin * half_phi_sin + cos(phi1) * cos(phi2) * half_lambda_sin * half_lambda_sin

    # For (near-)antipodal points rounding can leave `a` a hair above 1, which
    # would make sqrt(1 - a) raise ValueError. Only the upper bound is clamped,
    # and with a comparison rather than min(), so a NaN `a` stays NaN.
    if a > 1.0:
        a = 1.0

    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return R * c  # Distance in meters

# Distance from each real estate row to its nearest university.
# '위도' is the latitude column and '경도' the longitude column.
#
# The university coordinates are pulled out of the DataFrame once. Iterating
# the university table with iterrows() inside the outer loop would rebuild a
# Series for every university on every real estate row without changing the
# result.
university_coords = list(
    zip(university_data['위도'], university_data['경도'])
)

shortest_distances = []

for real_estate_lat, real_estate_lon in zip(real_estate_data['위도'], real_estate_data['경도']):
    # min() raises ValueError if the university table is empty.
    shortest_distance = min(
        haversine_distance(real_estate_lat, real_estate_lon, university_lat, university_lon)
        for university_lat, university_lon in university_coords
    )
    shortest_distances.append(shortest_distance)

# Add the shortest distances to universities to the real estate data
real_estate_data['Shortest_Distance_to_University'] = shortest_distances

# Write the augmented table to 'data_APT.csv' in the current working
# directory (note: not under `path`), without the index column.
real_estate_data.to_csv('data_APT.csv', index=False)

data_APT = real_estate_data

data_APT

Unnamed: 0,Building_Age,JS_Price,JS_BA,Population,UR,LC_index,CA_index,TC_index,SDT_index,HSP_index,Sell_Price,Crime_Rates,위도,경도,IR,Region_Name,Building_Use,YearMonth,Shortest_Distance_to_Subway,Shortest_Distance_to_University
0,15,18000,59.97,299533,3.8,72.0,78.2,74.2,114.099327,81.2,31400.00,0.930480,37.550252,127.029502,2.75,성동구,아파트,2011-01-01,459.389192,1783.606149
1,32,30000,84.43,557563,3.8,72.0,78.2,74.2,113.009872,81.2,100404.17,0.842083,37.497418,127.065327,2.75,강남구,아파트,2011-01-01,540.778887,3723.160551
2,14,22000,59.82,368260,3.8,72.0,78.2,74.2,113.009872,81.2,56836.67,0.836749,37.537397,127.097621,2.75,광진구,아파트,2011-01-01,351.857009,1517.638732
3,25,18000,47.94,494724,3.8,72.0,78.2,74.2,114.099106,81.2,82850.00,0.943467,37.534642,126.884816,2.75,양천구,아파트,2011-01-01,1304.834707,3053.953581
4,12,23500,149.97,420803,3.8,72.0,78.2,74.2,114.270256,81.2,34030.00,0.854310,37.495264,126.862502,2.75,구로구,아파트,2011-01-01,1787.847611,718.700972
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,21,9000,41.30,338041,3.7,73.9,80.5,77.9,113.840279,77.1,17987.50,0.861823,37.624709,127.047114,3.25,강북구,아파트,2012-03-01,935.781672,995.939821
9996,20,15000,49.50,561431,3.7,73.9,80.5,77.9,113.790062,77.1,24366.67,1.462903,37.563229,126.859294,3.25,강서구,아파트,2012-03-01,2037.958053,1701.812476
9997,3,19000,84.98,422946,3.7,73.9,80.5,77.9,113.809578,77.1,32000.00,1.463401,37.486951,126.829451,3.25,구로구,아파트,2012-03-01,1139.915870,388.777674
9998,0,38000,87.05,669058,3.7,73.9,80.5,77.9,114.266663,77.1,57850.00,1.149124,37.502677,127.120006,3.25,송파구,아파트,2012-03-01,790.022872,2118.895096


In [3]:
import numpy as np
import pandas as pd
from math import radians, sin, cos, sqrt, atan2

# Load the school data (the file is decoded as UTF-8).
school_data = pd.read_csv(
    path + 'School.csv',
    encoding='utf-8',
)

# Function to calculate Haversine distance
def haversine_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in meters between two points.

    The haversine formula is evaluated on a sphere of radius 6,371,000 m.

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        lon1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.

    Returns:
        The distance along the sphere's surface, in meters. NaN inputs
        propagate to a NaN result.
    """
    R = 6371000  # Radius of Earth in meters
    phi1 = radians(lat1)
    phi2 = radians(lat2)

    delta_phi = radians(lat2 - lat1)
    delta_lambda = radians(lon2 - lon1)

    half_phi_sin = sin(delta_phi / 2)
    half_lambda_sin = sin(delta_lambda / 2)
    a = half_phi_sin * half_phi_sin + cos(phi1) * cos(phi2) * half_lambda_sin * half_lambda_sin

    # For (near-)antipodal points rounding can leave `a` a hair above 1, which
    # would make sqrt(1 - a) raise ValueError. Only the upper bound is clamped,
    # and with a comparison rather than min(), so a NaN `a` stays NaN.
    if a > 1.0:
        a = 1.0

    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return R * c  # Distance in meters

# Distance from each real estate row to its nearest school.
# '위도' is the latitude column and '경도' the longitude column.
#
# The school coordinates are pulled out of the DataFrame once. Iterating the
# school table with iterrows() inside the outer loop would rebuild a Series
# for every school on every real estate row without changing the result.
school_coords = list(
    zip(school_data['위도'], school_data['경도'])
)

shortest_distances = []

for real_estate_lat, real_estate_lon in zip(real_estate_data['위도'], real_estate_data['경도']):
    # min() raises ValueError if the school table is empty.
    shortest_distance = min(
        haversine_distance(real_estate_lat, real_estate_lon, school_lat, school_lon)
        for school_lat, school_lon in school_coords
    )
    shortest_distances.append(shortest_distance)

# Add the shortest distances to schools to the real estate data
real_estate_data['Shortest_Distance_to_School'] = shortest_distances

# Write the augmented table to 'data_APT.csv' in the current working
# directory (note: not under `path`), without the index column.
real_estate_data.to_csv('data_APT.csv', index=False)

data_APT = real_estate_data

data_APT



Unnamed: 0,Building_Age,JS_Price,JS_BA,Population,UR,LC_index,CA_index,TC_index,SDT_index,HSP_index,...,Crime_Rates,위도,경도,IR,Region_Name,Building_Use,YearMonth,Shortest_Distance_to_Subway,Shortest_Distance_to_University,Shortest_Distance_to_School
0,15,18000,59.97,299533,3.8,72.0,78.2,74.2,114.099327,81.2,...,0.930480,37.550252,127.029502,2.75,성동구,아파트,2011-01-01,459.389192,1783.606149,370.003165
1,32,30000,84.43,557563,3.8,72.0,78.2,74.2,113.009872,81.2,...,0.842083,37.497418,127.065327,2.75,강남구,아파트,2011-01-01,540.778887,3723.160551,336.123079
2,14,22000,59.82,368260,3.8,72.0,78.2,74.2,113.009872,81.2,...,0.836749,37.537397,127.097621,2.75,광진구,아파트,2011-01-01,351.857009,1517.638732,470.686016
3,25,18000,47.94,494724,3.8,72.0,78.2,74.2,114.099106,81.2,...,0.943467,37.534642,126.884816,2.75,양천구,아파트,2011-01-01,1304.834707,3053.953581,180.644334
4,12,23500,149.97,420803,3.8,72.0,78.2,74.2,114.270256,81.2,...,0.854310,37.495264,126.862502,2.75,구로구,아파트,2011-01-01,1787.847611,718.700972,318.836190
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,21,9000,41.30,338041,3.7,73.9,80.5,77.9,113.840279,77.1,...,0.861823,37.624709,127.047114,3.25,강북구,아파트,2012-03-01,935.781672,995.939821,161.770027
9996,20,15000,49.50,561431,3.7,73.9,80.5,77.9,113.790062,77.1,...,1.462903,37.563229,126.859294,3.25,강서구,아파트,2012-03-01,2037.958053,1701.812476,171.601187
9997,3,19000,84.98,422946,3.7,73.9,80.5,77.9,113.809578,77.1,...,1.463401,37.486951,126.829451,3.25,구로구,아파트,2012-03-01,1139.915870,388.777674,415.705134
9998,0,38000,87.05,669058,3.7,73.9,80.5,77.9,114.266663,77.1,...,1.149124,37.502677,127.120006,3.25,송파구,아파트,2012-03-01,790.022872,2118.895096,153.496101
