In [1]:
# Basic Libraries
import numpy as np
import pandas as pd
import folium
import requests
import seaborn as sns
import matplotlib.pyplot as plt
from geopy.geocoders import Nominatim
from geopy.distance import great_circle
from math import radians, sin, cos, sqrt, atan2
from tqdm import tqdm
import plotly.figure_factory as ff
import plotly.express as px
from scipy import stats

# Machine Learning Libraries
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.feature_selection import SelectKBest, mutual_info_classif, f_regression
from sklearn.inspection import permutation_importance
from sklearn.feature_selection import SelectFromModel

# Warnings
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load Apartment Excel Files into Pandas DataFrame
df = pd.read_excel('real_estate_data/cleaned_real_estate_data.xlsx')

# Load geographical data related to various locations
df2 = pd.read_excel('geospatial_data/geo_airport.xlsx')
df3 = pd.read_excel('geospatial_data/geo_chapels.xlsx')
df4 = pd.read_excel('geospatial_data/geo_city_center.xlsx')
df5 = pd.read_excel('geospatial_data/geo_custody_penitentiary.xlsx')
df6 = pd.read_excel('geospatial_data/geo_emergency_room.xlsx')
df7 = pd.read_excel('geospatial_data/geo_grocery_store.xlsx')
df8 = pd.read_excel('geospatial_data/geo_gym.xlsx')
df9 = pd.read_excel('geospatial_data/geo_high_school.xlsx')
df10 = pd.read_excel('geospatial_data/geo_hospital.xlsx')
df11 = pd.read_excel('geospatial_data/geo_park.xlsx')
df12 = pd.read_excel('geospatial_data/geo_police_station.xlsx')
df13 = pd.read_excel('geospatial_data/geo_primary_care_units.xlsx')
df14 = pd.read_excel('geospatial_data/geo_primary_school.xlsx')
df15 = pd.read_excel('geospatial_data/geo_river.xlsx')
df16 = pd.read_excel('geospatial_data/geo_roman_catholic_church.xlsx')
df17 = pd.read_excel('geospatial_data/geo_rossmann.xlsx')
df18 = pd.read_excel('geospatial_data/geo_shopping_center.xlsx')
df19 = pd.read_excel('geospatial_data/geo_technical_college.xlsx')
df20 = pd.read_excel('geospatial_data/geo_vocational_school.xlsx')
df21 = pd.read_excel('geospatial_data/geo_zabka.xlsx')
df22 = pd.read_excel('geospatial_data/geo_tax_offices.xlsx')
df23 = pd.read_excel('geospatial_data/geo_tax_administration_chambers.xlsx')
df24 = pd.read_excel('geospatial_data/geo_street_workouts.xlsx')
df25 = pd.read_excel('geospatial_data/geo_rope_parks.xlsx')
df26 = pd.read_excel('geospatial_data/geo_orlik_soccer_fields.xlsx')
df27 = pd.read_excel('geospatial_data/geo_natural_swimming_pools.xlsx')
df28 = pd.read_excel('geospatial_data/geo_museums.xlsx')
df29 = pd.read_excel('geospatial_data/geo_municipal_police_departments.xlsx')
df30 = pd.read_excel('geospatial_data/geo_indoor_swimming_pool.xlsx')
df31 = pd.read_excel('geospatial_data/geo_horseback_riding_center.xlsx')
df32 = pd.read_excel('geospatial_data/geo_football_stadiums.xlsx')
df33 = pd.read_excel('geospatial_data/geo_district_court.xlsx')
df34 = pd.read_excel('geospatial_data/geo_cinemas.xlsx')

In [3]:
# Define function to calculate the great circle distance between a reference point and the latitude and longitude of each row in the dataframe
def get_distance(reference_point, name):
    df[name] = df.apply(lambda row: great_circle((row['Latitude'], row['Longitude']), reference_point).m, axis=1)

In [4]:
def haversine(lat1, lon1, lat2, lon2):
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * atan2(sqrt(a), sqrt(1-a))
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles
    return c * r

In [5]:
def calculate_number_of_places(df, df2, distancekm, name):

    # Initialize the progress bar
    pbar = tqdm(total=len(df))

    # Iterate through each row in df
    for index, row in df.iterrows():
        lat1 = row['Latitude']
        lon1 = row['Longitude']
        count = 0

        # Iterate through each row in df2
        for index2, row2 in df2.iterrows():
            lat2 = row2['Latitude']
            lon2 = row2['Longitude']

            # Calculate the distance between the two points using the Haversine formula
            distance = haversine(lat1, lon1, lat2, lon2)

            # If the distance is less than 5 km, increase the count
            if distance < distancekm:
                count += 1
        
        # Update the progress bar
        pbar.update(1)

        # Add the count to the new column in df
        df.at[index, name] = count

    pbar.close()

In [6]:
def calculate_min_straightline_distance(df, df2, name):

    # Initialize the progress bar
    pbar = tqdm(total=len(df))

    for index, row in df.iterrows():
        min_distance = float('inf')
        for index2, row2 in df2.iterrows():
            distance = haversine(row['Latitude'], row['Longitude'], row2['Latitude'], row2['Longitude'])
            if distance < min_distance:
                min_distance = distance
        df.loc[index, name] = min_distance

        # Update the progress bar
        pbar.update(1)
        
    pbar.close()

In [7]:
# Create new columns
dfx = df

calculate_number_of_places(df, dfx, 0.5, "number_of_apartments_for_sale_in_500m")
calculate_number_of_places(df, dfx, 1, "number_of_apartments_for_sale_in_1km")
calculate_number_of_places(df, dfx, 2, "number_of_apartments_for_sale_in_2km")
calculate_number_of_places(df, dfx, 3, "number_of_apartments_for_sale_in_3km")
calculate_number_of_places(df, dfx, 4, "number_of_apartments_for_sale_in_4km")
calculate_number_of_places(df, dfx, 5, "number_of_apartments_for_sale_in_5km")

100%|██████████| 8347/8347 [1:02:53<00:00,  2.21it/s]
100%|██████████| 8347/8347 [1:10:11<00:00,  1.98it/s]
100%|██████████| 8347/8347 [1:06:56<00:00,  2.08it/s]
100%|██████████| 8347/8347 [1:45:15<00:00,  1.32it/s]  
100%|██████████| 8347/8347 [1:37:47<00:00,  1.42it/s]  
100%|██████████| 8347/8347 [1:50:48<00:00,  1.26it/s]  


In [8]:
# Create new columns
calculate_number_of_places(df, df3, 0.5, "number_of_chapel_in_500m")
calculate_number_of_places(df, df5, 0.5, "number_of_custody_or_penitentiary_in_500m")
calculate_number_of_places(df, df6, 0.5, "number_of_emergency_room_in_500m")
calculate_number_of_places(df, df7, 0.5, "number_of_grocery_store_in_500m")
calculate_number_of_places(df, df8, 0.5, "number_of_gym_in_500m")
calculate_number_of_places(df, df9, 0.5, "number_of_high_school_in_500m")
calculate_number_of_places(df, df10, 0.5, "number_of_hospital_in_500m")
calculate_number_of_places(df, df11, 0.5, "number_of_park_in_500m")
calculate_number_of_places(df, df12, 0.5, "number_of_police_station_in_500m")
calculate_number_of_places(df, df13, 0.5, "number_of_primary_care_unit_in_500m")
calculate_number_of_places(df, df14, 0.5, "number_of_primary_school_in_500m")
calculate_number_of_places(df, df16, 0.5, "number_of_roman_catholic_church_in_500m")
calculate_number_of_places(df, df17, 0.5, "number_of_rossmannvin_500m")
calculate_number_of_places(df, df18, 0.5, "number_of_shopping_center_in_500m")
calculate_number_of_places(df, df19, 0.5, "number_of_technical_college_in_500m")
calculate_number_of_places(df, df20, 0.5, "number_of_vocational_school_in_500m")
calculate_number_of_places(df, df21, 0.5, "number_of_zabka_in_500m")
calculate_number_of_places(df, df22, 0.5, "number_of_tax_office_in_500m")
calculate_number_of_places(df, df23, 0.5, "number_of_tax_administration_chambers_in_500m")
calculate_number_of_places(df, df24, 0.5, "number_of_street_workouts_in_500m")
calculate_number_of_places(df, df25, 0.5, "number_of_rope_parks_in_500m")
calculate_number_of_places(df, df26, 0.5, "number_of_orlik_soccer_fields_in_500m")
calculate_number_of_places(df, df27, 0.5, "number_of_natural_swimming_pools_in_500m")
calculate_number_of_places(df, df28, 0.5, "number_of_museums_in_500m")
calculate_number_of_places(df, df29, 0.5, "number_of_municipal_police_departments_in_500m")
calculate_number_of_places(df, df30, 0.5, "number_of_indoor_swimming_pool_in_500m")
calculate_number_of_places(df, df31, 0.5, "number_of_horseback_riding_centers_in_500m")
calculate_number_of_places(df, df32, 0.5, "number_of_football_stadiums_in_500m")
calculate_number_of_places(df, df33, 0.5, "number_of_district_court_in_500m")
calculate_number_of_places(df, df34, 0.5, "number_of_cinemas_in_500m")

100%|██████████| 8347/8347 [00:20<00:00, 398.52it/s]
100%|██████████| 8347/8347 [00:05<00:00, 1614.69it/s]
100%|██████████| 8347/8347 [00:09<00:00, 902.58it/s] 
100%|██████████| 8347/8347 [02:13<00:00, 62.52it/s] 
100%|██████████| 8347/8347 [02:19<00:00, 59.76it/s] 
100%|██████████| 8347/8347 [00:35<00:00, 237.26it/s]
100%|██████████| 8347/8347 [00:08<00:00, 995.70it/s] 
100%|██████████| 8347/8347 [00:20<00:00, 412.92it/s]
100%|██████████| 8347/8347 [00:05<00:00, 1410.63it/s]
100%|██████████| 8347/8347 [01:14<00:00, 112.54it/s]
100%|██████████| 8347/8347 [01:18<00:00, 106.02it/s]
100%|██████████| 8347/8347 [00:58<00:00, 142.03it/s]
100%|██████████| 8347/8347 [00:19<00:00, 425.35it/s]
100%|██████████| 8347/8347 [00:06<00:00, 1372.14it/s]
100%|██████████| 8347/8347 [00:12<00:00, 676.36it/s]
100%|██████████| 8347/8347 [00:12<00:00, 654.49it/s]
100%|██████████| 8347/8347 [03:03<00:00, 45.57it/s]
100%|██████████| 8347/8347 [00:04<00:00, 1693.83it/s]
100%|██████████| 8347/8347 [00:03<00:00, 

In [9]:
# Create new columns
calculate_number_of_places(df, df3, 1, "number_of_chapel_in_1km")
calculate_number_of_places(df, df5, 1, "number_of_custody_or_penitentiary_in_1km")
calculate_number_of_places(df, df6, 1, "number_of_emergency_room_in_1km")
calculate_number_of_places(df, df7, 1, "number_of_grocery_store_in_1km")
calculate_number_of_places(df, df8, 1, "number_of_gym_in_1km")
calculate_number_of_places(df, df9, 1, "number_of_high_school_in_1km")
calculate_number_of_places(df, df10, 1, "number_of_hospital_in_1km")
calculate_number_of_places(df, df11, 1, "number_of_park_in_1km")
calculate_number_of_places(df, df12, 1, "number_of_police_station_in_1km")
calculate_number_of_places(df, df13, 1, "number_of_primary_care_unit_in_1km")
calculate_number_of_places(df, df14, 1, "number_of_primary_school_in_1km")
calculate_number_of_places(df, df16, 1, "number_of_roman_catholic_church_in_1km")
calculate_number_of_places(df, df17, 1, "number_of_rossmannvin_1km")
calculate_number_of_places(df, df18, 1, "number_of_shopping_center_in_1km")
calculate_number_of_places(df, df19, 1, "number_of_technical_college_in_1km")
calculate_number_of_places(df, df20, 1, "number_of_vocational_school_in_1km")
calculate_number_of_places(df, df21, 1, "number_of_zabka_in_1km")
calculate_number_of_places(df, df22, 1, "number_of_tax_office_in_1km")
calculate_number_of_places(df, df23, 1, "number_of_tax_administration_chambers_in_1km")
calculate_number_of_places(df, df24, 1, "number_of_street_workouts_in_1km")
calculate_number_of_places(df, df25, 1, "number_of_rope_parks_in_1km")
calculate_number_of_places(df, df26, 1, "number_of_orlik_soccer_fields_in_1km")
calculate_number_of_places(df, df27, 1, "number_of_natural_swimming_pools_in_1km")
calculate_number_of_places(df, df28, 1, "number_of_museums_in_1km")
calculate_number_of_places(df, df29, 1, "number_of_municipal_police_departments_in_1km")
calculate_number_of_places(df, df30, 1, "number_of_indoor_swimming_pool_in_1km")
calculate_number_of_places(df, df31, 1, "number_of_horseback_riding_centers_in_1km")
calculate_number_of_places(df, df32, 1, "number_of_football_stadiums_in_1km")
calculate_number_of_places(df, df33, 1, "number_of_district_court_in_1km")
calculate_number_of_places(df, df34, 1, "number_of_cinemas_in_1km")

100%|██████████| 8347/8347 [00:27<00:00, 300.74it/s]
100%|██████████| 8347/8347 [00:06<00:00, 1329.18it/s]
100%|██████████| 8347/8347 [00:08<00:00, 1024.91it/s]
100%|██████████| 8347/8347 [01:49<00:00, 76.51it/s]
100%|██████████| 8347/8347 [01:13<00:00, 113.76it/s]
100%|██████████| 8347/8347 [01:21<00:00, 102.30it/s]
100%|██████████| 8347/8347 [00:17<00:00, 474.52it/s]
100%|██████████| 8347/8347 [00:43<00:00, 190.84it/s]
100%|██████████| 8347/8347 [00:12<00:00, 682.07it/s]
100%|██████████| 8347/8347 [02:45<00:00, 50.34it/s]
100%|██████████| 8347/8347 [02:56<00:00, 47.16it/s]
100%|██████████| 8347/8347 [01:57<00:00, 70.86it/s]
100%|██████████| 8347/8347 [00:43<00:00, 190.71it/s]
100%|██████████| 8347/8347 [00:11<00:00, 718.52it/s] 
100%|██████████| 8347/8347 [00:14<00:00, 586.85it/s]
100%|██████████| 8347/8347 [00:13<00:00, 616.80it/s]
100%|██████████| 8347/8347 [02:24<00:00, 57.57it/s]
100%|██████████| 8347/8347 [00:03<00:00, 2314.14it/s]
100%|██████████| 8347/8347 [00:02<00:00, 3071.1

In [10]:
# Create new columns
calculate_number_of_places(df, df3, 2, "number_of_chapel_in_2km")
calculate_number_of_places(df, df5, 2, "number_of_custody_or_penitentiary_in_2km")
calculate_number_of_places(df, df6, 2, "number_of_emergency_room_in_2km")
calculate_number_of_places(df, df7, 2, "number_of_grocery_store_in_2km")
calculate_number_of_places(df, df8, 2, "number_of_gym_in_2km")
calculate_number_of_places(df, df9, 2, "number_of_high_school_in_2km")
calculate_number_of_places(df, df10, 2, "number_of_hospital_in_2km")
calculate_number_of_places(df, df11, 2, "number_of_park_in_2km")
calculate_number_of_places(df, df12, 2, "number_of_police_station_in_2km")
calculate_number_of_places(df, df13, 2, "number_of_primary_care_unit_in_2km")
calculate_number_of_places(df, df14, 2, "number_of_primary_school_in_2km")
calculate_number_of_places(df, df16, 2, "number_of_roman_catholic_church_in_2km")
calculate_number_of_places(df, df17, 2, "number_of_rossmannvin_2km")
calculate_number_of_places(df, df18, 2, "number_of_shopping_center_in_2km")
calculate_number_of_places(df, df19, 2, "number_of_technical_college_in_2km")
calculate_number_of_places(df, df20, 2, "number_of_vocational_school_in_2km")
calculate_number_of_places(df, df21, 2, "number_of_zabka_in_2km")
calculate_number_of_places(df, df22, 2, "number_of_tax_office_in_2km")
calculate_number_of_places(df, df23, 2, "number_of_tax_administration_chambers_in_2km")
calculate_number_of_places(df, df24, 2, "number_of_street_workouts_in_2km")
calculate_number_of_places(df, df25, 2, "number_of_rope_parks_in_2km")
calculate_number_of_places(df, df26, 2, "number_of_orlik_soccer_fields_in_2km")
calculate_number_of_places(df, df27, 2, "number_of_natural_swimming_pools_in_2km")
calculate_number_of_places(df, df28, 2, "number_of_museums_in_2km")
calculate_number_of_places(df, df29, 2, "number_of_municipal_police_departments_in_2km")
calculate_number_of_places(df, df30, 2, "number_of_indoor_swimming_pool_in_2km")
calculate_number_of_places(df, df31, 2, "number_of_horseback_riding_centers_in_2km")
calculate_number_of_places(df, df32, 2, "number_of_football_stadiums_in_2km")
calculate_number_of_places(df, df33, 2, "number_of_district_court_in_2km")
calculate_number_of_places(df, df34, 2, "number_of_cinemas_in_2km")

100%|██████████| 8347/8347 [00:11<00:00, 695.73it/s]
100%|██████████| 8347/8347 [00:01<00:00, 4280.62it/s]
100%|██████████| 8347/8347 [00:03<00:00, 2460.26it/s]
100%|██████████| 8347/8347 [00:48<00:00, 172.70it/s]
100%|██████████| 8347/8347 [00:37<00:00, 224.42it/s]
100%|██████████| 8347/8347 [00:30<00:00, 271.43it/s]
100%|██████████| 8347/8347 [00:07<00:00, 1052.71it/s]
100%|██████████| 8347/8347 [00:19<00:00, 432.60it/s]
100%|██████████| 8347/8347 [00:05<00:00, 1567.37it/s]
100%|██████████| 8347/8347 [01:13<00:00, 113.73it/s]
100%|██████████| 8347/8347 [01:18<00:00, 106.74it/s]
100%|██████████| 8347/8347 [00:52<00:00, 158.28it/s]
100%|██████████| 8347/8347 [00:19<00:00, 422.06it/s]
100%|██████████| 8347/8347 [00:06<00:00, 1298.74it/s]
100%|██████████| 8347/8347 [00:12<00:00, 680.36it/s]
100%|██████████| 8347/8347 [00:12<00:00, 681.71it/s]
100%|██████████| 8347/8347 [02:21<00:00, 58.81it/s]
100%|██████████| 8347/8347 [00:03<00:00, 2229.48it/s]
100%|██████████| 8347/8347 [00:02<00:00, 

In [11]:
# Create new columns
calculate_number_of_places(df, df3, 3, "number_of_chapel_in_3km")
calculate_number_of_places(df, df5, 3, "number_of_custody_or_penitentiary_in_3km")
calculate_number_of_places(df, df6, 3, "number_of_emergency_room_in_3km")
calculate_number_of_places(df, df7, 3, "number_of_grocery_store_in_3km")
calculate_number_of_places(df, df8, 3, "number_of_gym_in_3km")
calculate_number_of_places(df, df9, 3, "number_of_high_school_in_3km")
calculate_number_of_places(df, df10, 3, "number_of_hospital_in_3km")
calculate_number_of_places(df, df11, 3, "number_of_park_in_3km")
calculate_number_of_places(df, df12, 3, "number_of_police_station_in_3km")
calculate_number_of_places(df, df13, 3, "number_of_primary_care_unit_in_3km")
calculate_number_of_places(df, df14, 3, "number_of_primary_school_in_3km")
calculate_number_of_places(df, df16, 3, "number_of_roman_catholic_church_in_3km")
calculate_number_of_places(df, df17, 3, "number_of_rossmannvin_3km")
calculate_number_of_places(df, df18, 3, "number_of_shopping_center_in_3km")
calculate_number_of_places(df, df19, 3, "number_of_technical_college_in_3km")
calculate_number_of_places(df, df20, 3, "number_of_vocational_school_in_3km")
calculate_number_of_places(df, df21, 3, "number_of_zabka_in_3km")
calculate_number_of_places(df, df22, 3, "number_of_tax_office_in_3km")
calculate_number_of_places(df, df23, 3, "number_of_tax_administration_chambers_in_3km")
calculate_number_of_places(df, df24, 3, "number_of_street_workouts_in_3km")
calculate_number_of_places(df, df25, 3, "number_of_rope_parks_in_3km")
calculate_number_of_places(df, df26, 3, "number_of_orlik_soccer_fields_in_3km")
calculate_number_of_places(df, df27, 3, "number_of_natural_swimming_pools_in_3km")
calculate_number_of_places(df, df28, 3, "number_of_museums_in_3km")
calculate_number_of_places(df, df29, 3, "number_of_municipal_police_departments_in_3km")
calculate_number_of_places(df, df30, 3, "number_of_indoor_swimming_pool_in_3km")
calculate_number_of_places(df, df31, 3, "number_of_horseback_riding_centers_in_3km")
calculate_number_of_places(df, df32, 3, "number_of_football_stadiums_in_3km")
calculate_number_of_places(df, df33, 3, "number_of_district_court_in_3km")
calculate_number_of_places(df, df34, 3, "number_of_cinemas_in_3km")

100%|██████████| 8347/8347 [00:11<00:00, 707.30it/s]
100%|██████████| 8347/8347 [00:01<00:00, 4294.05it/s]
100%|██████████| 8347/8347 [00:03<00:00, 2501.79it/s]
100%|██████████| 8347/8347 [00:47<00:00, 174.54it/s]
100%|██████████| 8347/8347 [00:31<00:00, 266.46it/s]
100%|██████████| 8347/8347 [00:30<00:00, 274.49it/s]
100%|██████████| 8347/8347 [00:07<00:00, 1113.20it/s]
100%|██████████| 8347/8347 [00:19<00:00, 434.03it/s]
100%|██████████| 8347/8347 [00:05<00:00, 1501.47it/s]
100%|██████████| 8347/8347 [01:12<00:00, 115.12it/s]
100%|██████████| 8347/8347 [01:17<00:00, 108.16it/s]
100%|██████████| 8347/8347 [00:52<00:00, 159.52it/s]
100%|██████████| 8347/8347 [00:19<00:00, 420.65it/s]
100%|██████████| 8347/8347 [00:05<00:00, 1405.78it/s]
100%|██████████| 8347/8347 [00:12<00:00, 682.67it/s]
100%|██████████| 8347/8347 [00:12<00:00, 667.43it/s]
100%|██████████| 8347/8347 [02:20<00:00, 59.30it/s]
100%|██████████| 8347/8347 [00:03<00:00, 2189.65it/s]
100%|██████████| 8347/8347 [00:02<00:00, 

In [12]:
# Create new columns
calculate_number_of_places(df, df3, 4, "number_of_chapel_in_4km")
calculate_number_of_places(df, df5, 4, "number_of_custody_or_penitentiary_in_4km")
calculate_number_of_places(df, df6, 4, "number_of_emergency_room_in_4km")
calculate_number_of_places(df, df7, 4, "number_of_grocery_store_in_4km")
calculate_number_of_places(df, df8, 4, "number_of_gym_in_4km")
calculate_number_of_places(df, df9, 4, "number_of_high_school_in_4km")
calculate_number_of_places(df, df10, 4, "number_of_hospital_in_4km")
calculate_number_of_places(df, df11, 4, "number_of_park_in_4km")
calculate_number_of_places(df, df12, 4, "number_of_police_station_in_4km")
calculate_number_of_places(df, df13, 4, "number_of_primary_care_unit_in_4km")
calculate_number_of_places(df, df14, 4, "number_of_primary_school_in_4km")
calculate_number_of_places(df, df16, 4, "number_of_roman_catholic_church_in_4km")
calculate_number_of_places(df, df17, 4, "number_of_rossmannvin_4km")
calculate_number_of_places(df, df18, 4, "number_of_shopping_center_in_4km")
calculate_number_of_places(df, df19, 4, "number_of_technical_college_in_4km")
calculate_number_of_places(df, df20, 4, "number_of_vocational_school_in_4km")
calculate_number_of_places(df, df21, 4, "number_of_zabka_in_4km")
calculate_number_of_places(df, df22, 4, "number_of_tax_office_in_4km")
calculate_number_of_places(df, df23, 4, "number_of_tax_administration_chambers_in_4km")
calculate_number_of_places(df, df24, 4, "number_of_street_workouts_in_4km")
calculate_number_of_places(df, df25, 4, "number_of_rope_parks_in_4km")
calculate_number_of_places(df, df26, 4, "number_of_orlik_soccer_fields_in_4km")
calculate_number_of_places(df, df27, 4, "number_of_natural_swimming_pools_in_4km")
calculate_number_of_places(df, df28, 4, "number_of_museums_in_4km")
calculate_number_of_places(df, df29, 4, "number_of_municipal_police_departments_in_4km")
calculate_number_of_places(df, df30, 4, "number_of_indoor_swimming_pool_in_4km")
calculate_number_of_places(df, df31, 4, "number_of_horseback_riding_centers_in_4km")
calculate_number_of_places(df, df32, 4, "number_of_football_stadiums_in_4km")
calculate_number_of_places(df, df33, 4, "number_of_district_court_in_4km")
calculate_number_of_places(df, df34, 4, "number_of_cinemas_in_4km")

100%|██████████| 8347/8347 [00:11<00:00, 708.14it/s]
100%|██████████| 8347/8347 [00:01<00:00, 4178.44it/s]
100%|██████████| 8347/8347 [00:03<00:00, 2487.14it/s]
100%|██████████| 8347/8347 [00:48<00:00, 173.75it/s]
100%|██████████| 8347/8347 [00:30<00:00, 269.88it/s]
100%|██████████| 8347/8347 [00:31<00:00, 266.53it/s]
100%|██████████| 8347/8347 [00:07<00:00, 1115.17it/s]
100%|██████████| 8347/8347 [00:19<00:00, 428.74it/s]
100%|██████████| 8347/8347 [00:05<00:00, 1606.08it/s]
100%|██████████| 8347/8347 [01:12<00:00, 115.65it/s]
100%|██████████| 8347/8347 [01:17<00:00, 107.71it/s]
100%|██████████| 8347/8347 [00:52<00:00, 158.84it/s]
100%|██████████| 8347/8347 [00:19<00:00, 430.39it/s]
100%|██████████| 8347/8347 [00:06<00:00, 1390.41it/s]
100%|██████████| 8347/8347 [00:12<00:00, 682.50it/s]
100%|██████████| 8347/8347 [00:12<00:00, 664.66it/s]
100%|██████████| 8347/8347 [02:20<00:00, 59.43it/s]
100%|██████████| 8347/8347 [00:03<00:00, 2264.72it/s]
100%|██████████| 8347/8347 [00:02<00:00, 

In [13]:
# Create new columns
calculate_number_of_places(df, df3, 5, "number_of_chapel_in_5km")
calculate_number_of_places(df, df5, 5, "number_of_custody_or_penitentiary_in_5km")
calculate_number_of_places(df, df6, 5, "number_of_emergency_room_in_5km")
calculate_number_of_places(df, df7, 5, "number_of_grocery_store_in_5km")
calculate_number_of_places(df, df8, 5, "number_of_gym_in_5km")
calculate_number_of_places(df, df9, 5, "number_of_high_school_in_5km")
calculate_number_of_places(df, df10, 5, "number_of_hospital_in_5km")
calculate_number_of_places(df, df11, 5, "number_of_park_in_5km")
calculate_number_of_places(df, df12, 5, "number_of_police_station_in_5km")
calculate_number_of_places(df, df13, 5, "number_of_primary_care_unit_in_5km")
calculate_number_of_places(df, df14, 5, "number_of_primary_school_in_5km")
calculate_number_of_places(df, df16, 5, "number_of_roman_catholic_church_in_5km")
calculate_number_of_places(df, df17, 5, "number_of_rossmannvin_5km")
calculate_number_of_places(df, df18, 5, "number_of_shopping_center_in_5km")
calculate_number_of_places(df, df19, 5, "number_of_technical_college_in_5km")
calculate_number_of_places(df, df20, 5, "number_of_vocational_school_in_5km")
calculate_number_of_places(df, df21, 5, "number_of_zabka_in_5km")
calculate_number_of_places(df, df22, 5, "number_of_tax_office_in_5km")
calculate_number_of_places(df, df23, 5, "number_of_tax_administration_chambers_in_5km")
calculate_number_of_places(df, df24, 5, "number_of_street_workouts_in_5km")
calculate_number_of_places(df, df25, 5, "number_of_rope_parks_in_5km")
calculate_number_of_places(df, df26, 5, "number_of_orlik_soccer_fields_in_5km")
calculate_number_of_places(df, df27, 5, "number_of_natural_swimming_pools_in_5km")
calculate_number_of_places(df, df28, 5, "number_of_museums_in_5km")
calculate_number_of_places(df, df29, 5, "number_of_municipal_police_departments_in_5km")
calculate_number_of_places(df, df30, 5, "number_of_indoor_swimming_pool_in_5km")
calculate_number_of_places(df, df31, 5, "number_of_horseback_riding_centers_in_5km")
calculate_number_of_places(df, df32, 5, "number_of_football_stadiums_in_5km")
calculate_number_of_places(df, df33, 5, "number_of_district_court_in_5km")
calculate_number_of_places(df, df34, 5, "number_of_cinemas_in_5km")

100%|██████████| 8347/8347 [00:11<00:00, 703.88it/s]
100%|██████████| 8347/8347 [00:01<00:00, 4348.99it/s]
100%|██████████| 8347/8347 [00:03<00:00, 2488.98it/s]
100%|██████████| 8347/8347 [00:48<00:00, 172.90it/s]
100%|██████████| 8347/8347 [00:38<00:00, 219.01it/s]
100%|██████████| 8347/8347 [00:30<00:00, 273.81it/s]
100%|██████████| 8347/8347 [00:07<00:00, 1056.25it/s]
100%|██████████| 8347/8347 [00:18<00:00, 441.85it/s]
100%|██████████| 8347/8347 [00:05<00:00, 1618.70it/s]
100%|██████████| 8347/8347 [01:12<00:00, 115.87it/s]
100%|██████████| 8347/8347 [01:17<00:00, 107.83it/s]
100%|██████████| 8347/8347 [00:52<00:00, 159.77it/s]
100%|██████████| 8347/8347 [00:19<00:00, 428.97it/s]
100%|██████████| 8347/8347 [00:06<00:00, 1287.76it/s]
100%|██████████| 8347/8347 [00:11<00:00, 712.51it/s]
100%|██████████| 8347/8347 [00:12<00:00, 666.06it/s]
100%|██████████| 8347/8347 [02:21<00:00, 58.88it/s]
100%|██████████| 8347/8347 [00:03<00:00, 2463.80it/s]
100%|██████████| 8347/8347 [00:02<00:00, 

In [14]:
# Create new columns
calculate_number_of_places(df, df3, 10, "number_of_chapel_in_10km")
calculate_number_of_places(df, df5, 10, "number_of_custody_or_penitentiary_in_10km")
calculate_number_of_places(df, df6, 10, "number_of_emergency_room_in_10km")
calculate_number_of_places(df, df7, 10, "number_of_grocery_store_in_10km")
calculate_number_of_places(df, df8, 10, "number_of_gym_in_10km")
calculate_number_of_places(df, df9, 10, "number_of_high_school_in_10km")
calculate_number_of_places(df, df10, 10, "number_of_hospital_in_10km")
calculate_number_of_places(df, df11, 10, "number_of_park_in_10km")
calculate_number_of_places(df, df12, 10, "number_of_police_station_in_10km")
calculate_number_of_places(df, df13, 10, "number_of_primary_care_unit_in_10km")
calculate_number_of_places(df, df14, 10, "number_of_primary_school_in_10km")
calculate_number_of_places(df, df16, 10, "number_of_roman_catholic_church_in_10km")
calculate_number_of_places(df, df17, 10, "number_of_rossmannvin_10km")
calculate_number_of_places(df, df18, 10, "number_of_shopping_center_in_10km")
calculate_number_of_places(df, df19, 10, "number_of_technical_college_in_10km")
calculate_number_of_places(df, df20, 10, "number_of_vocational_school_in_10km")
calculate_number_of_places(df, df21, 10, "number_of_zabka_in_10km")
calculate_number_of_places(df, df22, 10, "number_of_tax_office_in_10km")
calculate_number_of_places(df, df23, 10, "number_of_tax_administration_chambers_in_10km")
calculate_number_of_places(df, df24, 10, "number_of_street_workouts_in_10km")
calculate_number_of_places(df, df25, 10, "number_of_rope_parks_in_10km")
calculate_number_of_places(df, df26, 10, "number_of_orlik_soccer_fields_in_10km")
calculate_number_of_places(df, df27, 10, "number_of_natural_swimming_pools_in_10km")
calculate_number_of_places(df, df28, 10, "number_of_museums_in_10km")
calculate_number_of_places(df, df29, 10, "number_of_municipal_police_departments_in_10km")
calculate_number_of_places(df, df30, 10, "number_of_indoor_swimming_pool_in_10km")
calculate_number_of_places(df, df31, 10, "number_of_horseback_riding_centers_in_10km")
calculate_number_of_places(df, df32, 10, "number_of_football_stadiums_in_10km")
calculate_number_of_places(df, df33, 10, "number_of_district_court_in_10km")
calculate_number_of_places(df, df34, 10, "number_of_cinemas_in_10km")

100%|██████████| 8347/8347 [00:11<00:00, 718.21it/s]
100%|██████████| 8347/8347 [00:01<00:00, 4320.42it/s]
100%|██████████| 8347/8347 [00:03<00:00, 2170.95it/s]
100%|██████████| 8347/8347 [00:48<00:00, 172.58it/s]
100%|██████████| 8347/8347 [00:31<00:00, 267.49it/s]
100%|██████████| 8347/8347 [00:30<00:00, 274.86it/s]
100%|██████████| 8347/8347 [00:07<00:00, 1069.27it/s]
100%|██████████| 8347/8347 [00:19<00:00, 432.41it/s]
100%|██████████| 8347/8347 [00:05<00:00, 1499.13it/s]
100%|██████████| 8347/8347 [01:13<00:00, 113.85it/s]
100%|██████████| 8347/8347 [01:17<00:00, 107.21it/s]
100%|██████████| 8347/8347 [00:53<00:00, 156.97it/s]
100%|██████████| 8347/8347 [00:20<00:00, 416.33it/s]
100%|██████████| 8347/8347 [00:06<00:00, 1387.01it/s]
100%|██████████| 8347/8347 [00:12<00:00, 676.77it/s]
100%|██████████| 8347/8347 [00:12<00:00, 671.91it/s]
100%|██████████| 8347/8347 [02:22<00:00, 58.58it/s]
100%|██████████| 8347/8347 [00:03<00:00, 2190.15it/s]
100%|██████████| 8347/8347 [00:02<00:00, 

In [15]:
# Create new columns
calculate_min_straightline_distance(df, df2, "nearest airport")
calculate_min_straightline_distance(df, df3, "nearest chapel")
calculate_min_straightline_distance(df, df4, "nearest city center")
calculate_min_straightline_distance(df, df5, "nearest custody or penitentiary")
calculate_min_straightline_distance(df, df6, "nearest emergency room")
calculate_min_straightline_distance(df, df7, "nearest grocery store")
calculate_min_straightline_distance(df, df8, "nearest gym")
calculate_min_straightline_distance(df, df9, "nearest high school")
calculate_min_straightline_distance(df, df10, "nearest hospital")
calculate_min_straightline_distance(df, df11, "nearest park")
calculate_min_straightline_distance(df, df12, "nearest police station")
calculate_min_straightline_distance(df, df13, "nearest primary care unit")
calculate_min_straightline_distance(df, df14, "nearest primary school")
calculate_min_straightline_distance(df, df15, "nearest river")
calculate_min_straightline_distance(df, df16, "nearest roman catholic church")
calculate_min_straightline_distance(df, df17, "nearest rossmann")
calculate_min_straightline_distance(df, df18, "nearest shopping center")
calculate_min_straightline_distance(df, df19, "nearest technical college")
calculate_min_straightline_distance(df, df20, "nearest vocational school")
calculate_min_straightline_distance(df, df21, "nearest zabka")
calculate_min_straightline_distance(df, df22, "nearest_tax_office")
calculate_min_straightline_distance(df, df23, "nearest_tax_administration_chambers")
calculate_min_straightline_distance(df, df24, "nearest_street_workouts")
calculate_min_straightline_distance(df, df25, "nearest_rope_parks")
calculate_min_straightline_distance(df, df26, "nearest_orlik_soccer_fields")
calculate_min_straightline_distance(df, df27, "nearest_natural_swimming_pools")
calculate_min_straightline_distance(df, df28, "nearest_museums")
calculate_min_straightline_distance(df, df29, "nearest_municipal_police_departments")
calculate_min_straightline_distance(df, df30, "nearest_indoor_swimming_pool")
calculate_min_straightline_distance(df, df31, "nearest_horseback_riding_centers")
calculate_min_straightline_distance(df, df32, "nearest_football_stadiums")
calculate_min_straightline_distance(df, df33, "nearest_district_court")
calculate_min_straightline_distance(df, df34, "nearest_cinemas")

100%|██████████| 8347/8347 [00:02<00:00, 3789.32it/s]
100%|██████████| 8347/8347 [00:14<00:00, 591.18it/s]
100%|██████████| 8347/8347 [00:02<00:00, 3905.43it/s]
100%|██████████| 8347/8347 [00:02<00:00, 3223.47it/s]
100%|██████████| 8347/8347 [00:04<00:00, 1759.61it/s]
100%|██████████| 8347/8347 [00:55<00:00, 149.66it/s]
100%|██████████| 8347/8347 [00:35<00:00, 231.91it/s]
100%|██████████| 8347/8347 [00:35<00:00, 233.28it/s]
100%|██████████| 8347/8347 [00:09<00:00, 912.10it/s]
100%|██████████| 8347/8347 [00:22<00:00, 364.53it/s]
100%|██████████| 8347/8347 [00:06<00:00, 1277.47it/s]
100%|██████████| 8347/8347 [01:24<00:00, 98.40it/s] 
100%|██████████| 8347/8347 [01:30<00:00, 92.08it/s]
100%|██████████| 8347/8347 [01:35<00:00, 87.09it/s]
100%|██████████| 8347/8347 [01:01<00:00, 135.99it/s]
100%|██████████| 8347/8347 [00:22<00:00, 365.48it/s]
100%|██████████| 8347/8347 [00:07<00:00, 1068.89it/s]
100%|██████████| 8347/8347 [00:14<00:00, 576.29it/s]
100%|██████████| 8347/8347 [00:14<00:00, 5

In [16]:
# Remove rows where "nearest city center" is greater than 20 as they are not in Cracow
df = df[df['nearest city center'] <= 14]

In [17]:
# Save df to an excel file as a checkpoint 
df.to_excel("final_real_estate_data_.xlsx")  