In [1]:
import os
import numpy as np
import pandas as pd
from shapely.geometry import Point
import pickle

# Import interactive maps module

In [2]:
import geopandas as gpd
import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster

In [3]:
# Function for displaying the map

def embed_map(m, file_name):
    """Save the map ``m`` to ``file_name`` and return an IFrame that shows that file.

    Parameters
    ----------
    m : object with a ``save(path)`` method
        The map to write out (a ``folium.Map`` in this notebook).
    file_name : str
        Destination path of the HTML file; the same path is used as the IFrame source.

    Returns
    -------
    IPython.display.IFrame
        Frame of full width and 500px height pointing at ``file_name``.
    """
    from IPython.display import IFrame

    # The file has to exist before the frame can render it.
    m.save(file_name)
    frame = IFrame(file_name, width='100%', height='500px')
    return frame

# Define functions for calculating distance on Earth

In [4]:
# Using geopy
import geopy.distance

def dist(a, b):
    """Geodesic distance between two points, in kilometres.

    ``a`` and ``b`` must expose ``x`` and ``y`` attributes; they are passed to
    geopy as ``(x, y)`` pairs, so ``x`` has to hold the latitude and ``y`` the
    longitude.
    """
    start = (a.x, a.y)
    end = (b.x, b.y)
    geodesic_distance = geopy.distance.geodesic(start, end)
    return geodesic_distance.km

In [5]:
# Using geopandas
def dist_2(a, b, metric_epsg=2180):
    """Planar distance between two points, in kilometres, using a projected CRS.

    The previous implementation projected to EPSG:3310 (NAD83 / California
    Albers), which is only meaningful for California and distorts distances
    elsewhere. The default is now EPSG:2180, a metre-based projection for Poland.

    Parameters
    ----------
    a, b : objects with ``x`` and ``y`` attributes
        ``x`` is used as latitude and ``y`` as longitude (degrees, EPSG:4326).
        They are swapped when building the shapely points because those
        expect (longitude, latitude) order.
    metric_epsg : int, default 2180
        EPSG code of the target projected CRS. Its unit must be the metre,
        since the result is divided by 1000 to obtain kilometres.

    Returns
    -------
    pandas.Series
        One-element series (index 0) holding the distance in kilometres.
    """
    def _project(p):
        # "EPSG:4326" replaces the deprecated {"init": "epsg:4326"} form.
        return gpd.GeoSeries([Point(p.y, p.x)], crs="EPSG:4326").to_crs(epsg=metric_epsg)

    return _project(a).distance(_project(b)) / 1000

# Load DataFrames for apartments and metro stations

In [6]:
# Read the apartment offers and the metro stations from CSV files
# located in the current working directory (offers first, then stations).
apartaments, metro = (
    pd.read_csv(csv_path) for csv_path in ("apartaments.csv", "metro.csv")
)

In [7]:
# Preview the first rows of the offers table to check the columns that were loaded.
apartaments.head()

Unnamed: 0,Cena,Czynsz - dodatkowo,Kaucja,Powierzchnia,Liczba pokoi,Rodzaj zabudowy,Piętro,Liczba pięter,Materiał budynku,Okna,Ogrzewanie,Rok budowy,Stan wykończenia,Dostępne od,Data ogłoszenia,Adres,φ,λ,Link,Zdjęcia
0,2200,400 zł,2 600 zł,36 m²,2,blok,parter,4.0,cegła,drewniane,miejskie,2018.0,do zamieszkania,2019-12-15,2019-12-24 16:18:45,"Warszawa, Wilanów, Sarmacka 4 B",52.23614,21.00817,https://www.otodom.pl/oferta/bezposrednio-2-po...,'https://apollo-ireland.akamaized.net/v1/files...
1,2900,300 zł,2 900 zł,50 m²,2,apartamentowiec,> 10,14.0,cegła,drewniane,miejskie,2010.0,do zamieszkania,2019-12-02,2019-12-24 16:13:36,"Warszawa, Mokotów, Melody Park",52.169271,21.020154,https://www.otodom.pl/oferta/pulawska-dol-sluz...,'https://apollo-ireland.akamaized.net/v1/files...
2,2600,400 zł,3 000 zł,42 m²,2,kamienica,5,5.0,cegła,plastikowe,miejskie,1925.0,do zamieszkania,2019-12-16,2019-12-24 16:01:38,"Warszawa, Praga-Północ, Praga, Kaweczynska 16",52.20556,21.07452,https://www.otodom.pl/oferta/mieszkanie-nowocz...,'https://apollo-ireland.akamaized.net/v1/files...
3,2700,500 zł,3 000 zł,50 m²,2,apartamentowiec,1,8.0,cegła,plastikowe,miejskie,,do zamieszkania,2020-01-20,2019-12-24 15:36:46,"Warszawa, Mokotów, ul. Postępu 10",52.17801,20.996466,https://www.otodom.pl/oferta/postepu-10-nowe-k...,'https://apollo-ireland.akamaized.net/v1/files...
4,3000,200 zł,3 000 zł,45 m²,2,apartamentowiec,5,7.0,cegła,drewniane,miejskie,2016.0,do zamieszkania,2020-01-01,2019-12-24 14:50:54,"Warszawa, Mokotów, Dolny Mokotów, ul. Magazynowa",52.185539,21.004877,https://www.otodom.pl/oferta/magazynowa-45-m2-...,'https://apollo-ireland.akamaized.net/v1/files...


## First, add the additional rent (`Czynsz - dodatkowo`) to the price (`Cena`) to get the real price (`Cena rzeczywista`)

In [8]:
# Check if value is Nan or not
def notNan(a):
    """Return the result of comparing ``a`` with itself.

    NaN compares unequal to itself, so the comparison is false for NaN and
    true for ordinary values such as numbers and strings.
    """
    equals_itself = (a == a)
    return equals_itself

In [9]:
def real_price(row):
    """Return the full monthly cost of an offer as an ``int``.

    The result is ``row['Cena']`` plus the amount found in
    ``row['Czynsz - dodatkowo']``.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide the keys ``'Cena'`` and ``'Czynsz - dodatkowo'``.

    Returns
    -------
    int
        Base price plus the extra rent. The extra rent counts as 0 when it is
        missing (NaN/None) or when its text contains no digits.

    Raises
    ------
    ValueError, TypeError
        If ``row['Cena']`` cannot be converted to ``int``.
    """
    base_price = int(row['Cena'])
    extra_rent = row['Czynsz - dodatkowo']

    if not isinstance(extra_rent, str):
        # Either a missing value or an amount that is already numeric.
        return base_price if pd.isna(extra_rent) else base_price + int(extra_rent)

    # Amounts look like "1 200 zł"; a comma is the decimal separator, so keep
    # only the whole part instead of gluing the fraction digits onto it.
    whole_part = extra_rent.split(',')[0]
    # isdecimal() (unlike isdigit()) accepts only characters that int() can parse.
    digits = ''.join(c for c in whole_part if c.isdecimal())
    if not digits:
        # No amount stated in the text: nothing to add.
        return base_price
    return base_price + int(digits)

In [10]:
# Make the cell re-runnable: once the columns have been replaced, the frame no
# longer has its raw shape, so start again from the CSV file.
if 'Cena rzeczywista' in apartaments or 'Cena' not in apartaments:
    print("Reloading apartaments.csv file")
    apartaments = pd.read_csv("apartaments.csv")

# Insert the combined price as the second column, then drop its two source columns.
apartaments.insert(1, "Cena rzeczywista", apartaments.apply(real_price, axis=1))
apartaments = apartaments.drop(columns=['Cena', 'Czynsz - dodatkowo'])

# Plot the locations of the apartments

In [11]:
# Create a map
m_2 = folium.Map(location=[52.2323,21.0000], tiles='cartodbpositron', zoom_start=11)

# Add one marker per offer. Rows missing either coordinate are skipped,
# because a marker location must not contain NaN.
located = apartaments.dropna(subset=['φ', 'λ'])
for idx, lat, lon in zip(located.index, located['φ'], located['λ']):
    Marker([lat, lon], popup=str(idx)).add_to(m_2)

# Display the map (the output directory is created on first use)
os.makedirs("maps", exist_ok=True)
embed_map(m_2, 'maps/m_2.html')

# Find the distance to the nearest metro station for every apartment with valid coordinates

In [13]:
# Build the station points and names once; they do not change between apartments.
station_points = [Point(lat, lon) for lat, lon in zip(metro['φ'], metro['λ'])]
station_names = metro['Nazwa'].tolist()

# One tuple per offer: (link, real price, km to nearest station, station name)
output = []

for idx, house in apartaments.iterrows():
    # Both coordinates are required to compute a distance.
    if notNan(house['φ']) and notNan(house['λ']):
        here = Point(house['φ'], house['λ'])
        distances = [dist(here, station) for station in station_points]
        nearest = int(np.argmin(distances))
        output.append((house['Link'], house['Cena rzeczywista'], round(distances[nearest], 2), station_names[nearest]))

# Display as an offer, sorted by real price (printing is currently disabled)
for a,b,c,d in sorted(output, key=lambda tup: tup[1]):
    pass
    #print("Oferta: {}\n, cena: {} zł, {} km od stacji {}\n".format(a,b,c,d))