# Prague Neighbourhoods Project

In [1]:
import pandas as pd

In [2]:
# scrape a table of Prague postal codes
url = "https://postovnismerovacicisla.cz/psc/Praha"
df_ZIP = pd.read_html(url, encoding="utf8")
df_Prague_Postal_Data = df_ZIP[0]
df_Prague_Postal_Data.head()

Unnamed: 0,0,1,2,3
0,Psč,Město,Pošta,Okres
1,11800,Praha 1-Holešovice,Praha 011,Praha hl.m.
2,11800,Praha 1-Hradčany,Praha 011,Praha hl.m.
3,11800,Praha 1-Malá Strana,Praha 011,Praha hl.m.
4,11900,Praha 1-Hradčany,Praha 012,Praha hl.m.


In [3]:
header = df_Prague_Postal_Data.iloc[0]
df_Prague_Postal_Data = df_Prague_Postal_Data[1:]
df_Prague_Postal_Data.columns = header
df_Prague_Postal_Data.head()

Unnamed: 0,Psč,Město,Pošta,Okres
1,11800,Praha 1-Holešovice,Praha 011,Praha hl.m.
2,11800,Praha 1-Hradčany,Praha 011,Praha hl.m.
3,11800,Praha 1-Malá Strana,Praha 011,Praha hl.m.
4,11900,Praha 1-Hradčany,Praha 012,Praha hl.m.
5,11000,Praha 1-Josefov,Praha 1,Praha hl.m.


In [4]:
df_Prague_ZIP = df_Prague_Postal_Data\
    .drop(columns=["Pošta", "Okres"]) \
    .rename(columns={"Psč": "Postal_Code", "Město": "Neighbourhood"})

df_Prague_ZIP[["Borough", "Neighbourhood"]] = df_Prague_ZIP["Neighbourhood"].str.split("-", expand=True)
df_Prague_ZIP.reset_index(drop=True, inplace=True)
df_Prague_ZIP.head()


Unnamed: 0,Postal_Code,Neighbourhood,Borough
0,11800,Holešovice,Praha 1
1,11800,Hradčany,Praha 1
2,11800,Malá Strana,Praha 1
3,11900,Hradčany,Praha 1
4,11000,Josefov,Praha 1


In [5]:
df_Prague_neighbours = df_Prague_ZIP.groupby(["Postal_Code"])["Neighbourhood"].apply(", ".join).reset_index()
df_Prague_boroughs = df_Prague_ZIP.groupby(["Postal_Code"])["Borough"].apply(", ".join).reset_index() 
df_Prague = df_Prague_neighbours.join(df_Prague_boroughs.set_index("Postal_Code"), on="Postal_Code")

In [6]:
for col in ["Postal_Code", "Neighbourhood", "Borough"]:
    df_Prague[col]=df_Prague[col].str.split(", ").map(set).str.join(", ")
df_Prague.head()

Unnamed: 0,Postal_Code,Neighbourhood,Borough
0,10000,"Malešice, Vinohrady, Žižkov, Vršovice, Strašnice",Praha 10
1,10100,"Vršovice, Záběhlice, Michle, Vinohrady",Praha 10
2,10200,"Štěrboholy, Hostivař",Praha 10
3,10300,"Kolovraty, Lipany, Benice, Nedvězí",Praha 10
4,10400,"Hájek, Uhříněves, Královice, Pitkovice, Křeslice",Praha 10


In [7]:
url2 = "https://realitymix.cz/statistika-nemovitosti/byty-pronajem-prumerna-cena-pronajmu-1m2-mesic.html"
df_url = pd.read_html(url2)
df_prices = df_url[0]
df = df_prices.drop([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]).reset_index(drop=True)
df.drop(df.columns[[-1,-2,-3]], axis=1, inplace=True)
df_realities = df.rename(columns={"Unnamed: 0":"Borough"})


In [8]:
df_realities["Average_rent_per_m2"]=df_realities.mean(axis=1)


In [9]:
cols = [1, 2, 3, 4]
df_realities.drop(df_realities.columns[cols], axis=1, inplace=True)

In [10]:
df_realities.head()

Unnamed: 0,Borough,Average_rent_per_m2
0,Praha 1,350.0
1,Praha 2,340.25
2,Praha 3,326.5
3,Praha 4,271.5
4,Praha 5,303.25


In [11]:
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

In [12]:
from geopy.geocoders import Nominatim

address = "Prague, Czechia"

geolocator = Nominatim(user_agent="Prague_explorer")
location = geolocator.geocode(address)
prague_lat = location.latitude
prague_lng = location.longitude
print(prague_lat, prague_lng)

50.0874654 14.4212535


In [13]:
from functools import partial

locator = Nominatim(user_agent="myGeocoder")
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)
df_Prague['location'] = df_Prague['Postal_Code'].apply(partial(geocode, country_codes="CZ"))

# 3 - create longitude, laatitude and altitude from location column (returns tuple)
df_Prague['point'] = df_Prague['location'].apply(lambda loc: tuple(loc.point) if loc else None)

# 4 - split point column into latitude, longitude and altitude columns
df_Prague[['latitude', 'longitude', 'altitude']] = pd.DataFrame(df_Prague['point'].tolist(), index=df_Prague.index)
df_Prague.head()

Unnamed: 0,Postal_Code,Neighbourhood,Borough,location,point,latitude,longitude,altitude
0,10000,"Malešice, Vinohrady, Žižkov, Vršovice, Strašnice",Praha 10,"(Strašnice, Hlavní město Praha, Praha, 10000, ...","(50.0725798767135, 14.494589826631387, 0.0)",50.07258,14.49459,0.0
1,10100,"Vršovice, Záběhlice, Michle, Vinohrady",Praha 10,"(Vršovice, Hlavní město Praha, Praha, 10100, Č...","(50.06819336550105, 14.458153832936699, 0.0)",50.068193,14.458154,0.0
2,10200,"Štěrboholy, Hostivař",Praha 10,"(Hostivař, Hlavní město Praha, Praha, 10200, Č...","(50.05660518885992, 14.535479778688595, 0.0)",50.056605,14.53548,0.0
3,10300,"Kolovraty, Lipany, Benice, Nedvězí",Praha 10,"(Kolovraty, Hlavní město Praha, Praha, 10300, ...","(50.012998937391814, 14.62376800076954, 0.0)",50.012999,14.623768,0.0
4,10400,"Hájek, Uhříněves, Královice, Pitkovice, Křeslice",Praha 10,"(Uhříněves, Hlavní město Praha, Praha, 10400, ...","(50.030440782483105, 14.594096428184134, 0.0)",50.030441,14.594096,0.0


In [14]:
df_Prague_location = df_Prague.drop(["location", "point", "altitude"], axis=1).rename(columns={"longitude":"Longitude", "latitude":"Latitude"})
df_Prague_location.head()

Unnamed: 0,Postal_Code,Neighbourhood,Borough,Latitude,Longitude
0,10000,"Malešice, Vinohrady, Žižkov, Vršovice, Strašnice",Praha 10,50.07258,14.49459
1,10100,"Vršovice, Záběhlice, Michle, Vinohrady",Praha 10,50.068193,14.458154
2,10200,"Štěrboholy, Hostivař",Praha 10,50.056605,14.53548
3,10300,"Kolovraty, Lipany, Benice, Nedvězí",Praha 10,50.012999,14.623768
4,10400,"Hájek, Uhříněves, Královice, Pitkovice, Křeslice",Praha 10,50.030441,14.594096


In [15]:
df_Prague = df_Prague_location.join(df_realities.set_index("Borough"), on="Borough")
df_Prague.dropna(inplace=True)

In [16]:
import folium


prague_map = folium.Map(location=[prague_lat, prague_lng], zoom_start=11)

for lat, lng, label in zip(df_Prague["Latitude"], df_Prague["Longitude"], df_Prague["Neighbourhood"]):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        popup=label,
        color="red",
        fill=True,
        fill_color="pink",
        fill_opacity=0.7).add_to(prague_map)
    
prague_map

In [17]:
# get Foursquare client information
CLIENT_ID = ''
CLIENT_SECRET = '' 
VERSION = '20180605' 
LIMIT = 100

In [18]:
import requests

# define a function that will get venues and their coordinates from Foursquare

def getVenues(names, latitudes, longitudes, radius=500):
    
    venues_list = []
    
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        results = requests.get(url).json()["response"]['groups'][0]['items']
    
        venues_list.append([(
            name, 
            lat, 
            lng,
            v["venue"]["name"],
            v["venue"]["location"]["lat"],
            v["venue"]["location"]["lng"],
            v["venue"]["categories"][0]["name"]) for v in results])
        
    
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [19]:
# run the function to get Prague neighbourhood venues
prague_venues = getVenues(names=df_Prague["Neighbourhood"], latitudes=df_Prague["Latitude"], longitudes=df_Prague["Longitude"])
prague_venues.head()

Malešice, Vinohrady, Žižkov, Vršovice, Strašnice
Vršovice, Záběhlice, Michle, Vinohrady
Štěrboholy, Hostivař
Kolovraty, Lipany, Benice, Nedvězí
Hájek, Uhříněves, Královice, Pitkovice, Křeslice
Záběhlice
Dubeč
Horní Měcholupy, Petrovice, Dolní Měcholupy
Josefov, Vinohrady, Staré Město, Nové Město
Hradčany, Malá Strana, Holešovice
Hradčany
Vinohrady, Nové Město
Nusle, Vyšehrad, Nové Město
Krč, Michle, Vinohrady, Nusle, Podolí
Záběhlice, Michle, Chodov
Krč, Libuš, Kamýk, Lhotka, Modřany, Braník, Písnice
Kamýk, Komořany, Točná, Cholupice, Modřany
Podolí, Braník, Hodkovičky
Kunratice, Chodov
Malá Strana, Radlice, Košíře, Motol, Smíchov
Hlubočepy, Malá Chuchle
Radotín
Lochkov, Holyně, Slivenec
Řeporyje, Stodůlky, Zadní Kopanina, Třebonice
Sobín, Zličín
Lipence
Zbraslav
Jinonice
Lahovice, Velká Chuchle, Malá Chuchle
Liboc, Ruzyně
Břevnov, Liboc, Střešovice, Veleslavín
Řepy
Přední Kopanina, Nebušice
Lysolaje, Suchdol
Holešovice, Bubeneč
Troja, Libeň
Troja, Žižkov, Libeň
Čimice, Bohnice, Troja


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malešice, Vinohrady, Žižkov, Vršovice, Strašnice",50.07258,14.49459,Sara Kebab,50.072194,14.49182,Kebab Restaurant
1,"Malešice, Vinohrady, Žižkov, Vršovice, Strašnice",50.07258,14.49459,Gutovka,50.071389,14.491144,Athletics & Sports
2,"Malešice, Vinohrady, Žižkov, Vršovice, Strašnice",50.07258,14.49459,Strašnický park,50.071564,14.494035,Park
3,"Malešice, Vinohrady, Žižkov, Vršovice, Strašnice",50.07258,14.49459,U Kašpárka,50.069945,14.495971,Pub
4,"Malešice, Vinohrady, Žižkov, Vršovice, Strašnice",50.07258,14.49459,Beachvolejbal Gutovka,50.070837,14.491167,Volleyball Court


In [20]:
# converts categorical values to int
prague_onehot = pd.get_dummies(prague_venues[["Venue Category"]], prefix="", prefix_sep="")
prague_onehot["Neighbourhood"] = prague_venues["Neighbourhood"]
prague_onehot = prague_onehot[[prague_onehot.columns[-1]] + list(prague_onehot.columns[:-1])]
prague_onehot.head()

Unnamed: 0,Neighbourhood,American Restaurant,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,...,Video Game Store,Vietnamese Restaurant,Vineyard,Volleyball Court,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,"Malešice, Vinohrady, Žižkov, Vršovice, Strašnice",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Malešice, Vinohrady, Žižkov, Vršovice, Strašnice",0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Malešice, Vinohrady, Žižkov, Vršovice, Strašnice",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Malešice, Vinohrady, Žižkov, Vršovice, Strašnice",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Malešice, Vinohrady, Žižkov, Vršovice, Strašnice",0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0


In [21]:
# group the dataframe by neighbourhoods
prague_grouped = prague_onehot.groupby("Neighbourhood").mean().reset_index()
prague_grouped.head()

Unnamed: 0,Neighbourhood,American Restaurant,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,...,Video Game Store,Vietnamese Restaurant,Vineyard,Volleyball Court,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Běchovice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Břevnov, Liboc, Střešovice, Veleslavín",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Dolní Chabry,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Dolní Počernice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Dubeč,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

number_of_clusters = 5
prague_clustering = prague_grouped.drop("Neighbourhood", axis=1)
k_means = KMeans(n_clusters = number_of_clusters, random_state = 4).fit(prague_clustering)
prague_grouped.insert(0, "Cluster Group", k_means.labels_)
df_Prague = df_Prague.join(prague_grouped.set_index("Neighbourhood"), on="Neighbourhood")

In [23]:
df_Prague.dropna(axis=0, inplace=True)
df_Prague["Cluster Group"] = df_Prague["Cluster Group"].astype(int)
df_Prague.head()

Unnamed: 0,Postal_Code,Neighbourhood,Borough,Latitude,Longitude,Average_rent_per_m2,Cluster Group,American Restaurant,Aquarium,Art Gallery,...,Video Game Store,Vietnamese Restaurant,Vineyard,Volleyball Court,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,10000,"Malešice, Vinohrady, Žižkov, Vršovice, Strašnice",Praha 10,50.07258,14.49459,278.75,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.022727,0.0,0.0,0.022727,0.0,0.0,0.0
1,10100,"Vršovice, Záběhlice, Michle, Vinohrady",Praha 10,50.068193,14.458154,278.75,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.051282,0.0,0.0,0.0
2,10200,"Štěrboholy, Hostivař",Praha 10,50.056605,14.53548,278.75,1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,10300,"Kolovraty, Lipany, Benice, Nedvězí",Praha 10,50.012999,14.623768,278.75,1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,10400,"Hájek, Uhříněves, Královice, Pitkovice, Křeslice",Praha 10,50.030441,14.594096,278.75,0,0.0,0.0,0.0,...,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
df_Prague = df_Prague.loc[:, (df_Prague != 0).any(axis=0)]

Unnamed: 0,Postal_Code,Neighbourhood,Borough,Latitude,Longitude,Average_rent_per_m2,Cluster Group,American Restaurant,Aquarium,Art Gallery,...,Video Game Store,Vietnamese Restaurant,Vineyard,Volleyball Court,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,10000,"Malešice, Vinohrady, Žižkov, Vršovice, Strašnice",Praha 10,50.07258,14.49459,278.75,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.022727,0.0,0.0,0.022727,0.0,0.0,0.0
1,10100,"Vršovice, Záběhlice, Michle, Vinohrady",Praha 10,50.068193,14.458154,278.75,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.051282,0.0,0.0,0.0
2,10200,"Štěrboholy, Hostivař",Praha 10,50.056605,14.53548,278.75,1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,10300,"Kolovraty, Lipany, Benice, Nedvězí",Praha 10,50.012999,14.623768,278.75,1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,10400,"Hájek, Uhříněves, Královice, Pitkovice, Křeslice",Praha 10,50.030441,14.594096,278.75,0,0.0,0.0,0.0,...,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,10600,Záběhlice,Praha 10,50.056908,14.498522,278.75,1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,10700,Dubeč,Praha 10,50.058778,14.584235,278.75,2,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,10900,"Horní Měcholupy, Petrovice, Dolní Měcholupy",Praha 10,50.041985,14.557427,278.75,1,0.0,0.0,0.0,...,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,11000,"Josefov, Vinohrady, Staré Město, Nové Město",Praha 1,50.085273,14.423279,350.0,0,0.014706,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0
10,11800,"Hradčany, Malá Strana, Holešovice",Praha 1,50.087596,14.401993,350.0,0,0.0,0.0,0.02,...,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.01
