# The Battle of Neighborhoods

This notebook contains an analysis of the neighbourhoods of Moscow.

### Getting started with the files

Importing all necessary libraries

In [1]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import folium # map rendering library
from folium import plugins
import requests # library to handle requests
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
import time

Save the coordinates of the centre of Moscow.

In [2]:
# Coordinates of the centre of Moscow (latitude, longitude).
moscow_latitude, moscow_longitude = 55.751244, 37.618423
print(
    'The geograpical coordinate of Moscow are {}, {}.'.format(
        moscow_latitude, moscow_longitude
    )
)

The geograpical coordinate of Moscow are 55.751244, 37.618423.


Create a Folium map of Moscow to observe the city.

In [3]:
# Plain map centred on the Moscow coordinates; the bare name on the last line renders it.
map_Moscow = folium.Map(
    zoom_start=10,
    location=[moscow_latitude, moscow_longitude],
)
map_Moscow

### Visualising the neighbourhoods

Draw a map of the neighbourhoods.

In [4]:
# Path of the GeoJSON file that is drawn on top of the base map.
moscow_geo = 'moscow.geojson'

moscow_map = folium.Map(location=[moscow_latitude, moscow_longitude], zoom_start=9)

# Read the file inside a context manager so the handle is closed deterministically
# (the previous bare open(...).read() left it open until garbage collection).
with open(moscow_geo, encoding="utf-8") as geo_file:
    neighbourhoods_geojson = geo_file.read()

# Put the GeoJSON layer in its own named feature group.
fg = folium.FeatureGroup(name='neigbourhoods')
fg.add_child(folium.GeoJson(neighbourhoods_geojson))
moscow_map.add_child(fg)

# Small overview map in the corner.
minimap = plugins.MiniMap()
moscow_map.add_child(minimap)

# Save a standalone HTML copy and render the map inline.
moscow_map.save('lol.html')
moscow_map

There are different types of neighbourhoods in Moscow; therefore, I need to analyse the file.

Import json module and read the file.

In [5]:
import json

# Keep both the raw text (json_file) and the parsed document (results).
with open(moscow_geo, mode='r', encoding='utf-8') as fhand:
    json_file = fhand.read()

results = json.loads(json_file)

Count the total number of neighbourhoods of every type.

In [6]:
# Split the GeoJSON features by the value of their TYPE_MO property.
list_mun_dist = [
    item for item in results["features"]
    if item['properties']['TYPE_MO'] == 'Муниципальный округ'
]
print('Total number of municipal districts: ', len(list_mun_dist))

list_settlements = [
    item for item in results["features"]
    if item['properties']['TYPE_MO'] == 'Поселение'
]
print('Total number of settlements: ', len(list_settlements))

list_city_dist = [
    item for item in results["features"]
    if item['properties']['TYPE_MO'] == 'Городской округ'
]
print('Total number of city districts: ', len(list_city_dist))

# Overall feature count, independent of type.
print('Total number of neighbourhoods:', len(results["features"]))

Total number of municipal districts:  125
Total number of settlements:  19
Total number of city districts:  2
Total number of neighbourhoods: 146


Draw a map and indicate different types of neighbourhood with different colours.

In [7]:
moscow_map = folium.Map(location=[moscow_latitude, moscow_longitude], zoom_start=9)

# Fill colour per TYPE_MO value; any other value falls back to red.
# (The previously built `mun_json` string was never used - the map is drawn
# from the full `json_file` - so it has been removed.)
type_colours = {
    'Муниципальный округ': 'blue',
    'Поселение': 'green',
}

folium.GeoJson(
    json_file,
    style_function=lambda feature: {
        'fillColor': type_colours.get(feature['properties']['TYPE_MO'], 'red'),
        'color': 'black',
        'weight': 1
    }
).add_to(moscow_map)

# Small overview map in the corner.
minimap = plugins.MiniMap()
moscow_map.add_child(minimap)

# Save a standalone HTML copy and render the map inline.
moscow_map.save('lol.html')
moscow_map

### Extracting data from Wikipedia and processing the data.

Extract the data for analysis. The result will be saved in an Excel file for future use, so that the calls do not have to be repeated.

In [55]:
# Russian Wikipedia page "Список районов и поселений Москвы" (percent-encoded title).
# The URL is split at the underscores of the title for readability only.
tables = pd.read_html(
    "https://ru.wikipedia.org/wiki/"
    "%D0%A1%D0%BF%D0%B8%D1%81%D0%BE%D0%BA_"
    "%D1%80%D0%B0%D0%B9%D0%BE%D0%BD%D0%BE%D0%B2_"
    "%D0%B8_"
    "%D0%BF%D0%BE%D1%81%D0%B5%D0%BB%D0%B5%D0%BD%D0%B8%D0%B9_"
    "%D0%9C%D0%BE%D1%81%D0%BA%D0%B2%D1%8B"
)

# The first table on the page is the one used below.
df = tables[0]
df.head()

Unnamed: 0,№,Флаг,Герб,Название района[2]/поселения[3][4],Название cоответствующего внутригородского муниципального образования: муниципального округа / поселения / городского округа[5],Адми-нистра-тивныйокруг,"Пло-щадь,[6][7]км²","Насе-ление2019[8],чел.","Плот-ностьнасе-ления2019,чел. / км²","Пло-щадьжилого фонда(01.01.2010)[9],тыс. м²","Жил-площадьначело-века(01.01.2010),м²/чел."
0,1,,,Академический,Академический,ЮЗАО,583,↗109 387,18762.78,24670.0,227.0
1,2,,,Алексеевский,Алексеевский,СВАО,529,↗80 534,15223.82,16079.0,205.0
2,3,,,Алтуфьевский,Алтуфьевский,СВАО,325,↗57 596,17721.85,8393.0,155.0
3,4,,,Арбат,Арбат,ЦАО,211,↗36 125,17120.85,7310.0,260.0
4,5,,,Аэропорт,Аэропорт,САО,458,↗79 486,17355.02,19397.0,259.0


Drop unnecessary data like Flag, District, Administrative District, Living space per person and rename columns.

In [56]:
# Remove the columns that are not needed, in place.
df.drop(
    columns=[
        '№',
        'Флаг',
        'Герб',
        'Название района[2]/поселения[3][4]',
        'Адми-нистра-тивныйокруг',
        'Пло-щадьжилого фонда(01.01.2010)[9],тыс. м²',
        'Жил-площадьначело-века(01.01.2010),м²/чел.',
    ],
    inplace=True,
)

# Give the four remaining columns short English names.
df.columns = ['Neighborhood', 'Area', 'Population', 'Population Density']
df.head()

Unnamed: 0,Neighborhood,Area,Population,Population Density
0,Академический,583,↗109 387,18762.78
1,Алексеевский,529,↗80 534,15223.82
2,Алтуфьевский,325,↗57 596,17721.85
3,Арбат,211,↗36 125,17120.85
4,Аэропорт,458,↗79 486,17355.02


Convert the Population column to numeric data.

In [57]:
# Turn values such as '↗109 387' into the integer 109387.
# The previous version dropped the first character unconditionally (which eats a
# digit when no trend arrow is present), passed a stray 'columns' positional
# argument to Series.apply, and failed when the cell was run a second time.
df.Population = (
    df.Population.astype(str)
    .str.replace(r'\[.*?\]', '', regex=True)  # drop bracketed footnote markers, if any
    .str.replace(r'\D', '', regex=True)       # drop arrows, spaces and any other non-digit
    .astype(int)
)
df.head()

Unnamed: 0,Neighborhood,Area,Population,Population Density
0,Академический,583,109387,18762.78
1,Алексеевский,529,80534,15223.82
2,Алтуфьевский,325,57596,17721.85
3,Арбат,211,36125,17120.85
4,Аэропорт,458,79486,17355.02


Save information about the type of neighbourhood in a new column 'Type' and remove it from the 'Neighborhood' column

In [58]:
# Add a 'Type' column filled with NaN as a placeholder.
df['Type'] = np.nan
# Show the last rows of the frame.
df.tail()

Unnamed: 0,Neighborhood,Area,Population,Population Density,Type
141,"Сосенское, поселение",6707,30651,457.0,
142,"Троицк, городской округ",1633,61079,3740.29,
143,"Филимонковское, поселение",3577,7026,196.42,
144,"Щаповское, поселение",8606,9572,111.22,
145,"Щербинка, городской округ",762,53281,6992.26,


In [59]:
# Names containing a comma look like '<name>, <type>'; names without one get the type 'район'.
df['Type'] = [
    name.split(', ')[1] if ',' in name else 'район'
    for name in df['Neighborhood']
]
# Keep only the part before ', ' as the neighbourhood name.
df['Neighborhood'] = [
    name.split(', ')[0] if ',' in name else name
    for name in df['Neighborhood']
]
df.tail()

Unnamed: 0,Neighborhood,Area,Population,Population Density,Type
141,Сосенское,6707,30651,457.0,поселение
142,Троицк,1633,61079,3740.29,городской округ
143,Филимонковское,3577,7026,196.42,поселение
144,Щаповское,8606,9572,111.22,поселение
145,Щербинка,762,53281,6992.26,городской округ


### Determine the centre of every neighbourhood.

No file with the coordinates of the centre of every neighbourhood was found, so one has to be built.

Create two new columns for every neighbourhood.

In [67]:
# Add NaN-filled 'latitude' and 'longitude' columns as placeholders.
df['latitude'] = np.nan
df['longitude'] = np.nan
df.head()

Unnamed: 0,Neighborhood,Area,Population,Population Density,Type,latitude,longitude
0,Академический,583,109387,18762.78,район,,
1,Алексеевский,529,80534,15223.82,район,,
2,Алтуфьевский,325,57596,17721.85,район,,
3,Арбат,211,36125,17120.85,район,,
4,Аэропорт,458,79486,17355.02,район,,


Get the coordinates of centres of every neighbourhood using Nominatim

In [88]:
geolocator = Nominatim(user_agent="ny_explorer")
for index in df.index:
    # Rows that already have a latitude are skipped, so the cell can simply be
    # re-run to fill the remaining rows if Nominatim ended the session.
    if not np.isnan(df.loc[index, 'latitude']):
        continue

    # The address format differs: 'район' rows whose name ends in 'й' are searched
    # as '<name> район', every other row as '<type> <name>'.
    if df.loc[index, 'Type'] == 'район' and df.loc[index, 'Neighborhood'][-1] == 'й':
        address = '{} район, Москва, Россия'.format(df.loc[index, 'Neighborhood'])
    else:
        address = '{} {}, Москва, Россия'.format(df.loc[index, 'Type'], df.loc[index, 'Neighborhood'])
    location = geolocator.geocode(address)

    # `is not None` is the idiomatic identity check (PEP 8) instead of `!= None`.
    if location is not None:
        print(address, location.latitude, location.longitude)
        df.loc[index, 'latitude'] = location.latitude
        df.loc[index, 'longitude'] = location.longitude
    else:
        # No match: the row keeps NaN coordinates and is reported.
        print('NONE!', address)
    # Pause between consecutive geocoder requests.
    time.sleep(1.5)

Московский район, Москва, Россия 55.6202807 37.7211499
поселение Мосрентген, Москва, Россия 55.61872655 37.469286079893024
поселение Новофёдоровское, Москва, Россия 55.43792585 36.99971817597381
поселение Первомайское, Москва, Россия 55.50190525 37.21279504750347
поселение Роговское, Москва, Россия 55.2290933 37.05519465465914
поселение Рязановское, Москва, Россия 55.478723099999996 37.505779026145376
поселение Сосенское, Москва, Россия 55.5740204 37.45658104304032
городской округ Троицк, Москва, Россия 55.479412249999996 37.29803052194669
поселение Филимонковское, Москва, Россия 55.56884855 37.30956742236274
поселение Щаповское, Москва, Россия 55.384577699999994 37.38527174994181
городской округ Щербинка, Москва, Россия 55.504821750000005 37.56051832537304


In [90]:
# Preview the first rows, including the latitude/longitude columns.
df.head()

Unnamed: 0,Neighborhood,Area,Population,Population Density,Type,latitude,longitude
0,Академический,583,109387,18762.78,район,55.689359,37.577971
1,Алексеевский,529,80534,15223.82,район,55.811044,37.648999
2,Алтуфьевский,325,57596,17721.85,район,55.878695,37.58677
3,Арбат,211,36125,17120.85,район,55.751199,37.589872
4,Аэропорт,458,79486,17355.02,район,55.800504,37.543864


Two neighbourhoods (Северный and Восточный) need a differently formatted address, so geocode them separately.

In [95]:
# Geocode the neighbourhoods that need the 'район <name>' address form.
# Fixes relative to the previous copy-pasted version:
#   * the result is checked for None before its coordinates are read;
#   * coordinates are written only to the row matching the neighbourhood name, not
#     to whatever row a stale `index` variable from an earlier loop points at.
for neighbourhood in ('Северный', 'Восточный'):
    address = 'район {}, Москва, Россия'.format(neighbourhood)
    location = geolocator.geocode(address)
    if location is not None:
        print(address, location.latitude, location.longitude)
        is_target = df['Neighborhood'] == neighbourhood
        df.loc[is_target, 'latitude'] = location.latitude
        df.loc[is_target, 'longitude'] = location.longitude
    else:
        print('NONE!', address)
    # Pause between consecutive geocoder requests.
    time.sleep(1.5)

район Северный, Москва, Россия 55.932539750000004 37.5430355899533
район Восточный, Москва, Россия 55.81336965 37.86258439674499


Save the result in an excel file for future uses in order not to repeat the calls.

In [103]:
# Write the frame to an Excel file in the working directory.
df.to_excel('Moscow neighbourhoods.xlsx')

In [108]:
# Reload the saved frame and discard the 'Unnamed: 0' column that the round trip adds.
df = pd.read_excel('Moscow neighbourhoods.xlsx').drop(columns='Unnamed: 0')
df.head()

Unnamed: 0,Neighborhood,Area,Population,Population Density,Type,latitude,longitude
0,Академический,583,109387,18762.78,район,55.689359,37.577971
1,Алексеевский,529,80534,15223.82,район,55.811044,37.648999
2,Алтуфьевский,325,57596,17721.85,район,55.878695,37.58677
3,Арбат,211,36125,17120.85,район,55.751199,37.589872
4,Аэропорт,458,79486,17355.02,район,55.800504,37.543864


The dataframe is now ready to be used.

### Making a choropleth map of neighborhoods ranging by Population Density

Create a choropleth map of neighborhoods in Moscow ranging by Population Density to check that choropleth works and the centres are correct.

In [107]:
moscow_map = folium.Map(location=[moscow_latitude, moscow_longitude], zoom_start=9, control_scale = True)

# Colour each GeoJSON feature by the 'Population Density' of the row whose
# 'Neighborhood' equals the feature's NAME property; unmatched features are purple.
choropleth = folium.Choropleth(
    name = "Population Density (hum/km^2)",
    geo_data = json_file,
    data = df,
    columns = ['Neighborhood', 'Population Density'],
    key_on = 'feature.properties.NAME',
    fill_color = 'YlOrRd',
    fill_opacity = 0.6,
    line_opacity  =0.2,
    legend_name = 'Population Density (hum/km^2)',
    highlight = True,
    nan_fill_color = 'purple',
    nan_fill_opacity = 0.4,
    line_color = 'black'
).add_to(moscow_map)

# Show the feature's NAME property as a tooltip.
choropleth.geojson.add_child(
    folium.features.GeoJsonTooltip(['NAME'], labels = False, sticky = True)
)

# One marker per neighbourhood centre. Rows without coordinates are skipped,
# because folium refuses locations that contain NaN.
located = df.dropna(subset=['latitude', 'longitude'])
for latitude, longitude in zip(located['latitude'], located['longitude']):
    folium.Marker([latitude, longitude]).add_to(moscow_map)

folium.TileLayer('cartodbpositron', overlay = True ,name = "Light Mode").add_to(moscow_map)
folium.LayerControl(collapsed=True).add_to(moscow_map)

# Save a standalone HTML copy and render the map inline.
moscow_map.save('lol.html')
moscow_map

In [17]:
import vincent

# Two independent uniform samples of 100 points each (x is drawn first, then y).
scatter_points = dict(
    x=np.random.uniform(size=(100,)),
    y=np.random.uniform(size=(100,)),
)

# Build the vincent scatter chart, using 'x' as the iteration index.
scatter_chart = vincent.Scatter(
    scatter_points,
    iter_idx='x',
    width=600,
    height=300,
)

# Serialise the chart to a JSON string, then parse that string into a dict.
scatter_json = scatter_chart.to_json()
scatter_dict = json.loads(scatter_json)

In [18]:
# Small demo table for the popup. It gets its own name so that it does not
# overwrite `df`, which holds the neighbourhood data prepared earlier.
popup_df = pd.DataFrame(data=[['apple', 'oranges'], ['other', 'stuff']], columns=['cats', 'dogs'])

m = folium.Map([43, -100], zoom_start=4)

# Render the table as HTML with the given CSS classes.
html = popup_df.to_html(classes='table table-striped table-hover table-condensed table-responsive')

popup = folium.Popup(html)

# Attach the HTML table as the popup of a single marker.
folium.Marker([30, -100], popup=popup).add_to(m)

m

Get statistics about the virus

In [None]:
# Page whose HTML tables are parsed below.
url = 'https://coronavirus-online.moscow/sluchai-koronavirusa-v-moskve/'

# read_html returns every table found on the page; only the first one is used.
tables = pd.read_html(io=url)
df_virus = tables[0]
df_virus.head(n=5)

Remove the unnecessary rows and columns and rename columns.

In [None]:
# Discard rows with missing values, in place.
df_virus.dropna(inplace=True)

# Name the three columns.
df_virus.columns = ['address', 'building', 'date']

# Prefix the address with the building, separated by ', '.
full_address = df_virus['building'] + ', ' + df_virus['address']
df_virus['address'] = full_address

# Only the combined address column is kept.
df_virus.drop(columns=['date', 'building'], inplace=True)
df_virus.head()

Create two empty columns.

In [None]:
# Add NaN-filled 'latitude' and 'longitude' columns as placeholders.
df_virus['latitude'] = np.nan
df_virus['longitude'] = np.nan
df_virus.head()

Get the location of every address.

In [None]:
# NOTE: the geocoding loop below is wrapped in a triple-quoted string, so it is NOT
# executed; the cell only evaluates to that string. As written, the 'latitude' and
# 'longitude' columns of df_virus are therefore not filled by this cell.
'''nones = []
geolocator = Nominatim(user_agent="ny_explorer")
for index, row in df_virus.iterrows():
    if np.isnan(df_virus.loc[index, 'latitude']):
        address = df_virus.loc[index, 'address']
        location = geolocator.geocode(address)
        if location != None:
            print(address, location.latitude, location.longitude)
            df_virus.loc[index, 'latitude'] = location.latitude
            df_virus.loc[index, 'longitude'] = location.longitude
        else:
            print('NONE!', address)
            nones.append(address)
        time.sleep(1.5)
df_virus.head(5)  '''

In [None]:
# Display the whole frame (no .head()), so every row is rendered.
df_virus

Draw a map with all the addresses.

In [None]:
# Keep only the rows of df_virus that have no missing value in any column.
what_have = df_virus.dropna()

In [None]:
#for index, row in what_have.iterrows():
#   print(row)

In [None]:
moscow_map = folium.Map(
    location=[moscow_latitude, moscow_longitude],
    zoom_start=9,
)

# Draw one small filled circle per row of `what_have`.
for index, row in what_have.iterrows():
    latitude, longitude = row['latitude'], row['longitude']
    circle = folium.Circle(
        location=[latitude, longitude],
        radius=30.0,
        color='crimson',
        fill=True,
        fill_color='red',
    )
    circle.add_to(moscow_map)

moscow_map

In [None]:
#Let's create a Figure, with a map inside.
f = branca.element.Figure()
folium.Map([-25, 150], zoom_start=3).add_to(f)

# Let's put the figure into an IFrame.
iframe = branca.element.IFrame(width=500, height=300)
f.add_to(iframe)

# Let's put the IFrame in a Popup
popup = folium.Popup(iframe, max_width=2650)

# Let's create another map.
m = folium.Map([43, -100], zoom_start=4)

# Let's put the Popup on a marker, in the second map.
folium.Marker([30, -100], popup=popup).add_to(m)

# We get a map in a Popup. Not really useful, but powerful.

m

Another way to do choropleth maps

In [None]:
# `os` was used for the output path without being imported.
import os

from branca.colormap import linear

# NOTE(review): `unemployment` and `geo_json_data` are not defined anywhere in the
# visible notebook - this cell needs them to be loaded first. Presumably
# `unemployment` is a DataFrame with 'State' and 'Unemployment' columns and
# `geo_json_data` a GeoJSON object whose feature ids are state codes - TODO confirm.

# Colour scale spanning the observed range of the 'Unemployment' column.
colormap = linear.YlGn_09.scale(
    unemployment.Unemployment.min(),
    unemployment.Unemployment.max())

print(colormap(5.0))

# Series of unemployment values indexed by state.
unemployment_dict = unemployment.set_index('State')['Unemployment']

# Lookup of a single key; the value is not used (it is not the last expression of the cell).
unemployment_dict['AL']

# Pre-compute the fill colour for every state key.
color_dict = {key: colormap(unemployment_dict[key]) for key in unemployment_dict.keys()}

m = folium.Map([43, -100], zoom_start=4)

folium.GeoJson(
    geo_json_data,
    style_function=lambda feature: {
        'fillColor': color_dict[feature['id']],
        'color': 'black',
        'weight': 1,
        'dashArray': '5, 5',
        'fillOpacity': 0.9,
    }
).add_to(m)

# Make sure the output directory exists; saving into a missing directory fails.
os.makedirs('results', exist_ok=True)
m.save(os.path.join('results', 'GeoJSON_and_choropleth_6.html'))

m