Here I import all the libraries 

In [17]:
import pandas as pd
import numpy as np # library to handle data in a vectorized manner

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

Now I download my own file with regional centers' latitudes and longitudes. 
NB: All the names are in Russian.

In [18]:
df = pd.read_excel('regional_stats.xlsx')
df.reset_index(inplace = True)
df.columns
df.head()

Unnamed: 0,index,Neighbourhood,City,latitude,longitude
0,0,Республика Хакасия,Абакан,53.720976,91.4424
1,1,Чукотский автономный округ 2,Анадырь,64.7337,177.4968
2,2,Чукотский автономный округ,Анадырь,64.7337,177.4968
3,3,Архангельская область,Архангельск,64.539304,40.518735
4,4,Астраханская область,Астрахань,46.3588,48.0599


Here you need to insert your credentials if you want to run the code

In [28]:
CLIENT_ID = '' # your Foursquare ID
CLIENT_SECRET = '' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30

This function searches for the museums in 50000 radius

In [20]:
def museum_search(names, latitudes, longitudes, radius=50000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&categoryId=4bf58dd8d48988d181941735&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            lat, 
            lng, 
            VERSION, 
            radius, 
            LIMIT)
            
        # make the GET request
        v = requests.get(url).json()
        results = v['response']['venues']
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            a['name'], 
            a['location']['lat'], 
            a['location']['lng']) for a in results])

    museums = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    museums.columns = ['City', 
                  'Latitude', 
                  'Longitude', 
                  'Museum', 
                  'Museum Latitude', 
                  'Museum Longitude']
    
    return(museums)

Let's get all the museums:

In [21]:
russian_museums = museum_search(names=df['City'],
                                   latitudes=df['latitude'],
                                   longitudes=df['longitude']
                                  )

Абакан
Анадырь
Анадырь
Архангельск
Астрахань
Барнаул
Белгород
Биробиджан
Благовещенск
Брянск
Владивосток
Владикавказ
Владимир
Волгоград
Вологда
Воронеж
Горно-Алтайск
Грозный
Екатеринбург
Иваново
Ижевск
Иркутск
Йошкар-Ола
Казань
Калининград
Калуга
Кемерово
Киров
Кострома
Краснодар
Красноярск
Курган
Курск
Кызыл
Липецк
Магадан
Магас
Майкоп
Махачкала
Москва
Мурманск
Нальчик
Нарьян-Мар
Нижний Новгород
Великий Новгород
Новосибирск
Омск
Орел
Оренбург
Пенза
Пермь
Петрозаводск
Петропавловск-Камчатский
Псков
Ростов-на-Дону
Рязань
Салехард
Самара
Санкт-Петербург
Саранск
Саратов
Симферополь
Смоленск
Ставрополь
Сыктывкар
Тамбов
Тверь
Томск
Тула
Тюмень
Улан-Удэ
Ульяновск
Уфа
Хабаровск
Ханты-Мансийск
Чебоксары
Челябинск
Черкесск
Чита
Элиста
Южно-Сахалинск
Якутск
Ярославль


Let's drop the parameters we do not need and group our data. Afterwards, I'd like to make clusters. As the clusters are made only based on count(), the cluster and the number of museums correlate. However, it doesn't mean that the 1st cluster would be the least 'museum-rich' region. So let's look at our table and find out, which clusters are the least 'museum-rich' (the 4th one).

In [22]:
my_museums = russian_museums.drop(['Latitude','Longitude','Museum Latitude','Museum Longitude'],axis = 1)
grouped_museums = my_museums.groupby(['City']).count()

kclusters = 5
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(grouped_museums)

grouped_museums.insert(0, 'my_labels', kmeans.labels_)
grouped_museums.head()

Unnamed: 0_level_0,my_labels,Museum
City,Unnamed: 1_level_1,Unnamed: 2_level_1
Абакан,1,13
Анадырь,4,4
Архангельск,0,25
Астрахань,2,18
Барнаул,3,30


Now let's merge our data and visualize it.

In [23]:
grouped_museums.reset_index(inplace = True)
regional_clusters = grouped_museums.merge(df[['Neighbourhood', 'City']], on='City')

In [24]:
import folium
import json
import pandas as pd
#61.5240° N, 105.3188° E

latitude = 61.5240
longitude = 105.3188

russian_map = folium.Map(location=[latitude, longitude], zoom_start=3, tiles='OpenStreetMap')

#g = r'sf.geojson'

with open('Regions.json', encoding = 'utf-8') as f:
    data = json.load(f)

df_stats = regional_clusters[['my_labels','Neighbourhood']]

I need to change the format of my data to GeoJson, so the cells below are aimed at this.

In [25]:
help_array = []
for a in data:
    help_array.append(data[a])
help_array[0]
nha = []
a = len(help_array)

for i in range(a):
   nha.append(help_array[i]['0'])

for i in range(a):
    for j in range(len(nha[i])):
        first = nha[i][j][0]
        second = nha[i][j][1]
        nha[i][j][0] = second
        nha[i][j][1] = first
        
my_dict = {}
count = 0
for a in data:
    my_dict[a] = nha[count]
    count+=1
    
my_gd = {
    "type": "FeatureCollection",
    "features": [
    {
        "type": "Feature",
        "geometry" : {
            "type": "Polygon",
            "coordinates": [my_dict[d]],
            },
        "properties" : {"DISTRICT" : d},
     } for d in my_dict]
}

In [26]:
hha = []
for a in data['Чукотский автономный округ']:
    hha.append(data['Чукотский автономный округ'][a])
hha = hha[1:]

for i in range(len(hha)):
    for j in range(len(hha[i])):
        first = hha[i][j][0]
        second = hha[i][j][1]
        if second > 0 :
            hha[i][j][0] = second
            hha[i][j][1] = first
        else: 
            hha[i][j][0] = 360+second
            hha[i][j][1] = first
            
my_gd["features"].append({
        "type": "Feature",
        "geometry" : {
            "type": "MultiPolygon",
            "coordinates": [hha],
            },
        "properties" : {"DISTRICT" : 'Чукотский автономный округ 2'},
     })

Let's finally create a map

In [27]:
import folium

latitude = 64.5240
longitude = 105.3188

m = folium.Map(
    location=[latitude, longitude], zoom_start=2.5, tiles=None
)

m.choropleth(
    geo_data=my_gd,
    data=df_stats,
    columns=['Neighbourhood', 'my_labels'],
    key_on='feature.properties.DISTRICT',
    fill_color='RdYlBu', 
    fill_opacity=0.7, 
    line_opacity=0.4,
    legend_name='Russian map',
    reset=True,
    threshold_scale=[0, 1, 2, 3, 4.1]
)

m

Hooray!