In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2020.12.5          |   py36h5fab9bb_1         143 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-2.1.0                |     pyhd3deb0d_0          64 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         240 KB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-2.1.0-pyhd3deb0d_0

The following packages will be UPDATED:

  certifi                          2020.12.5-py36h5fab9bb_0 --> 202

<h2>Load & Clean Data from list of StartUps</h2>

In [2]:
#read csv file
df = pd.read_csv("it-startups.csv")

#removing rows with Post Code 'Not assigned'
df.dropna(subset = ["Post Code"], inplace=True)
df

#drop columns with data not required
df.drop(df.columns[[0, 3]], axis = 1, inplace = True)
df.reset_index(drop=True, inplace = True)
df


Unnamed: 0,Name,Post Code,Description
0,BULB,EC2M 3TQ,"The UK’s biggest green supplier, Bulb supplies..."
1,CITY PANTRY,EC4M 7RF,Believing food is the source of happiness (bec...
2,PERKBOX,EC1R 5EJ,"Perkbox enriches the employee experience, by p..."
3,AIRSORTED,EC1M 4NL,"Taking the hassle out of hosting, Airsorted is..."
4,ZOPA,SE1 2QG,Say goodbye to time-consuming middlemen with Z...
5,STREETBEES,WC2H 7DQ,Streetbees’ global intelligence platform enabl...
6,PAPIER,EC4M 7AN,Papier is kicking it old school with beautiful...
7,TRIPTEASE,EC1M 7AD,A company looking to preserve the hotel indust...
8,IMPROBABLE,WC2H 7DT,"Building, quite literally, whole new worlds us..."
9,APPEAR HERE,EC1R 5DW,As the leading online marketplace for short-te...


<h2>Add missing information to dataset</h2>

In [3]:

#Get latitude and longitude information for each startup
postcodes = df["Post Code"]
postcodes

lat=[]
long=[]

for p in postcodes:
    url = "http://api.getthedata.com/postcode/"+p
    #print (url)
    
    response = requests.get(url)
    #print (response.text)

    json_data = json.loads(response.text)

    #print(json_data["data"]["latitude"])
    #print(json_data["data"]["longitude"])
    
    lat.append(json_data["data"]["latitude"])
    long.append(json_data["data"]["longitude"])
    

#Add latitude and longitude information to dataframe
df['Latitude']= lat
df['Longitude'] = long

df["Latitude"] = pd.to_numeric(df["Latitude"])
df["Longitude"] = pd.to_numeric(df["Longitude"])

df
    

Unnamed: 0,Name,Post Code,Description,Latitude,Longitude
0,BULB,EC2M 3TQ,"The UK’s biggest green supplier, Bulb supplies...",51.518768,-0.080096
1,CITY PANTRY,EC4M 7RF,Believing food is the source of happiness (bec...,51.516445,-0.103125
2,PERKBOX,EC1R 5EJ,"Perkbox enriches the employee experience, by p...",51.522667,-0.108661
3,AIRSORTED,EC1M 4NL,"Taking the hassle out of hosting, Airsorted is...",51.521737,-0.103297
4,ZOPA,SE1 2QG,Say goodbye to time-consuming middlemen with Z...,51.506059,-0.084812
5,STREETBEES,WC2H 7DQ,Streetbees’ global intelligence platform enabl...,51.50908,-0.131128
6,PAPIER,EC4M 7AN,Papier is kicking it old school with beautiful...,51.515715,-0.10238
7,TRIPTEASE,EC1M 7AD,A company looking to preserve the hotel indust...,51.523126,-0.097949
8,IMPROBABLE,WC2H 7DT,"Building, quite literally, whole new worlds us...",51.509825,-0.131491
9,APPEAR HERE,EC1R 5DW,As the leading online marketplace for short-te...,51.522437,-0.111093


In [4]:
#Get the latitude and longitude data for London
from geopy.geocoders import Nominatim 
address = 'London, United Kingdom'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinate of London are {}, {}.'.format(latitude, longitude))

The geographical coordinate of London are 51.5073219, -0.1276474.


In [10]:
import folium

# create map of Toronto using latitude and longitude values
map_london = folium.Map(location=[latitude, longitude], tiles='Stamen Terrain', zoom_start=14)

# add markers to map
for Latitude, Longitude, Name, Description in zip(df['Latitude'], df['Longitude'], df['Name'], df['Description']):
    label = '{}, {}'.format(Name, Description)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [Latitude, Longitude],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_london)  
    
map_london

<h2>Using Foursquare to explore neighbourhoods</h2>

In [7]:
#Setting the credentials to use Four Square
CLIENT_ID = 'RNJ0DDXZB2L4SWVE4SQB4GDO1CYQKQTIXDK1ACQ2F4W3MQJP' # your Foursquare ID
CLIENT_SECRET = 'XZ0KMUQHZN3YIRY4UL1OU3JFXIP5F1LH2NYG0XKKAHOY0BVA'
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: RNJ0DDXZB2L4SWVE4SQB4GDO1CYQKQTIXDK1ACQ2F4W3MQJP
CLIENT_SECRET:XZ0KMUQHZN3YIRY4UL1OU3JFXIP5F1LH2NYG0XKKAHOY0BVA


In [None]:
#getting all venues mentioning Primary school within a radius of 500 from each start up
search_query = 'Primary School'
radius = 500

v =[]

columns = ['name', 'location.lat','location.lng']
all_venues = pd.DataFrame(columns=columns)

#getting the latitude and longitude of every start up
lat = df["Latitude"]

for l in lat:
    
    latitude = l
    url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
    results = requests.get(url).json()

    # assign relevant part of JSON to venues
    venues = results['response']['venues']

    # tranform venues into a dataframe
    dataframe = pd.json_normalize(venues)
    df1 = dataframe[['name','location.lat','location.lng']]

    v.append(df1)

all_venues = pd.concat(v)


all_venues




In [46]:
#reset index
all_venues.reset_index(drop=True, inplace=True)


names = all_venues['name']
names.drop_duplicates(keep = False, inplace = True) 



primary_schools =[]

for n in names:
    if ("Primary School") in n:
        #print (n)
        lat = all_venues.loc[all_venues['name'] == n,'location.lat'].item()
        lng = all_venues.loc[all_venues['name'] == n,'location.lng'].item()
        #print(lat)
        df2 = [n,lat,lng]
        primary_schools.append(df2)
    

columns = ['name', 'location.lat','location.lng']
schools = pd.DataFrame(columns=columns)

schools = pd.DataFrame(primary_schools)
schools.columns = ['name','lat', 'long']

schools



#schools


        
        
    





Unnamed: 0,name,lat,long
0,Coleridge Primary School,51.575719,-0.127154
1,Brecknock Primary School,51.547299,-0.12739
2,Robert Blair Primary School,51.544872,-0.121724
3,Hungerford Primary School,51.548749,-0.126782
4,Woodmansterne Primary School,51.41844,-0.135864
5,Millbank Primary School,51.492023,-0.130362
6,St John's Upper Holloway CE Primary School,51.562584,-0.132689
7,Yerbury Primary School,51.561164,-0.128212
8,Ashmount Primary School,51.560739,-0.124368
9,Upper Holloway Primary School,51.562326,-0.132686


In [54]:
#drawing map with startups and primary school
import folium

# create map of London using latitude and longitude values
map_london = folium.Map(location=[latitude, longitude], tiles='Stamen Terrain', zoom_start=10)

# add markers to map for startups
for Latitude, Longitude, Name, Description in zip(df['Latitude'], df['Longitude'], df['Name'], df['Description']):
    label = '{}, {}'.format(Name, Description)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [Latitude, Longitude],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_london)  
    
# add markers to map for Primary schools
for Name, Longitude, Latitude, in zip(schools['name'], schools['long'], schools['lat']):
    label = '{}'.format(Name)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [Latitude, Longitude],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_london)  
    
map_london