# Part II : Extracting Fourquare Data

> Jump to :  
* [Part 1](https://github.com/Niladri-B/Coursera_Captstone/blob/master/wk4/Capstone_part1.ipynb) *Extracting Street Addresses & Coordinates* 
* [Part 3](https://github.com/Niladri-B/Coursera_Captstone/blob/master/wk4/Capstone_part3.ipynb) , *Exploratory Data Analysis*
* [Part 4](https://github.com/Niladri-B/Coursera_Captstone/blob/master/wk4/Capstone_part4.ipynb), *Clustering and Visualising*
* [Part 5](https://github.com/Niladri-B/Coursera_Captstone/blob/master/wk4/Capstone_part5.ipynb), *Conclusion & Discussion*

## 1. Set up environment

In [9]:
import pandas as pd
import numpy as np
import folium
import requests
import re

In [2]:
streetData = pd.read_csv('./streetData_Midcoordinates.csv')

## 2. Basic Folium Map visualisation

In [3]:
streetData.head()

Unnamed: 0,Street,MidLatitude,MidLongitude
0,Charlotte Andersens vei,59.940584,10.696497
1,Heggelibakken,59.938909,10.692733
2,Forskningsveien,59.943733,10.7131
3,Risveien,59.94687,10.70402
4,Sandermosveien,60.019786,10.793857


In [5]:
#1.2.1 Make map with street info
latitude = 59.9133301
longitude = 10.7389701
map_oslo = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, street , in zip(streetData['MidLatitude'], streetData['MidLongitude'],
                                                      streetData['Street']):
    
    #Create pop-up label to display
    label = '{}'.format(street)#neighborhood, borough originally
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=2,#Change radius of circle marker
        popup=label,
        color='blue',
        fill=False,
        #fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_oslo)  
    
map_oslo


## 3. Use Foursquare API to extract various transport information

### 3.1 Find Trikk

#### Set up Foursquare developer credentials, including Client ID, Client Secret and Version (hidden here)

In [6]:
#2.1.1 Set up Foursquare credentials
CLIENT_ID = 'BM0CR1IATYT5MYOT22LOQUBZQIRHP5USSCRMHTUCLXBM0LXK'#'your-client-ID' # your Foursquare ID
CLIENT_SECRET = 'DVJJFZIYHTAZOU0AH5FBIU4X0LYIDSKNMCK3WRYG2U20G4JJ'#'your-client-secret' # your Foursquare Secret
VERSION = '20190605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)


Your credentails:
CLIENT_ID: BM0CR1IATYT5MYOT22LOQUBZQIRHP5USSCRMHTUCLXBM0LXK
CLIENT_SECRET:DVJJFZIYHTAZOU0AH5FBIU4X0LYIDSKNMCK3WRYG2U20G4JJ


In [13]:
#FULL VERSION: Trikk for street

def getTrikkNearby(post, bydelLat, bydelLon, radius, query):#Where names = postcode
    
    venues_list=[]#Initialise empty list to store details
    for post, lat, lng in zip(post, bydelLat, bydelLon):
        print(post)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}'.format(
                CLIENT_ID,
               CLIENT_SECRET,
               lat,#VITAL to use lat, NOT bydelLat as otherwise get misc info like dtype, and name and that makes a weird URL
               lng,#VITAL: DO NOT USE bydelLat
               VERSION,
               query,
               radius)
        #print(url,'\n')

            
        # make the GET request
        results = requests.get(url).json() ##[#"response"]['groups'][0]['items']
        
        try:
            places = results['response']['venues'][0]
            # return only relevant information for each nearby venue
            venues_list.append([(
                post, #postcode
                #bydel,#I think this is useful to have, several post codes are in the same borough that may have similar char.
                lat, 
                lng, 
                places['name'],
                places['location']['distance'])]) #for v in results])
        except:
                venues_list.append([ (post, lat, lng, 'NA', 'NA')]) #for v in results ])
            
            
    
    #Create new dataframe and fill it with values from the venue_list array with 2 nested for loops
                                                ##Note: placement of 'item' below
    #print(venues_list)
    nearby_venues = pd.DataFrame([item for Venue_list in venues_list for item in Venue_list])
    nearby_venues.columns = ['Street',#'Neighborhood' 
                             
                  'Street Latitude',#'Neighborhood Latitude' 
                  'Street Longitude',#'Neighborhood Longitude' 
                  'Trikk', 
                  'Trikk Distance']#, 
                  #'Venue Longitude', 
                  #'Venue Category']
    
    return(nearby_venues)

#2.1. Run the above code

streetTrikk = getTrikkNearby(post=streetData['Street'],
               bydelLat=streetData['MidLatitude'],
               bydelLon=streetData['MidLongitude'],
               radius = 400,
               query = 'Trikk'
               )

Charlotte Andersens vei
Heggelibakken
Forskningsveien
Risveien
Sandermosveien
Landgangen
Johan Selmers gate
Eugenies gate
Jolly Kramer-Johansens gate
Wilsters vei
Ravnkollbakken
Christoffer Hellums vei
Gladengveien
Garver Ytteborgs vei
Edvard Storms gate
Eystein Torkildsens vei
Halvor Blinderens plass
Enerhaugkleiva
Abildsøfaret
Sunnmørgata
Gunnar Johnsons vei
Blokkaveien
Tvetenveien
Stålfjæra
Guristuveien
Humleveien
Jansbergveien
Jon Smestads vei
Bolette brygge
Rosenkrantz’ gate
Norderhovgata
Munkerudåsen
Havreveien
Kristoffer Aamots gate
Risløkkalléen
Oluf Ryghs gate
Antenneveien
Fagerborggata
Seljeveien
Micheletveien
Tromsøgata
Svingen
Steinbakken
Åsbråtstien
Hauchs gate
Sigurd Johannesens vei
Klemetsrudveien
Fjellhus allé
Meklenborgveien
Olaf Schous vei
Normannsgata
Tjernveien
Flaenveien
Olleveien
Lutvannsveien
Havnehagan
Peder Grøns vei
Hagan terrasse
Cort Adelers gate
Tormods vei
Christian Krohgs gate
Sommerrogata
Skjellveien
Maridalsveien
Priorveien
Askergata
Skøyenbrynet
Skulle

In [14]:
#Check shape of df
streetTrikk.shape

(2460, 5)

In [15]:
#View
streetTrikk.head()

Unnamed: 0,Street,Street Latitude,Street Longitude,Trikk,Trikk Distance
0,Charlotte Andersens vei,59.940584,10.696497,,
1,Heggelibakken,59.938909,10.692733,,
2,Forskningsveien,59.943733,10.7131,Rikshospitalet (trikk),457.0
3,Risveien,59.94687,10.70402,,
4,Sandermosveien,60.019786,10.793857,,


In [66]:
#Save to drive
streetTrikk.to_csv(path_or_buf='./streetTrikk.csv', index = False)

## Step 2: Obtain & clean Bus info

In [48]:
#FULL VERSION; CHANGED CODE
# Extend to all postcodes

busPost = {}
def getBusNearby (post, bydelLat, bydelLon, query, radius, count):
    
    for post, lat, lng in zip(post, bydelLat, bydelLon):
        count= count+1
        #if count in [1896,1897]:
        print('\n',post,'\t',count)

            # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}'.format(
                CLIENT_ID,
               CLIENT_SECRET,
               lat,#VITAL to use lat, NOT bydelLat as otherwise get misc info like dtype, and name and that makes a weird URL
               lng,#VITAL: DO NOT USE bydelLat
               VERSION,
               query,
               radius)
        #print(url)

            
        # make the GET request
        results = requests.get(url).json() ##[#"response"]['groups'][0]['items']
        #print(results)
        
        try:
            places = results['response']['venues']
            print('Number of entries in {} is {}'.format(post,len(places)))
            #print(places)
                
            if len(places) > 0:
                busList = list()
                for i in range(len(places)):# len creates the number, range creates an iterable list because for works on A LIST
                    
                    if re.search('(B|b)uss[the]*?', places[i]['name']):
                        print(places[i]['name'])   
                        #Add info to dictionary, if post does not exist already
                        if post not in busPost:
                            #Add info as tuple of bus route+distance
                            busList.append((places[i]['name'],places[i]['location']['distance']))
                            busPost[post] = busList #First entry
                                
                        #But if post is already in the dict, append to newBus, then append to busList
                        else:
                                #print('Post already exists')
                                #print(places[i]['name'],places[i]['location']['distance'])
                            newBus = places[i]['name'],places[i]['location']['distance']
                                #print(type(newBus))
                            busList.append(newBus)
                    
                    #If the search matches something else than Buss 
                    else:
                            
                            
                        if post not in busPost:
                            #Add info as tuple of bus route+distance
                            busList.append((places[i]['name'],places[i]['location']['distance']))
                            busPost[post] = busList #First entry
                                
                        #But if post is already in the dict, append to newBus, then append to busList
                        else:
                            continue

            #When places length = 0
            else: 
                busPost[post] = '' #'0'                        
            
        except:
        #continue
            busPost[post] = '' #'0'

        
    
#ColnNames
#Postcode 	Bydel/District 	Latitude 	Longitude
getBusNearby(post=streetData['Street'],#[0:2000],#[0:5],#[0:20],
             bydelLat=streetData['MidLatitude'],#[0:2000],#[0:5],#[0:20],
             bydelLon=streetData['MidLongitude'],#[0:2000],#[0:5],#[0:20],
             radius = 400,
             query = 'Bus Stops',
             count = 0
             )   



 Charlotte Andersens vei 	 1
Number of entries in Charlotte Andersens vei is 0

 Heggelibakken 	 2
Number of entries in Heggelibakken is 0

 Forskningsveien 	 3
Number of entries in Forskningsveien is 0

 Risveien 	 4
Number of entries in Risveien is 0

 Sandermosveien 	 5
Number of entries in Sandermosveien is 0

 Landgangen 	 6
Number of entries in Landgangen is 2
Buss 169 til Lierbyen

 Johan Selmers gate 	 7
Number of entries in Johan Selmers gate is 0

 Eugenies gate 	 8
Number of entries in Eugenies gate is 2
Buss 21
46 Bussen

 Jolly Kramer-Johansens gate 	 9
Number of entries in Jolly Kramer-Johansens gate is 2
Buss 33
Rosenhoff (trikk og buss)

 Wilsters vei 	 10
Number of entries in Wilsters vei is 0

 Ravnkollbakken 	 11
Number of entries in Ravnkollbakken is 1
Buss 302

 Christoffer Hellums vei 	 12
Number of entries in Christoffer Hellums vei is 0

 Gladengveien 	 13
Number of entries in Gladengveien is 0

 Garver Ytteborgs vei 	 14
Number of entries in Garver Ytteborgs v

#### Clean the street bus dictionary

In [49]:
len(busPost)

2460

In [50]:
[(k,v) for k,v in busPost.items()]

[('Røatoppen', [('Bussen til Kim', 503)]),
 ('Skjønnhaugveien', ''),
 ('Aslaug Vaas veg', ''),
 ('Egnehjemveien',
  [('Kværnerbyen Buss Stopp', 438), ('Underveis med 34 bussen', 266)]),
 ('Havreveien',
  [('Manglerud Bussholdeplass (Ring 3)', 448), ('Ryen T (Buss)', 406)]),
 ('Christies gate', [('Buss 30', 219)]),
 ('Sigurd Johannesens vei', ''),
 ('Solskinnsveien', ''),
 ('Amund Hellands vei', [('Buss 75A', 355), ('Buss 79', 375)]),
 ('Vidjeveien', ''),
 ('Larsbråtveien', ''),
 ('Kildals vei', ''),
 ('Henrichsens gate', [('Buss 21', 202), ('46-bussen, Ullerntoppen', 209)]),
 ('Vestlisvingen', ''),
 ('Sturlas vei', [('Gressbanen Bussholdeplass', 240)]),
 ('Rundtjernveien', ''),
 ('Vesleveien', [('Buss 65', 122)]),
 ('Gunnar Schjelderups vei', [('Buss #24', 348)]),
 ('Frits Kiærs vei', [('32-bussen mot Kværnerbyen', 274)]),
 ('Karlsrudveien', [('Underveis med 74 Bussen', 237)]),
 ('Lusetjernveien', ''),
 ('Hjemliveien', [('Buss 78B', 324)]),
 ('Planetveien', ''),
 ('Rødkleivfaret', ''),

In [54]:
 #Check that streets have been properly ascribed buses (by checking against some of the ones known to have busses)
    for k,v in busPost.items():
        if k == 'Hausmanns gate':
            print(k,v)

Hausmanns gate [('Buss 111', 117), ('Buss mot Grorud', 228), ('Buss 54', 424), ('Buss 30', 448), ('Buss 31', 408)]


In [55]:
#Make a clean dictionary containing only buses with route numbers on them
## This helps remove some miscellaneous places named like 'Bussola' which is a pizza place actually
cleanBus = dict()    
for k,v in busPost.items():
    #print(v)
    clean =[]
    for tuple in v:
        #print((tuple)[1])#Acces the distance
        string = tuple[0]
        if re.search('\d', string):#If this returns a match
            if k not in cleanBus:#If key does not exist
                clean.append(string)#append to list
                cleanBus[k] = clean#Assign first entry 
            else: #if key already exists
                clean.append(string)#Only append to list
        

In [56]:
print('Length of Clean street Bus dictionary is:', len(cleanBus))

Length of Clean street Bus dictionary is: 1191


In [40]:
#Stats
#Out of 100 streets, 46 have bus within 400m
#Out of 1000 streets, 481 have bus within 400m
#Out of 2460 streets, 1191 have bus within 400m

In [57]:
#Convert to a dataframe
streetBusesClean = pd.DataFrame.from_dict(cleanBus, orient = 'index')
streetBusesClean.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
Nydalen allé,Buss #24,,,,,,,,,,,,,,,,,
Kjelsåsløkka,25-bussen,,,,,,,,,,,,,,,,,
Revierstredet,Buss 83,Buss 542 Til Drøbak,Buss 32,Buss 82E,81B-Bussen,81A-bussen,,,,,,,,,,,,
Tunveien,163-bussen,,,,,,,,,,,,,,,,,
Sarpsborggata,37 Buss Nydalen,,,,,,,,,,,,,,,,,


In [59]:
#Reset index
streetBusesClean.reset_index(inplace = True)

#Change column name
streetBusesClean.columns.values[0] = 'Street'

#Change to list
streetBusesClean.columns = streetBusesClean.columns.tolist()

#Verfiy access to column
streetBusesClean[['Street']].head()

Unnamed: 0,Street
0,Nydalen allé
1,Kjelsåsløkka
2,Revierstredet
3,Tunveien
4,Sarpsborggata


In [60]:
#Save the cleaned up bus data frame locally
streetBusesClean.to_csv(path_or_buf= './streetBusesClean.csv', index = False)

In [62]:
#Merge with Street Trikk dataframe
streetTrikkBussClean = pd.merge(streetTrikk, streetBusesClean, on = 'Street', how = 'left')
streetTrikkBussClean.head()

Unnamed: 0,Street,Street Latitude,Street Longitude,Trikk,Trikk Distance,0,1,2,3,4,...,8,9,10,11,12,13,14,15,16,17
0,Charlotte Andersens vei,59.940584,10.696497,,,,,,,,...,,,,,,,,,,
1,Heggelibakken,59.938909,10.692733,,,,,,,,...,,,,,,,,,,
2,Forskningsveien,59.943733,10.7131,Rikshospitalet (trikk),457.0,,,,,,...,,,,,,,,,,
3,Risveien,59.94687,10.70402,,,,,,,,...,,,,,,,,,,
4,Sandermosveien,60.019786,10.793857,,,,,,,,...,,,,,,,,,,


In [65]:
#Check shape of merged Trikk + Buss
streetTrikkBussClean.shape

(2460, 23)

## Step 3: Obtain T-Bane info

In [68]:
#Extend to run through all postcodes
metroPost = dict()
def getMetroNearby (post, bydelLat, bydelLon, radius, query, count):
    
    for post, lat, lng in zip(post, bydelLat, bydelLon):
        count= count+1
        print('\n',post,'\t',count)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}'.format(
                CLIENT_ID,
               CLIENT_SECRET,
               lat,#VITAL to use lat, NOT bydelLat as otherwise get misc info like dtype, and name and that makes a weird URL
               lng,#VITAL: DO NOT USE bydelLat
               VERSION,
               query,
               radius)
        

            
        # make the GET request
        results = requests.get(url).json() ##[#"response"]['groups'][0]['items']
        #print(len(results))#of type Dictionary with 2 keys = 'meta', 'response'
        #print(results ['meta'])
        #print(results ['response'])
        #print(len(results ['response']['venues']))
        
        #Situation 1: GET gives result
        try:
            places = results['response']['venues']
            print('Number of entries in {} is {}'.format(post,len(places)))
            #print(places)
            
            #Situation 2: GET result > 0
            if len(places) > 0:     
                
                
                metroList = list()
                for i in results['response']['venues']:
                    
                    #Situation 2.1: len > 0 + matches Metro
                    if (len(i['categories']) > 0) and (i['categories'][0]['shortName'] == 'Metro'):#'Train Station'
                        
                        #Situation 2.1.1 if post/key does NOT EXIST already
                        if post not in metroPost:
                            metroTuple = i['name'], i['location']['distance']
                            metroList.append(metroTuple)
                            #First assignment
                            metroPost[post] = metroList #post as key from the loop function above
                            
                        #Situation 2.1.2 if post/key EXISTS already
                        elif post in metroPost:
                            metroTuple = i['name'], i['location']['distance']
                            metroList.append(metroTuple)
                            
                        
                    
                    #Situation 2.2: len > 0 + does NOT MATCH Train Station
                    elif (len(i['categories']) > 0) and (i['categories'][0]['shortName'] != 'Metro'):#If the short name is NOT 'Train Station'
                       
                        #Situation 2.2.1 key does NOT exist
                        if post not in metroPost:
                            #First assignment
                            metroPost[post] = metroList #post as key from the loop function above
                            
                        #Situation 2.2.2 key EXISTS
                        elif post in metroPost:
                            continue #Just skip no need to do anything as postcode already exists with a BLANK list OR a list that HAS INFO
                        
            #Situation 2: GET result = 0
            elif len(places) == 0:
                print('<<We have this situation>>')
                metroPost[post] = metroList #'0'

                                
                
        #Situation 1: GET does not give result
        except:
        #continue
            print('We have this situation')
            metroPost[post] = [] #Keeping blank list as value

        
    
    
    
    
getMetroNearby(post=streetData['Street'],         #[0:200],#[0:20],
               
               bydelLat=streetData['MidLatitude'],      #[0:200],#[0:20],
               bydelLon=streetData['MidLongitude'],     #[0:200],#[0:20],
               radius = 400,
               query = 'T-bane',
               count = 0
               )   



 Charlotte Andersens vei 	 1
Number of entries in Charlotte Andersens vei is 1

 Heggelibakken 	 2
Number of entries in Heggelibakken is 2

 Forskningsveien 	 3
Number of entries in Forskningsveien is 13

 Risveien 	 4
Number of entries in Risveien is 7

 Sandermosveien 	 5
Number of entries in Sandermosveien is 1

 Landgangen 	 6
Number of entries in Landgangen is 30

 Johan Selmers gate 	 7
Number of entries in Johan Selmers gate is 30

 Eugenies gate 	 8
Number of entries in Eugenies gate is 30

 Jolly Kramer-Johansens gate 	 9
Number of entries in Jolly Kramer-Johansens gate is 30

 Wilsters vei 	 10
Number of entries in Wilsters vei is 6

 Ravnkollbakken 	 11
Number of entries in Ravnkollbakken is 1

 Christoffer Hellums vei 	 12
Number of entries in Christoffer Hellums vei is 12

 Gladengveien 	 13
Number of entries in Gladengveien is 15

 Garver Ytteborgs vei 	 14
Number of entries in Garver Ytteborgs vei is 3

 Edvard Storms gate 	 15
Number of entries in Edvard Storms gate is

In [69]:
len(metroPost)

2446

In [70]:
#Convert to dictionary
streetMetro = pd.DataFrame.from_dict(metroPost, orient = 'Index')
streetMetro.head()

Unnamed: 0,0,1,2,3
Røatoppen,"(Ekraveien (T), 326)",,,
Skjønnhaugveien,,,,
Aslaug Vaas veg,"(Bjørnsletta (T), 455)",,,
Egnehjemveien,,,,
Havreveien,"(Ryen (T), 386)","(Manglerud (T), 437)","(Bergkrystallen 4 (T), 441)","(Ringen 4 (T), 448)"


In [71]:
#Reset index
streetMetro.reset_index(inplace = True)

#Change coln names from 0,1,2,3 to something bettee
streetMetro.columns = ['Street','T-bane_1','T-bane_2','T-bane_3','T-bane_4']
streetMetro.head()

Unnamed: 0,Street,T-bane_1,T-bane_2,T-bane_3,T-bane_4
0,Røatoppen,"(Ekraveien (T), 326)",,,
1,Skjønnhaugveien,,,,
2,Aslaug Vaas veg,"(Bjørnsletta (T), 455)",,,
3,Egnehjemveien,,,,
4,Havreveien,"(Ryen (T), 386)","(Manglerud (T), 437)","(Bergkrystallen 4 (T), 441)","(Ringen 4 (T), 448)"


In [72]:
#Save to disk
streetMetro.to_csv(path_or_buf='streetMetro.csv', index = False)

In [73]:
#Merge with street Trikk Buss
streetTrikkBussMetro = pd.merge(streetTrikkBussClean, streetMetro, on = 'Street', how = 'left')
streetTrikkBussMetro.head()

Unnamed: 0,Street,Street Latitude,Street Longitude,Trikk,Trikk Distance,0,1,2,3,4,...,12,13,14,15,16,17,T-bane_1,T-bane_2,T-bane_3,T-bane_4
0,Charlotte Andersens vei,59.940584,10.696497,,,,,,,,...,,,,,,,,,,
1,Heggelibakken,59.938909,10.692733,,,,,,,,...,,,,,,,,,,
2,Forskningsveien,59.943733,10.7131,Rikshospitalet (trikk),457.0,,,,,,...,,,,,,,"(Forskningsparken (T), 429)","(Gaustad (T), 276)",,
3,Risveien,59.94687,10.70402,,,,,,,,...,,,,,,,"(Vinderen (T), 461)","(Gaustad (T), 360)","(Ris (T), 142)",
4,Sandermosveien,60.019786,10.793857,,,,,,,,...,,,,,,,,,,


In [74]:
streetTrikkBussMetro.shape

(2460, 27)

## Step 4: Obtain Train info

In [75]:
#Extend to run through all postcodes
trainPost = dict()
def getTrainsNearby (post, bydelLat, bydelLon, radius, query, count):
    
    for post, lat, lng in zip(post, bydelLat, bydelLon):
        count= count+1
        print('\n',post,'\t',count)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}'.format(
                CLIENT_ID,
               CLIENT_SECRET,
               lat,#VITAL to use lat, NOT bydelLat as otherwise get misc info like dtype, and name and that makes a weird URL
               lng,#VITAL: DO NOT USE bydelLat
               VERSION,
               query,
               radius)
        

            
        # make the GET request
        results = requests.get(url).json() ##[#"response"]['groups'][0]['items']
        #print(len(results))#of type Dictionary with 2 keys = 'meta', 'response'
        #print(results ['meta'])
        #print(results ['response'])
        #print(len(results ['response']['venues']))
        
        #Situation 1: GET gives result
        try:
            places = results['response']['venues']
            print('Number of entries in {} is {}'.format(post,len(places)))
            print(places)
            
            #Situation 2: GET result > 0
            if len(places) > 0:     
                
                
                trainList = list()
                for i in results['response']['venues']:
                    
                    #Situation 2.1: len > 0 + matches Train Station
                    if (len(i['categories']) > 0) and (i['categories'][0]['shortName'] == 'Train Station'):#'Train Station'
                        
                        #Situation 2.1.1 if post/key does NOT EXIST already
                        if post not in trainPost:
                            trainTuple = i['name'], i['location']['distance']
                            trainList.append(trainTuple)
                            #First assignment
                            trainPost[post] = trainList #post as key from the loop function above
                            
                        #Situation 2.1.2 if post/key EXISTS already
                        elif post in trainPost:
                            trainTuple = i['name'], i['location']['distance']
                            trainList.append(trainTuple)
                            
                        
                    
                    #Situation 2.2: len > 0 + does NOT MATCH Train Station
                    elif (len(i['categories']) > 0) and (i['categories'][0]['shortName'] != 'Train Station'):#If the short name is NOT 'Train Station'
                       
                        #Situation 2.2.1 key does NOT exist
                        if post not in trainPost:
                            #First assignment
                            trainPost[post] = trainList #post as key from the loop function above
                            
                        #Situation 2.2.2 key EXISTS
                        elif post in trainPost:
                            continue #Just skip no need to do anything as postcode already exists with a BLANK list OR a list that HAS INFO
                        
            #Situation 2: GET result = 0
            elif len(places) == 0:
                print('<<We have this situation>>')
                trainPostPost[post] = trainList #'0'

                                
                
        #Situation 1: GET does not give result
        except:
        #continue
            print('We have this situation')
            trainPost[post] = [] #Keeping blank list as value

        
    
    
    
    
getTrainsNearby(post=streetData['Street'],         #[0:200],#[0:20],
               
               bydelLat=streetData['MidLatitude'],      #[0:200],#[0:20],
               bydelLon=streetData['MidLongitude'],     #[0:200],#[0:20],
               radius = 400,
               query = 'Train Station',
               count = 0
               )   



 Charlotte Andersens vei 	 1
Number of entries in Charlotte Andersens vei is 0
[]
<<We have this situation>>
We have this situation

 Heggelibakken 	 2
Number of entries in Heggelibakken is 0
[]
<<We have this situation>>
We have this situation

 Forskningsveien 	 3
Number of entries in Forskningsveien is 0
[]
<<We have this situation>>
We have this situation

 Risveien 	 4
Number of entries in Risveien is 0
[]
<<We have this situation>>
We have this situation

 Sandermosveien 	 5
Number of entries in Sandermosveien is 0
[]
<<We have this situation>>
We have this situation

 Landgangen 	 6
Number of entries in Landgangen is 0
[]
<<We have this situation>>
We have this situation

 Johan Selmers gate 	 7
Number of entries in Johan Selmers gate is 0
[]
<<We have this situation>>
We have this situation

 Eugenies gate 	 8
Number of entries in Eugenies gate is 0
[]
<<We have this situation>>
We have this situation

 Jolly Kramer-Johansens gate 	 9
Number of entries in Jolly Kramer-Johansen

In [2747]:
#sorted( [(k,v) for k,v in trainPost.items()] )

[('7.juni Plassen', ['Nationaltheatret stasjon']),
 ('Aagots vei', []),
 ('Aasmund Vinjes vei', []),
 ('Aasta Hansteens vei', []),
 ('Abbediengen terrasse', []),
 ('Abbediengveien', []),
 ('Abbedikollen', []),
 ('Abbedisvingen', []),
 ('Abildsøfaret', []),
 ('Abildsøveien', []),
 ('Adam Hiorths vei', []),
 ('Admiral Børresens vei', []),
 ('Adventveien', []),
 ('Agathe Grøndahls gate', []),
 ('Agdergata', []),
 ('Agmund Bolts vei', []),
 ('Agronomveien', []),
 ('Akebakkeskogen', []),
 ('Akerlia', []),
 ('Akersbakken', []),
 ('Akersborg terrasse', []),
 ('Akersgata', []),
 ('Akershusstranda', []),
 ('Akersveien', []),
 ('Albert Nordengens plass', []),
 ('Alf Bjerckes vei', []),
 ('Alfaset 1. industrivei', []),
 ('Alfaset 3. industrivei', []),
 ('Alfasetveien', []),
 ('Almeveien', []),
 ('Alnabruveien', []),
 ('Alnafetgata', []),
 ('Alnagata', []),
 ('Alnaparkveien', []),
 ('Alundamveien', []),
 ('Alunsjøveien', []),
 ('Alvheimveien', []),
 ('Amagerveien', []),
 ('Ammerudgrenda', []),
 ('

In [76]:
print('Length of dictionary is {}'.format(len(trainPost)))

Length of dictionary is 2460


In [None]:
#Manually add Nationaltheatret Stasjon to Ruseløkkveien

In [2698]:
#for k,v in trainPost.items():
#    if k == 'Ruseløkkveien':
#        trainPost[k].append('Nationaltheatret stasjon')
#        print(k,v)

#See below for 1. Finding streets within 400m of Ruseløkkveien that has Nationaltheatret stasjon 2. Adding station info to those streets

Ruseløkkveien ['Nationaltheatret stasjon']


### Important: Foursquare API completely misses the Nationaltheatret station

I manually found it to be located on the street named: Ruseløkkveien. This info needs to added.  
Additionally, to _approximately_ find other streets in the 400m vicinity of this station, I will try to find all streets within 400m of Ruseløkkveien and assign the station to those streets as well.

### Try to find streets that are within 400m of Ruseløkkveien

In [77]:
#Import the library that allows one to estimate distance from two geo-coordinate sets.
import geopy.distance

In [78]:
#We will work on a copy of the original Street Data df, just to ensure the original remains intact
streetDataModified = streetData
streetDataModified.head()

Unnamed: 0,Street,MidLatitude,MidLongitude
0,Charlotte Andersens vei,59.940584,10.696497
1,Heggelibakken,59.938909,10.692733
2,Forskningsveien,59.943733,10.7131
3,Risveien,59.94687,10.70402
4,Sandermosveien,60.019786,10.793857


In [79]:
#Find geo-coordinates of Ruseløkkveien
streetDataModified[streetDataModified.Street == 'Ruseløkkveien']#dataframe

Unnamed: 0,Street,MidLatitude,MidLongitude
1597,Ruseløkkveien,59.912344,10.725406


In [80]:
#Try to see how to access the latitude/longitude
streetDataModified[streetDataModified.Street == 'Ruseløkkveien'].iloc[0,2]#This access the longitude

10.725405555555557

In [81]:
#Create a function that will estimate the distances
## Here we will estimate the distance of each street in the dataframe to Ruseløøkveien and output those that are within 400m (and skip Ruseløkkveien)

#Empty list to store streets within 400m
streetsNearby = list()
def distanceFromStreet (dataframe, fullData):
    
    #Load up the coordinates of Ruseløkkveien
    lat = dataframe.iloc[0,1]
    lon = dataframe.iloc[0,2]
    coords_1 = lat,lon
    
    #Find nearby streets
    for i in range(fullData.shape[0]):
        df = fullData.iloc[i:i+1,]
        #print(df)
        lat2 = df.iloc[0,1]
        lon2 = df.iloc[0,2]
        coords_2 = lat2, lon2
        #print(coords_2)
        distance = geopy.distance.distance(coords_1, coords_2).m
        if distance <= 400 and distance > 0:
            print(df, round(distance, 2) )
            streetsNearby.append(df.iloc[0,0])
        
    
    
    #return (coords_1, coords_2)

    

distanceFromStreet(streetDataModified[streetDataModified.Street == 'Ruseløkkveien'], fullData= streetDataModified)

               Street  MidLatitude  MidLongitude
58  Cort Adelers gate     59.91328     10.723113 165.26
            Street  MidLatitude  MidLongitude
154  Bervens løkke      59.9126       10.7202 292.63
         Street  MidLatitude  MidLongitude
254  Grundingen     59.91054      10.72772 239.13
        Street  MidLatitude  MidLongitude
321  Dokkveien      59.9107       10.7286 255.95
         Street  MidLatitude  MidLongitude
542  Bryggegata    59.910271     10.726257 235.82
                Street  MidLatitude  MidLongitude
617  Kronprinsens gate     59.91442       10.7275 259.23
             Street  MidLatitude  MidLongitude
630  Rådhusbrygge 4      59.9104       10.7304 353.57
      Street  MidLatitude  MidLongitude
774  Sjøgata    59.910367       10.7244 227.41
               Street  MidLatitude  MidLongitude
775  Filipstad brygge     59.91005      10.72275 295.67
              Street  MidLatitude  MidLongitude
847  Reichweins gate    59.912243     10.720493 275.09
             Str

In [82]:
#Have a look at streets whose mid points are within 400m from Ruseløkkveien
streetsNearby

['Cort Adelers gate',
 'Bervens løkke',
 'Grundingen',
 'Dokkveien',
 'Bryggegata',
 'Kronprinsens gate',
 'Rådhusbrygge 4',
 'Sjøgata',
 'Filipstad brygge',
 'Reichweins gate',
 'Hansteens gate',
 '7.juni Plassen',
 'Stranden',
 'Løkkegangen',
 'Lassons gate',
 'Arbins gate',
 'Støperigata',
 'Brynjulf Bulls plass',
 'Observatoriegata',
 'Victoria terrasse',
 'Huitfeldts gate',
 'Henrik Ibsens gate',
 'Munkedamsveien',
 'Fjordalléen',
 'Gustav Bloms gate',
 'Bryggetorget',
 'Løkkeveien',
 'Solligata',
 'Beddingen',
 'Holmens gate',
 'Dronning Mauds gate']

In [83]:
#Add info about Nationaltheatret stasjon to above list
for k,v in trainPost.items():
    if k in streetsNearby:
        trainPost[k].append('Nationaltheatret stasjon')
        print(k,v)

Filipstad brygge ['Nationaltheatret stasjon']
Observatoriegata ['Nationaltheatret stasjon']
Reichweins gate ['Nationaltheatret stasjon']
Huitfeldts gate ['Nationaltheatret stasjon']
Løkkegangen ['Nationaltheatret stasjon']
Dronning Mauds gate ['Nationaltheatret stasjon']
Bervens løkke ['Nationaltheatret stasjon']
Victoria terrasse ['Nationaltheatret stasjon']
Hansteens gate ['Nationaltheatret stasjon']
7.juni Plassen ['Nationaltheatret stasjon']
Stranden ['Nationaltheatret stasjon']
Fjordalléen ['Nationaltheatret stasjon']
Holmens gate ['Nationaltheatret stasjon']
Beddingen ['Nationaltheatret stasjon']
Henrik Ibsens gate ['Nationaltheatret stasjon']
Cort Adelers gate ['Nationaltheatret stasjon']
Dokkveien ['Nationaltheatret stasjon']
Støperigata ['Nationaltheatret stasjon']
Bryggetorget ['Nationaltheatret stasjon']
Lassons gate ['Nationaltheatret stasjon']
Brynjulf Bulls plass ['Nationaltheatret stasjon']
Solligata ['Nationaltheatret stasjon']
Bryggegata ['Nationaltheatret stasjon']
Kr

In [84]:
#Convert to the updated dictionary to dataframe
streetTrain = pd.DataFrame.from_dict(trainPost, orient = 'index')
streetTrain.head()

#Reset index
streetTrain.reset_index(inplace = True)

#Change column names
streetTrain.columns = ['Street','Train Station']
streetTrain.head()

Unnamed: 0,Street,Train Station
0,Røatoppen,
1,Skjønnhaugveien,
2,Aslaug Vaas veg,
3,Egnehjemveien,
4,Havreveien,


In [85]:
#Save to local drive
streetTrain.to_csv(path_or_buf= './streetTrain.csv', index = False)

In [86]:
#Merge on full street Trikk, Buss, T-Bane df
streetTrikkBussMetroTog = pd.merge(streetTrikkBussMetro, streetTrain, on = 'Street', how = 'left')
streetTrikkBussMetroTog.shape

(2460, 28)

In [87]:
#View your accomplishment and hardwork!
streetTrikkBussMetroTog.head()

Unnamed: 0,Street,Street Latitude,Street Longitude,Trikk,Trikk Distance,0,1,2,3,4,...,13,14,15,16,17,T-bane_1,T-bane_2,T-bane_3,T-bane_4,Train Station
0,Charlotte Andersens vei,59.940584,10.696497,,,,,,,,...,,,,,,,,,,
1,Heggelibakken,59.938909,10.692733,,,,,,,,...,,,,,,,,,,
2,Forskningsveien,59.943733,10.7131,Rikshospitalet (trikk),457.0,,,,,,...,,,,,,"(Forskningsparken (T), 429)","(Gaustad (T), 276)",,,
3,Risveien,59.94687,10.70402,,,,,,,,...,,,,,,"(Vinderen (T), 461)","(Gaustad (T), 360)","(Ris (T), 142)",,
4,Sandermosveien,60.019786,10.793857,,,,,,,,...,,,,,,,,,,


In [2760]:
#Save to harddrive
streetTrikkBussMetroTog.to_csv(index = False, path_or_buf= './streetData_TrikkBusMetroTog.csv')