This is a sample notebook of cleaning up a city's info and adding lat/long

In [1]:
import pandas as pd
from config import gkey
import gmaps
import numpy as np
import requests

In [2]:
# pulling in San Antonio's data

newfile = pd.read_csv("San Antonio.csv")
SA_df = pd.DataFrame(newfile)
SA_df.head()

Unnamed: 0,School ID,School Name,Percent in Poverty,Google Place ID
0,15907126,FRANKLIN EL,90.63,ChIJZ1nRc5NpXIYRWqa1idCgEJg
1,15907127,GATES EL,97.52,ChIJdyGkXZL2XIYRhh0JiXBbhig
2,15907129,CHARLES GRAEBNER EL,93.24,ChIJdaDOif1YXIYR6sEyEDYLmU4
3,15907131,ROBERT B GREEN EL,88.46,ChIJNy8ZhCD2XIYRm385ZPZbeRM
4,15907132,HERFF EL,96.73,ChIJI5f_Sxf2XIYRhXwvlowTISY


In [3]:
# Check column names to make sure they look good

list(SA_df)

['School ID', ' School Name', ' Percent in Poverty', ' Google Place ID']

In [5]:
# Column names have leading spaces from imperfect csv data, so I'll clean up by changing names

SA_df = SA_df.rename(columns = {' School Name': 'School Name', ' Percent in Poverty': 'Percent in Poverty',
                                       ' Google Place ID': 'Google Place ID'})
list(SA_df)

['School ID', 'School Name', 'Percent in Poverty', 'Google Place ID']

In [8]:
# We'll need to run Google Place IDs through the API to pull lat/long, so change datatype to list of strings

schools = SA_df['Google Place ID']
schools = np.array(schools)
schools = schools.tolist()
schools

['ChIJZ1nRc5NpXIYRWqa1idCgEJg',
 'ChIJdyGkXZL2XIYRhh0JiXBbhig',
 'ChIJdaDOif1YXIYR6sEyEDYLmU4',
 'ChIJNy8ZhCD2XIYRm385ZPZbeRM',
 'ChIJI5f_Sxf2XIYRhXwvlowTISY',
 'ChIJj51ZaKr3XIYRAuYnK-hHuPU',
 'ChIJF6riVz_2XIYRPZWRaCgNyhc',
 'ChIJ3Y6xI41YXIYRJBXfmYKxJQY',
 'ChIJASkEdMz2XIYRmxyuzGvby_4',
 'ChIJ62FceihcXIYRy3JdozZccLU',
 'ChIJ5SvGds1eXIYRh76jP9qVmss',
 'ChIJ7YCUUzH2XIYRS4vwaODq3Vo',
 'ChIJ467TWZb2XIYRUzzj1gZjWZI',
 'ChIJ4x7rDxdZXIYRFSQwOzahEjw',
 'ChIJVafkHiVZXIYRjVS1WuG-p68',
 'ChIJve2sBJf1XIYRW-fSrWUL4ds',
 'ChIJZXlp7uP1XIYRh0hy8owLlHo',
 'ChIJv9c6n5JeXIYRlpPv3U_T_Mc',
 'ChIJs3ZM_TBfXIYRfvU8Yc0vFpc',
 'ChIJm3b3H11eXIYRBxiwPhMMLPs',
 'ChIJwY-chYn2XIYRwkzXvRgBRMY',
 'ChIJH7sZ47pfXIYRzEgoIp3BIMM',
 'ChIJt4CU_NpeXIYRVJLgVn0_z84',
 'ChIJ36TRJMX1XIYRkVsQJfZEruM',
 'ChIJt19osyn2XIYRzFL7NpA1tb8',
 'ChIJn0JIUJ5fXIYRV60SgUejuhI',
 ' ChIJ2c9HkdZYXIYRrM4-NXzSW5Q',
 'ChIJYT7sdm5fXIYRBWuDvf9r-98',
 'ChIJtR6MDQv3XIYRUO9VXG8vRZE',
 'ChIJceDlGWz2XIYRkOEIHC0C9dU']

In [11]:
# One of the strings has a leading space and will break the loop, so cleanup is needed. 
# Pull fourth from last string
schools[-4]

' ChIJ2c9HkdZYXIYRrM4-NXzSW5Q'

In [14]:
# Change value of that string
schools[-4] = 'ChIJ2c9HkdZYXIYRrM4-NXzSW5Q'
schools[-4]

'ChIJ2c9HkdZYXIYRrM4-NXzSW5Q'

In [15]:
# Looks good. Quick re-check to make sure all strings have no extraneous spaces

schools

['ChIJZ1nRc5NpXIYRWqa1idCgEJg',
 'ChIJdyGkXZL2XIYRhh0JiXBbhig',
 'ChIJdaDOif1YXIYR6sEyEDYLmU4',
 'ChIJNy8ZhCD2XIYRm385ZPZbeRM',
 'ChIJI5f_Sxf2XIYRhXwvlowTISY',
 'ChIJj51ZaKr3XIYRAuYnK-hHuPU',
 'ChIJF6riVz_2XIYRPZWRaCgNyhc',
 'ChIJ3Y6xI41YXIYRJBXfmYKxJQY',
 'ChIJASkEdMz2XIYRmxyuzGvby_4',
 'ChIJ62FceihcXIYRy3JdozZccLU',
 'ChIJ5SvGds1eXIYRh76jP9qVmss',
 'ChIJ7YCUUzH2XIYRS4vwaODq3Vo',
 'ChIJ467TWZb2XIYRUzzj1gZjWZI',
 'ChIJ4x7rDxdZXIYRFSQwOzahEjw',
 'ChIJVafkHiVZXIYRjVS1WuG-p68',
 'ChIJve2sBJf1XIYRW-fSrWUL4ds',
 'ChIJZXlp7uP1XIYRh0hy8owLlHo',
 'ChIJv9c6n5JeXIYRlpPv3U_T_Mc',
 'ChIJs3ZM_TBfXIYRfvU8Yc0vFpc',
 'ChIJm3b3H11eXIYRBxiwPhMMLPs',
 'ChIJwY-chYn2XIYRwkzXvRgBRMY',
 'ChIJH7sZ47pfXIYRzEgoIp3BIMM',
 'ChIJt4CU_NpeXIYRVJLgVn0_z84',
 'ChIJ36TRJMX1XIYRkVsQJfZEruM',
 'ChIJt19osyn2XIYRzFL7NpA1tb8',
 'ChIJn0JIUJ5fXIYRV60SgUejuhI',
 'ChIJ2c9HkdZYXIYRrM4-NXzSW5Q',
 'ChIJYT7sdm5fXIYRBWuDvf9r-98',
 'ChIJtR6MDQv3XIYRUO9VXG8vRZE',
 'ChIJceDlGWz2XIYRkOEIHC0C9dU']

In [17]:
# Now that we have clean strings, pull data we need from Google API

base_url = "https://maps.googleapis.com/maps/api/place/details/json?"

In [19]:
# Creating arrays for the new fields
Name = []
Lat = []
Long = []

# Running the schools through API to get our fields and printing school names to check that they're read
# (after checking loop with break to make sure it works)

for school in schools:
    print(school)
    params = {"key": gkey, "place_id": school}
    data = requests.get(base_url, params = params).json()
    Name.append(data['result']['name'])
    Lat.append(data['result']['geometry']['location']['lat'])
    Long.append(data['result']['geometry']['location']['lng'])


ChIJZ1nRc5NpXIYRWqa1idCgEJg
ChIJdyGkXZL2XIYRhh0JiXBbhig
ChIJdaDOif1YXIYR6sEyEDYLmU4
ChIJNy8ZhCD2XIYRm385ZPZbeRM
ChIJI5f_Sxf2XIYRhXwvlowTISY
ChIJj51ZaKr3XIYRAuYnK-hHuPU
ChIJF6riVz_2XIYRPZWRaCgNyhc
ChIJ3Y6xI41YXIYRJBXfmYKxJQY
ChIJASkEdMz2XIYRmxyuzGvby_4
ChIJ62FceihcXIYRy3JdozZccLU
ChIJ5SvGds1eXIYRh76jP9qVmss
ChIJ7YCUUzH2XIYRS4vwaODq3Vo
ChIJ467TWZb2XIYRUzzj1gZjWZI
ChIJ4x7rDxdZXIYRFSQwOzahEjw
ChIJVafkHiVZXIYRjVS1WuG-p68
ChIJve2sBJf1XIYRW-fSrWUL4ds
ChIJZXlp7uP1XIYRh0hy8owLlHo
ChIJv9c6n5JeXIYRlpPv3U_T_Mc
ChIJs3ZM_TBfXIYRfvU8Yc0vFpc
ChIJm3b3H11eXIYRBxiwPhMMLPs
ChIJwY-chYn2XIYRwkzXvRgBRMY
ChIJH7sZ47pfXIYRzEgoIp3BIMM
ChIJt4CU_NpeXIYRVJLgVn0_z84
ChIJ36TRJMX1XIYRkVsQJfZEruM
ChIJt19osyn2XIYRzFL7NpA1tb8
ChIJn0JIUJ5fXIYRV60SgUejuhI
ChIJ2c9HkdZYXIYRrM4-NXzSW5Q
ChIJYT7sdm5fXIYRBWuDvf9r-98
ChIJtR6MDQv3XIYRUO9VXG8vRZE
ChIJceDlGWz2XIYRkOEIHC0C9dU


In [20]:
# Appending our arrays to the dataframe, as well as adding a new column telling us city

SA_df["Location"] = "San Antonio"
SA_df["Name"] = Name
SA_df["Lat"] = Lat
SA_df["Long"] = Long
SA_df.head()

Unnamed: 0,School ID,School Name,Percent in Poverty,Google Place ID,Location,Name,Lat,Long
0,15907126,FRANKLIN EL,90.63,ChIJZ1nRc5NpXIYRWqa1idCgEJg,San Antonio,Kay Franklin Elementary School,29.527388,-98.731561
1,15907127,GATES EL,97.52,ChIJdyGkXZL2XIYRhh0JiXBbhig,San Antonio,Gates Elementary School,29.409923,-98.427857
2,15907129,CHARLES GRAEBNER EL,93.24,ChIJdaDOif1YXIYR6sEyEDYLmU4,San Antonio,Charles Graebner Elementary School,29.384539,-98.528102
3,15907131,ROBERT B GREEN EL,88.46,ChIJNy8ZhCD2XIYRm385ZPZbeRM,San Antonio,Robert B Green Academy,29.399431,-98.48383
4,15907132,HERFF EL,96.73,ChIJI5f_Sxf2XIYRhXwvlowTISY,San Antonio,Herff Elementary School,29.40919,-98.472454


In [21]:
# We don't need School ID field, so drop it

SA_df = SA_df.drop(['School ID'], axis = 1)
SA_df.head()

Unnamed: 0,School Name,Percent in Poverty,Google Place ID,Location,Name,Lat,Long
0,FRANKLIN EL,90.63,ChIJZ1nRc5NpXIYRWqa1idCgEJg,San Antonio,Kay Franklin Elementary School,29.527388,-98.731561
1,GATES EL,97.52,ChIJdyGkXZL2XIYRhh0JiXBbhig,San Antonio,Gates Elementary School,29.409923,-98.427857
2,CHARLES GRAEBNER EL,93.24,ChIJdaDOif1YXIYR6sEyEDYLmU4,San Antonio,Charles Graebner Elementary School,29.384539,-98.528102
3,ROBERT B GREEN EL,88.46,ChIJNy8ZhCD2XIYRm385ZPZbeRM,San Antonio,Robert B Green Academy,29.399431,-98.48383
4,HERFF EL,96.73,ChIJI5f_Sxf2XIYRhXwvlowTISY,San Antonio,Herff Elementary School,29.40919,-98.472454


In [22]:
# Rearrange columns

organized_SA = SA_df[["School Name", "Name", "Location","Percent in Poverty", "Lat", "Long", "Google Place ID"]]
organized_SA.head()

Unnamed: 0,School Name,Name,Location,Percent in Poverty,Lat,Long,Google Place ID
0,FRANKLIN EL,Kay Franklin Elementary School,San Antonio,90.63,29.527388,-98.731561,ChIJZ1nRc5NpXIYRWqa1idCgEJg
1,GATES EL,Gates Elementary School,San Antonio,97.52,29.409923,-98.427857,ChIJdyGkXZL2XIYRhh0JiXBbhig
2,CHARLES GRAEBNER EL,Charles Graebner Elementary School,San Antonio,93.24,29.384539,-98.528102,ChIJdaDOif1YXIYR6sEyEDYLmU4
3,ROBERT B GREEN EL,Robert B Green Academy,San Antonio,88.46,29.399431,-98.48383,ChIJNy8ZhCD2XIYRm385ZPZbeRM
4,HERFF EL,Herff Elementary School,San Antonio,96.73,29.40919,-98.472454,ChIJI5f_Sxf2XIYRhXwvlowTISY


In [24]:
# Google provided the school name (Name field), so drop School Name
# I waited until this point because I wanted to check all names against each other to confirm they're correct

organized_SA = organized_SA.drop(['School Name'], axis = 1)
organized_SA.head()


Unnamed: 0,Name,Location,Percent in Poverty,Lat,Long,Google Place ID
0,Kay Franklin Elementary School,San Antonio,90.63,29.527388,-98.731561,ChIJZ1nRc5NpXIYRWqa1idCgEJg
1,Gates Elementary School,San Antonio,97.52,29.409923,-98.427857,ChIJdyGkXZL2XIYRhh0JiXBbhig
2,Charles Graebner Elementary School,San Antonio,93.24,29.384539,-98.528102,ChIJdaDOif1YXIYR6sEyEDYLmU4
3,Robert B Green Academy,San Antonio,88.46,29.399431,-98.48383,ChIJNy8ZhCD2XIYRm385ZPZbeRM
4,Herff Elementary School,San Antonio,96.73,29.40919,-98.472454,ChIJI5f_Sxf2XIYRhXwvlowTISY


In [25]:
# Rename 'Name' to 'School Name' to match other dataframes for future merge

organized_SA = organized_SA.rename(columns = {'Name': 'School Name'})
organized_SA.head(2)

Unnamed: 0,School Name,Location,Percent in Poverty,Lat,Long,Google Place ID
0,Kay Franklin Elementary School,San Antonio,90.63,29.527388,-98.731561,ChIJZ1nRc5NpXIYRWqa1idCgEJg
1,Gates Elementary School,San Antonio,97.52,29.409923,-98.427857,ChIJdyGkXZL2XIYRhh0JiXBbhig


In [26]:
# Looks good, so export to csv to use later

organized_SA.to_csv("San_Antonio_coords.csv", index = False)