# This notebook loads the raw open space dataset and finds the exact location of each open space on a best-effort basis.

In [None]:
# Mount Google Drive into the Colab filesystem at /content/gdrive so the
# dataset paths used in the cells below resolve.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Location of the raw open space CSV on the mounted Drive; loaded with
# pd.read_csv in the preprocessing cell.
open_space = '/content/gdrive/My Drive/Colab Notebooks/CS506FinalProject/Open_Space.csv'

In [None]:
import pandas as pd
import re
import requests
import urllib.parse
import json
import ast
import geopy
from geopy.geocoders import Nominatim

# Load and preprocess the raw open space dataset.

In [None]:
# Read the raw CSV, keep only the three columns used downstream, rename the
# site name column to `name`, and tag every row with a constant `type`.
osdf = (
    pd.read_csv(open_space)
    .loc[:, ['SITE_NAME', 'DISTRICT', 'ADDRESS']]
    .rename(columns={'SITE_NAME': 'name'})
    .assign(type='open space')
)
osdf.head(10)

Unnamed: 0,name,DISTRICT,ADDRESS,type
0,Wilson Park,Allston-Brighton,Off 1686 Commonwealth Ave.,open space
1,Nickerson Field,Allston-Brighton,,open space
2,Hooker-Sorrento Street Playground,Allston-Brighton,120 Hooker Str.,open space
3,Smith Playground,Allston-Brighton,235 Western Ave.,open space
4,Fern Square,Allston-Brighton,Opposite 101 Franklin Str.,open space
5,Ringer Playground,Allston-Brighton,,open space
6,Kennedy Rock,Allston-Brighton,,open space
7,McKinney Playground,Allston-Brighton,61-74 Faneuil Str.,open space
8,Hobart Park,Allston-Brighton,Opposite 18 Ranlegh Rd.,open space
9,Market Street Burying Ground,Allston-Brighton,Opposite 381 Market Str.,open space


In [None]:
# Collect the distinct district labels that appear in the dataset.
all_dis = set(osdf['DISTRICT'])
print(all_dis)

{'West Roxbury', 'Mattapan', 'Dorchester', 'East Boston', 'Mission Hill', 'Jamaica Plain', 'Back Bay/Beacon Hill', 'Hyde Park', 'North Dorchester', 'Multi-District', 'Harbor Islands', 'Roslindale', 'Fenway/Kenmore', 'Central Boston', 'Charlestown', 'Allston-Brighton', 'Roxbury', 'South End', 'South Boston'}


# Use Nominatim to find the locations of the open space.
More than half are found successfully. The others are not found due to a lack of information in the raw dataset. Some open spaces in the raw dataset have wrong names that are not officially registered on any online map, and some of them cannot be located even by manually searching the internet. Others are simply not included in the Nominatim dataset (which is a free online map dataset).

In [None]:
def get_location(geolocator, name, district, bounds=(42.0, 42.6, -71.5, -70.5)):
  """Geocode an open space and return its coordinates as ``[lat, lon]``.

  The lookup is tried first as ``"<name>, <district>"`` and then as
  ``"<name>, Boston"``. The first hit that falls inside ``bounds`` wins.

  Args:
    geolocator: Object exposing ``geocode(query)`` that returns either
      ``None`` or a result whose ``raw`` mapping has ``'lat'`` and ``'lon'``
      entries (e.g. a geopy ``Nominatim`` instance).
    name: Name of the open space.
    district: District label to try as the first qualifier.
    bounds: ``(min_lat, max_lat, min_lon, max_lon)`` box a result must fall
      in to be accepted. The default is a deliberately loose box around
      Greater Boston, meant only to reject gross mismatches (a same-named
      place in another state). Pass ``None`` to accept any result.

  Returns:
    ``[lat, lon]`` as floats, or ``None`` when no query produced an
    acceptable result. On failure the last query tried is printed so the
    unresolved sites can be reviewed.
  """
  def _acceptable(lat, lon):
    # With no bounds configured every hit is accepted.
    if bounds is None:
      return True
    min_lat, max_lat, min_lon, max_lon = bounds
    return min_lat <= lat <= max_lat and min_lon <= lon <= max_lon

  address = None
  for qualifier in (district, 'Boston'):
    address = name + ', ' + qualifier
    location = geolocator.geocode(address)
    if location is None:
      continue
    lat = float(location.raw['lat'])
    lon = float(location.raw['lon'])
    if _acceptable(lat, lon):
      return [lat, lon]
    # Out-of-region hit: fall through to the next, broader query.

  print(address)
  return None

Open spaces that are not successfully found are printed below.

In [None]:
# Geocode every row; get_location returns [lat, lon] or None and prints the
# query for each site it cannot resolve.
# NOTE(review): the public Nominatim service asks for an identifying
# user_agent and at most ~1 request/second — confirm 'test' and the
# unthrottled apply are acceptable before re-running at scale.
geolocator = Nominatim(user_agent='test')
# Assign to a single column label: a one-element list key (osdf[['info']])
# with a 1-D value of length n_rows is rejected by current pandas with
# "Columns must be same length as key".
osdf['info'] = osdf.apply(lambda row: get_location(geolocator, row['name'], row['DISTRICT']), axis=1)

Hooker-Sorrento Street Playground, Boston
Kennedy Rock, Boston
Market Street Burying Ground, Boston
Reilly Playground, Boston
The Cenacles, Boston
Turnpike Overlook, Boston
Foster Street Rock, Boston
Boston College Athletic Fields, Boston
Dorchester North Burying Ground, Boston
The Humps, Boston
Phillips Street Play Area, Boston
Clarendon Street Playlot, Boston
Foster Street Play Area, Boston
Cardinal Cushing Park I, Boston
Langone Park, Boston
Phipps Street Burying Ground, Boston
East Boston Memorial Park I, Boston
Cuneo Park, Boston
Joe Ciampa Garden, Boston
Eagle Hill Memorial Park Garden, Boston
Fenway Park Field, Boston
Wentworth Field, Boston
Connell Fields/Hickey Courts, Boston
Weider Park, Boston
Moynihan Playground, Boston
Iacono/Readville Playground, Boston
Jeremiah Hurley Memorial Park, Boston
Stony Brook Recreation Complex, Boston
Dana Avenue Urban Wild I, Boston
Oak Lawn Golf Range, Boston
Paul Gore/Beecher Street Garden, Boston
English H.S. Athletic Fields, Boston
Helleni

# Do more preprocessing and save the result.

In [None]:
# Discard the street address, then keep only rows with no missing values
# (rows whose geocoding returned None are removed here).
osdf = osdf.drop(columns=['ADDRESS']).dropna()
# Pull the [lat, lon] pairs out of the frame and expand them into two
# columns aligned on the surviving row index.
coords = pd.DataFrame(osdf.pop('info').tolist(), index=osdf.index)
osdf[['lat', 'lon']] = coords
osdf.head()

Unnamed: 0,name,DISTRICT,type,lat,lon
0,Wilson Park,Allston-Brighton,open space,42.340985,-71.14593
1,Nickerson Field,Allston-Brighton,open space,42.353369,-71.119283
3,Smith Playground,Allston-Brighton,open space,42.364359,-71.131411
4,Fern Square,Allston-Brighton,open space,41.211271,-81.496505
5,Ringer Playground,Allston-Brighton,open space,42.350446,-71.138224


In [None]:
# Persist the geocoded rows as an intermediate CSV. No `index` argument is
# given, so the DataFrame index is written as the first (unnamed) column.
osdf.to_csv('/content/gdrive/My Drive/Colab Notebooks/CS506FinalProject/Open_Space_processed.csv')

In [None]:
# Reload the intermediate CSV and print its (rows, columns) shape as a
# quick sanity check.
osdf = pd.read_csv('/content/gdrive/My Drive/Colab Notebooks/CS506FinalProject/Open_Space_processed.csv')
print(osdf.shape)

(654, 6)


In [None]:
# Shape the frame into its final column layout: the district becomes the
# `address`, an empty `zip` column is added, and only the listed columns are
# kept, in this order. Written as a single chain with `assign` (instead of
# the in-place `insert`) so re-running the cell does not fail on an already
# existing `zip` column.
osdf = (
    osdf.rename(columns={'DISTRICT': 'address'})
    .assign(zip=None)
    .loc[:, ['name', 'type', 'address', 'zip', 'lat', 'lon']]
)
osdf.head(10)

Unnamed: 0,name,type,address,zip,lat,lon
0,Wilson Park,open space,Allston-Brighton,,42.340985,-71.14593
1,Nickerson Field,open space,Allston-Brighton,,42.353369,-71.119283
2,Smith Playground,open space,Allston-Brighton,,42.364359,-71.131411
3,Fern Square,open space,Allston-Brighton,,41.211271,-81.496505
4,Ringer Playground,open space,Allston-Brighton,,42.350446,-71.138224
5,McKinney Playground,open space,Allston-Brighton,,42.354751,-71.152922
6,Hobart Park,open space,Allston-Brighton,,42.355735,-71.16127
7,Jackson Square,open space,Allston-Brighton,,42.322596,-71.100257
8,Boyden Park,open space,Allston-Brighton,,42.339545,-71.166317
9,Evergreen Cemetery,open space,Allston-Brighton,,42.338336,-71.162957


In [None]:
# Write the final dataset; index=False keeps the DataFrame index out of the
# CSV so the file contains only the six selected columns.
osdf.to_csv('/content/gdrive/My Drive/Colab Notebooks/CS506FinalProject/Open_Space_fully_processed.csv', index=False)