# Data Exploration

This notebook is used to get an insight on all the data sets considered.

In [3]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

## Methods

In [4]:
def getStatsForColumn(column_name):
    print('Unique values: %s \nUnique counts: %d.' % 
          (np.sort(crime_data[column_name].unique()),
           crime_data[column_name].unique().shape[0]))

In [5]:
def getStatsForColumn(data, column_name):
    print('Unique values: %s \nUnique counts: %d.' % 
          (np.sort(data[column_name].unique()),
           data[column_name].unique().shape[0]))

In [6]:
def identifyNullValues(data):  
    #Column indexes which have null value
    meta_column = np.flatnonzero((data.isnull().any()== True))
    dict={}
    for x in meta_column:     
        dict[data.columns.values[x]] = data.iloc[:, x].isnull().sum()
    
    return dict

## Data

### Crime Data

In [4]:
crime_data = pd.read_csv('data/crime_data.csv')

In [5]:
crime_data.head()

Unnamed: 0,IncidntNum,Category,Descript,DayOfWeek,Date,Time,PdDistrict,Resolution,Address,X,Y,Location,PdId
0,150060275,NON-CRIMINAL,LOST PROPERTY,Monday,01/19/2015,14:00,MISSION,NONE,18TH ST / VALENCIA ST,-122.421582,37.761701,"(37.7617007179518, -122.42158168137)",15006027571000
1,150098210,ROBBERY,"ROBBERY, BODILY FORCE",Sunday,02/01/2015,15:45,TENDERLOIN,NONE,300 Block of LEAVENWORTH ST,-122.414406,37.784191,"(37.7841907151119, -122.414406029855)",15009821003074
2,150098210,ASSAULT,AGGRAVATED ASSAULT WITH BODILY FORCE,Sunday,02/01/2015,15:45,TENDERLOIN,NONE,300 Block of LEAVENWORTH ST,-122.414406,37.784191,"(37.7841907151119, -122.414406029855)",15009821004014
3,150098210,SECONDARY CODES,DOMESTIC VIOLENCE,Sunday,02/01/2015,15:45,TENDERLOIN,NONE,300 Block of LEAVENWORTH ST,-122.414406,37.784191,"(37.7841907151119, -122.414406029855)",15009821015200
4,150098226,VANDALISM,"MALICIOUS MISCHIEF, VANDALISM OF VEHICLES",Tuesday,01/27/2015,19:00,NORTHERN,NONE,LOMBARD ST / LAGUNA ST,-122.431119,37.800469,"(37.8004687042875, -122.431118543788)",15009822628160


In [6]:
print('Rows: %d, Columns: %d' % (crime_data.shape[0], crime_data.shape[1]))

Rows: 2188068, Columns: 13


In [7]:
print('Null values: %s.' % identifyNullValues(crime_data))

Null values: {'PdDistrict': 1}.


###### Incident Number

In [8]:
getStatsForColumn('IncidntNum')

Unique values: [     3979     10128     10736 ... 991549731 991564488 991582377] 
Unique counts: 1726468.


###### Category

In [9]:
getStatsForColumn('Category')

Unique values: ['ARSON' 'ASSAULT' 'BAD CHECKS' 'BRIBERY' 'BURGLARY' 'DISORDERLY CONDUCT'
 'DRIVING UNDER THE INFLUENCE' 'DRUG/NARCOTIC' 'DRUNKENNESS'
 'EMBEZZLEMENT' 'EXTORTION' 'FAMILY OFFENSES' 'FORGERY/COUNTERFEITING'
 'FRAUD' 'GAMBLING' 'KIDNAPPING' 'LARCENY/THEFT' 'LIQUOR LAWS' 'LOITERING'
 'MISSING PERSON' 'NON-CRIMINAL' 'OTHER OFFENSES'
 'PORNOGRAPHY/OBSCENE MAT' 'PROSTITUTION' 'RECOVERED VEHICLE' 'ROBBERY'
 'RUNAWAY' 'SECONDARY CODES' 'SEX OFFENSES, FORCIBLE'
 'SEX OFFENSES, NON FORCIBLE' 'STOLEN PROPERTY' 'SUICIDE' 'SUSPICIOUS OCC'
 'TREA' 'TRESPASS' 'VANDALISM' 'VEHICLE THEFT' 'WARRANTS' 'WEAPON LAWS'] 
Unique counts: 39.


###### Description

In [10]:
getStatsForColumn('Descript')

Unique values: ['ABANDONMENT OF CHILD' 'ABORTION'
 'ACCESS CARD INFORMATION, PUBLICATION OF'
 'ACCESS CARD INFORMATION, THEFT OF' 'ACCIDENTAL BURNS'
 'ACCIDENTAL LACERATIONS' 'ACCIDENTAL SHOOTING'
 'ACTS AGAINST PUBLIC TRANSIT' 'ADVERTISING DISTRIBUTORS PERMIT VIOLATION'
 'AEROSOL CONTAINER; SALE, PURCHASE OR POSSESSION OF'
 'AFFIXING ADVERTISMENTS TO POLES'
 'AGGRAVATED ASSAULT OF POLICE OFFICER, SNIPING'
 'AGGRAVATED ASSAULT OF POLICE OFFICER,BODILY FORCE'
 'AGGRAVATED ASSAULT ON POLICE OFFICER WITH A GUN'
 'AGGRAVATED ASSAULT ON POLICE OFFICER WITH A KNIFE'
 'AGGRAVATED ASSAULT WITH A DEADLY WEAPON' 'AGGRAVATED ASSAULT WITH A GUN'
 'AGGRAVATED ASSAULT WITH A KNIFE' 'AGGRAVATED ASSAULT WITH BODILY FORCE'
 'AGGRESSIVE SOLICITING' 'AID OR HARBOR FELON' 'AIDED CASE'
 'AIDED CASE -PROPERTY FOR DESTRUCTION' 'AIDED CASE, DOG BITE'
 'AIDED CASE, INJURED PERSON' 'AIDED CASE, MENTAL DISTURBED'
 'AIDED CASE, SICK PERSON' 'ALCOHOLIC BEVERAGE, PROCURING SALE OF'
 'AMMUNITION, POSS. BY PROHIBITED

###### Day of Week

In [11]:
getStatsForColumn('DayOfWeek')

Unique values: ['Friday' 'Monday' 'Saturday' 'Sunday' 'Thursday' 'Tuesday' 'Wednesday'] 
Unique counts: 7.


###### Date

In [12]:
getStatsForColumn('Date')

Unique values: ['01/01/2003' '01/01/2004' '01/01/2005' ... '12/31/2015' '12/31/2016'
 '12/31/2017'] 
Unique counts: 5560.


###### Time

In [13]:
getStatsForColumn('Time')

Unique values: ['00:01' '00:02' '00:03' ... '23:57' '23:58' '23:59'] 
Unique counts: 1439.


###### Police Department District

In [14]:
crime_data = crime_data.replace(np.nan, 'N/A')
getStatsForColumn('PdDistrict')

Unique values: ['BAYVIEW' 'CENTRAL' 'INGLESIDE' 'MISSION' 'N/A' 'NORTHERN' 'PARK'
 'RICHMOND' 'SOUTHERN' 'TARAVAL' 'TENDERLOIN'] 
Unique counts: 11.


###### Resolution

In [15]:
getStatsForColumn('Resolution')

Unique values: ['ARREST, BOOKED' 'ARREST, CITED' 'CLEARED-CONTACT JUVENILE FOR MORE INFO'
 'COMPLAINANT REFUSES TO PROSECUTE'
 'DISTRICT ATTORNEY REFUSES TO PROSECUTE' 'EXCEPTIONAL CLEARANCE'
 'JUVENILE ADMONISHED' 'JUVENILE BOOKED' 'JUVENILE CITED'
 'JUVENILE DIVERTED' 'LOCATED' 'NONE' 'NOT PROSECUTED'
 'PROSECUTED BY OUTSIDE AGENCY' 'PROSECUTED FOR LESSER OFFENSE'
 'PSYCHOPATHIC CASE' 'UNFOUNDED'] 
Unique counts: 17.


###### Address

In [16]:
getStatsForColumn('Address')

Unique values: ['0 Block of  HARRISON ST' '0 Block of 10TH AV' '0 Block of 10TH ST' ...
 'ZOE ST / BRYANT ST' 'ZOE ST / FREELON ST' 'ZOE ST / WELSH ST'] 
Unique counts: 25130.


###### X (Latitude)

In [17]:
getStatsForColumn('X')

Unique values: [-122.51364206 -122.51364206 -122.51364206 ... -122.36493749 -122.3647507
 -120.5       ] 
Unique counts: 60256.


###### Y (Longitude)

In [18]:
getStatsForColumn('Y')

Unique values: [37.70787902 37.70791996 37.7079219  ... 37.81997549 37.82062084
 90.        ] 
Unique counts: 58337.


###### Location

In [19]:
getStatsForColumn('Location')

Unique values: ['(37.7078790224135, -122.463626254961)'
 '(37.7079199575616, -122.46092149191)'
 '(37.7079219034586, -122.428716681874)' ...
 '(37.81997549229705, -122.37427517670966)'
 '(37.8206208380702, -122.364750704393)' '(90, -120.5)'] 
Unique counts: 61054.


###### Police Department ID

In [20]:
getStatsForColumn('PdId')

Unique values: [     397963010     1012863010     1073663010 ... 99154973163010
 99156448863010 99158237763010] 
Unique counts: 2188068.


### City Facilities Data

In [22]:
city_facilities = pd.read_csv('data/city_facilities_data.csv')

In [23]:
city_facilities.head()

Unnamed: 0,facility_id,common_name,address,city,zip_code,block_lot,owned_leased,dept_id,department_name,gross_sq_ft,longitude,latitude,geom,supervisor_district,city_tenants
0,1203,Oceanside Water Pollution Control Plant,3500 Great Hwy,San Francisco,94132,7281009.0,Own,47,Public Utilities Commission,,-122.504876,37.727062,"(37.72706160500002, -122.50487569699999)",7.0,
1,1202,Westside Pump Station,2814 Great Hwy,San Francisco,94132,7281009.0,Own,47,Public Utilities Commission,,-122.506462,37.734737,"(37.734737284999994, -122.50646214)",7.0,
2,380,Camp Mather Office,35250 Mather Rd,Groveland,95321,,Own,49,Recreation And Parks,,-119.85569,37.882703,"(37.882703144000004, -119.85569019600001)",,
3,3342,500 Pine Rooftop Park,500 Pine St,San Francisco,94108,258042.0,Own,49,Recreation And Parks,,-122.404581,37.791879,"(37.79187874399997, -122.40458094299998)",3.0,
4,3288,Top of the Hill Terminal Operator Restroom,6232 Mission St,San Francisco,94014,2819021.0,Lease,9,Non-City,,-122.460114,37.70658,"(37.706580492, -122.46011434100001)",,Municipal Transportation Agency


In [11]:
print('Rows: %d, Columns: %d' % (city_facilities.shape[0], city_facilities.shape[1]))
print(city_facilities.columns.values)

Rows: 1805, Columns: 15
['facility_id' 'common_name' 'address' 'city' 'zip_code' 'block_lot'
 'owned_leased' 'dept_id' 'department_name' 'gross_sq_ft' 'longitude'
 'latitude' 'geom' 'supervisor_district' 'city_tenants']


In [12]:
print('Null values: %s.' % identifyNullValues(city_facilities))

Null values: {'city_tenants': 1670, 'block_lot': 343, 'supervisor_district': 334, 'longitude': 9, 'gross_sq_ft': 879, 'geom': 9, 'latitude': 9}.


###### Common name

In [21]:
getStatsForColumn(city_facilities, 'common_name')

Unique values: ['1 Ave of the Palms Building' '1 Bayview Park' '100 Font Blvd' ...
 'Zaida T Rodriguez Early Education' 'Zellerbach Rehearsal Hall'
 'Zoo Wet Weather Lift Station'] 
Unique counts: 1803.


###### City Tenants

In [13]:
getStatsForColumn(city_facilities, 'city_tenants')

Unique values: [nan 'Adult Probation' 'Airport (Sfo)' 'Arts Commission'
 'Child Support Services'
 'City Attorney: Public Health: Children Youth And Families: Human Services Agency'
 'City College' 'District Attorney'
 'District Attorney: Superior Court: Police Department: Sheriff: Adult Probation: Gsa - Medical Examiner'
 'Elections' 'Emergency Management'
 'Environment: Municipal Transportation Agency: County Transportation Authority'
 "Ethics Commission: Gsa - Real Estate Division: Public Health: Human Rights Commission: Sheriff: Rent Arbitration Board: Status Of Women: Human Services Agency: Gsa - City Administrator's Office: Civil"
 'Fine Arts Museums' 'Fire Department'
 'Fire Department: Public Works: Planning: Public Utilities Commission: Building Inspection'
 "Gsa - City Administrator's Office"
 'Gsa - Office Of Contract Administration' 'Gsa - Real Estate Division'
 'Health Service System: Law Library: Retirement System' 'Human Resources'
 'Human Services Agency'
 'Human Servic

###### Block Lot

In [14]:
getStatsForColumn(city_facilities, 'block_lot')

Unique values: [nan '0004002' '0007001' '0015001' '0019001' '0033005' '0036001' '0040002'
 '0058001' '0067014' '0070001' '0072001' '0075002' '0077024' '0078061'
 '0082001' '0087072' '0090035' '0092006' '0093024' '0102001' '0130043'
 '0137001' '0140008' '0143003' '0147025' '0148008' '0158078' '0159044'
 '0159046' '0160015' '0160031' '0161015' '0178001' '0180004' '0186007'
 '0187012' '0190005' '0191004' '0192006' '0195005' '0195010' '0202018'
 '0203013' '0204020' '0206017' '0208009' '0209017' '0211001' '0211007'
 '0213001' '0216003' '0220004' '0221044' '0225018' '0226010' '0228006'
 '0245002' '0258003' '0258042' '0274010' '0286017' '0308001' '0317010A'
 '0321039' '0322005' '0324023' '0327021' '0331008' '0331028' '0332005'
 '0332006' '0334044' '0336030' '0338023' '0339005' '0341014' '0345007'
 '0345015' '0345031' '0346003A' '0347008' '0349002' '0351039' '0351051'
 '0353001' '0354001' '0405004' '0405006' '0409002' '0454001' '0459033'
 '0468A001' '0469001' '0490009' '0509003' '0509009' '051

###### Supervisor District

In [15]:
getStatsForColumn(city_facilities, 'supervisor_district')

Unique values: [ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. nan] 
Unique counts: 12.


###### Latitude

In [20]:
getStatsForColumn(city_facilities, 'latitude')

Unique values: [37.30462474 37.30479053 37.30485102 ... 37.88350152 37.8837037
         nan] 
Unique counts: 1262.


###### Longitude

In [16]:
getStatsForColumn(city_facilities, 'longitude')

Unique values: [-122.51020567 -122.51005212 -122.50976544 ... -119.85278212 -119.85186268
           nan] 
Unique counts: 1261.


###### Geom(Co-ordinates)

In [18]:
getStatsForColumn(city_facilities, 'geom')

Unique values: [nan '(37.30462474199999, -122.25690722799999)'
 '(37.30479052599998, -122.25645900699999)' ...
 '(37.883087639999985, -119.85752439599997)'
 '(37.88350151899999, -119.85711392500002)'
 '(37.883703701, -119.85603588999999)'] 
Unique counts: 1262.


###### Gross Square feet

In [17]:
getStatsForColumn(city_facilities, 'gross_sq_ft')

Unique values: [0.000000e+00 1.000000e+00 3.800000e+01 4.800000e+01 5.200000e+01
 6.400000e+01 6.800000e+01 7.500000e+01 8.400000e+01 9.500000e+01
 1.050000e+02 1.200000e+02 1.220000e+02 1.300000e+02 1.500000e+02
 1.560000e+02 1.700000e+02 1.800000e+02 1.900000e+02 1.920000e+02
 2.550000e+02 2.580000e+02 2.640000e+02 2.660000e+02 2.700000e+02
 2.880000e+02 2.890000e+02 2.920000e+02 3.080000e+02 3.120000e+02
 3.230000e+02 3.240000e+02 3.250000e+02 3.300000e+02 3.410000e+02
 3.900000e+02 4.000000e+02 4.160000e+02 4.200000e+02 4.250000e+02
 4.260000e+02 4.320000e+02 4.400000e+02 4.420000e+02 4.480000e+02
 4.500000e+02 4.680000e+02 4.800000e+02 4.830000e+02 5.000000e+02
 5.060000e+02 5.250000e+02 5.760000e+02 5.880000e+02 5.920000e+02
 6.000000e+02 6.120000e+02 6.150000e+02 6.210000e+02 6.290000e+02
 6.360000e+02 6.630000e+02 6.720000e+02 6.750000e+02 6.840000e+02
 6.900000e+02 6.950000e+02 7.000000e+02 7.020000e+02 7.130000e+02
 7.200000e+02 7.210000e+02 7.600000e+02 7.680000e+02 7.980000

### Landmark Data

In [27]:
landmark_data = pd.read_csv('data/landmarks_data.csv')

In [24]:
landmark_data.head()

Unnamed: 0,the_geom,OBJECTID,LMNO,Name
0,MULTIPOLYGON (((-122.413174898672 37.771587449...,269,199,The Jackson Brewery Complex
1,MULTIPOLYGON (((-122.468991065336 37.727319809...,181,213,THE JOSEPH LEONARD/CECIL F. POOLE HOUSE
2,MULTIPOLYGON (((-122.403214798149 37.796528410...,120,26,"Bank of Lucas, Turner, & Co."
3,MULTIPOLYGON (((-122.412754597481 37.804377006...,62,129,BAUER & SCHWEITZER MALTING COMPANY
4,MULTIPOLYGON (((-122.415453055903 37.749474856...,206,206,HOWARD /26TH STREET COTTAGES


In [28]:
print('Rows: %d, Columns: %d' % (landmark_data.shape[0], landmark_data.shape[1]))
print(landmark_data.columns.values)

Rows: 304, Columns: 4
['the_geom' 'OBJECTID' 'LMNO' 'Name']


In [26]:
print('Null values: %s.' % identifyNullValues(landmark_data))

Null values: {'Name': 2}.


###### the_geom(coordinates)

In [30]:
getStatsForColumn(landmark_data, 'the_geom')

Unique values: ['MULTIPOLYGON (((-122.375541480414 37.731398357835, -122.375591843407 37.731342554004, -122.375802821283 37.731462340839, -122.375752458367 37.731518144759, -122.375551003679 37.731741360254, -122.375340025417 37.731621572974, -122.375541480414 37.731398357835)))'
 'MULTIPOLYGON (((-122.375591843407 37.731342554004, -122.37574293191 37.731175141479, -122.375953910688 37.731294928028, -122.375802821283 37.731462340839, -122.375591843407 37.731342554004)))'
 'MULTIPOLYGON (((-122.375629763559 37.732309351061, -122.375780855076 37.732141938495, -122.375851182172 37.732181866457, -122.375700089621 37.732349280031, -122.375629763559 37.732309351061)))'
 'MULTIPOLYGON (((-122.388363669828 37.78996167932, -122.388415891444 37.789946113611, -122.388577879755 37.790234432986, -122.388624987137 37.790318276739, -122.388062275758 37.790512686997, -122.388025098335 37.790436420006, -122.388382447531 37.790312960887, -122.388299747548 37.790166177962, -122.388334034651 37.7901555731

###### Object id

In [33]:
getStatsForColumn(landmark_data, 'OBJECTID')

Unique values: [  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
 235 236 237 238 239 240 241 242 243

###### LMNO

In [32]:
getStatsForColumn(landmark_data, 'LMNO')

Unique values: [  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
 109 110 111 112 113 114 115 117 118 119 120 121 122 123 124 125 126 127
 128 129 130 131 132 133 134 135 136 137 140 141 143 144 145 146 147 148
 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 201 202 203
 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 220 221 222
 223 224 225 226 227 228 229 231 232 234 235 236 237 238 239 240 241 242
 243 244 245 246 247 248 249 250 251

###### Name of location

In [34]:
getStatsForColumn(landmark_data, 'Name')

Unique values: [nan '#37 ENGINE, #9 TRUCK FIREHOUSE' '2168 Market St AB' 'A. Borel & Co.'
 'ALFRED G. HANSON RESIDENCE' 'ALHAMBRA THEATER' 'AXFORD HOUSE'
 'Abner Phelps House' 'Alemany Emergency Hospital and Health Center'
 'Alfred E. (Nobby) Clarke Mansion' 'Atherton House' 'Atkinson House'
 'Audiffred Building' "B'nai David Synagogue" 'BAKER & HAMILTON BUILDING'
 'BALBOA HIGH SCHOOL' 'BAUER & SCHWEITZER MALTING COMPANY'
 'BRANDENSTEIN (BRANSTEN) HOUSE' "BROWN'S OPERA HOUSE"
 'BUICH BLDG/TADICH GRILL' 'Bank of California'
 'Bank of Lucas, Turner, & Co.' 'Beach Chalet'
 "Belli Building (Langerman's Building)"
 'Beltline Railroad Roundhouse Complex' 'Bourdette Building'
 'Bourn Mansion' 'Burr House' 'Bush Street Temple (Soto Mission)'
 'CADILLAC HOTEL' 'CALIFORNIA HALL' 'CAMPFIRE GIRLS BUILDING'
 'CARMEL FALLON BUILDING' 'CASTRO CAMERA'
 'CATHEDRAL HOUSE-DEMOLISHED 1994' 'CHARLES L. HINKEL HOUSE'
 'CHINATOWN BRANCH LIBRARY' 'CITY LIGHTS BOOKSTORE' 'CLUNIE HOUSE'
 'COIT TOWER' 'COLUMBIA 

### Map of Colleges

In [25]:
map_of_colleges = pd.read_csv('data/colleges_map_data.csv')

In [39]:
map_of_colleges.head()

Unnamed: 0,ID,Institution,Campus,Address,Phone,Web Address,Location
0,22,University of California Hastings College of Law,,100 McAllister St,4155655000.0,http://www.uchastings.edu,"(37.7811643, -122.4140251)"
1,23,University of California Hastings College of Law,,198 McAllister St,4155655000.0,http://www.uchastings.edu,"(37.78105093, -122.4149724)"
2,24,University of California Hastings College of Law,,376 Larkin St,4155655000.0,http://www.uchastings.edu,"(37.78122201, -122.4166513)"
3,25,Academy of Art University,,2300 Stockton St,,,"(37.80747093, -122.4102403)"
4,26,Academy of Art University,,701 Chestnut St,,,"(37.80323611, -122.4150483)"


In [40]:
print('Rows: %d, Columns: %d' % (map_of_colleges.shape[0], map_of_colleges.shape[1]))
print(map_of_colleges.columns.values)

Rows: 46, Columns: 7
['ID' 'Institution' 'Campus' 'Address' 'Phone' 'Web Address' 'Location']


In [48]:
print('Null values: %s.' % identifyNullValues(map_of_colleges))

Null values: {'Phone': 22, 'Campus': 36, 'Web Address': 23}.


###### ID

In [41]:
getStatsForColumn(map_of_colleges, 'ID')

Unique values: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 40 41 42 46 47 48 49 50 60] 
Unique counts: 46.


###### Institution

In [42]:
getStatsForColumn(map_of_colleges, 'Institution')

Unique values: ['Academy of Art University'
 'American College of Traditional Chinese Medicine'
 'American Conservatory Theater' 'California Culinary Academy'
 'California Institute of Integral Studies'
 'City College of San Francisco' 'Everest College-San Francisco'
 'Golden Gate University-San Francisco' 'Heald College-San Francisco'
 'Marinello Schools of Beauty' 'Miami Ad School-San Francisco'
 'San Francisco Art Institute' 'San Francisco Conservatory of Music'
 'San Francisco Institute of Esthetics and Cosmetology'
 'San Francisco State University'
 'Saybrook Graduate School and Research Center'
 'The Art Institute of California-San Francisco'
 'The Fashion Institute of Design & Merchandising-San Francisco'
 'University of California Hastings College of Law'
 'University of California-San Francisco' 'University of San Francisco'] 
Unique counts: 21.


###### Campus

In [43]:
getStatsForColumn(map_of_colleges, 'Campus')

Unique values: [nan 'Civic Center' 'Downtown' 'Evans' 'John Adams' 'Mission'
 'Mission Bay' 'Mount Zion' 'Ocean' 'Parnassus' 'Southeast'] 
Unique counts: 11.


###### Address

In [44]:
getStatsForColumn(map_of_colleges, 'Address')

Unique values: ['100 McAllister St' '1067 Folsom St' '1085 Mission St' '1125 Valencia St'
 '1170 Market St' '1400 Evans Ave' '1453 Mission St' '1600 Divisadero St'
 '1600 Holloway Ave' '180 New Montgomery St' '1800 Oakdale Ave'
 '1849 Washington St' '1860 Hayes St' '198 McAllister St'
 '200 McAllister St' '2130 Fulton St' '2151 Van Ness Ave'
 '2300 Stockton St' '30 Grant Ave' '350 Mission St' '350 Rhode Island St'
 '376 Larkin St' '410 Bush St' '415 Jackson St' '455 Arkansas St'
 '466 Townsend St' '491 Post St' '50 Oak St' '50 Phelan Ave'
 '500 Parnassus Ave' '536 Mission St' '540 Powell St' '55 Stockton St'
 '60 Federal St' '600 16th St' '601 Brannan St' '625 Sutter St'
 '688 Sutter St' '701 Chestnut St' '740 Taylor St' '747 Front St'
 '750 Eddy St\n\n750 Eddy Street\n\n750 Eddy Street*'
 '79 New Montgomery St' '800 Chestnut St' '814 Mission St  Suite 500'
 '88 4th St'] 
Unique counts: 46.


###### Phone

In [45]:
getStatsForColumn(map_of_colleges, 'Phone')

Unique values: [4.15239300e+09 4.15274220e+09 4.15282760e+09 4.15338111e+09
 4.15355173e+09 4.15422556e+09 4.15433920e+09 4.15442780e+09
 4.15476900e+09 4.15565460e+09 4.15575610e+09 4.15675520e+09
 4.15771350e+09 4.15771702e+09 4.15808300e+09 4.15834320e+09
 4.15837097e+09 4.15864733e+09 4.15865020e+09 5.62945221e+09
 8.88741427e+09            nan] 
Unique counts: 22.


###### Web Address

In [46]:
getStatsForColumn(map_of_colleges, 'Web Address')

Unique values: [nan 'http://WWW.FIDM.EDU' 'http://www.academyart.edu'
 'http://www.act-sf.org' 'http://www.actcm.edu'
 'http://www.artinstitutes.edu/sanfrancisco/' 'http://www.caculinary.edu'
 'http://www.ccsf.edu' 'http://www.ciis.edu'
 'http://www.everest.edu/campus/san_francisco?' 'http://www.ggu.edu'
 'http://www.heald.edu' 'http://www.marinello.com'
 'http://www.miamiadschool.com' 'http://www.saybrook.edu'
 'http://www.sfai.edu' 'http://www.sfcm.edu' 'http://www.sfsu.edu'
 'http://www.uchastings.edu' 'http://www.ucsf.edu' 'http://www.usfca.edu'] 
Unique counts: 21.


###### Location

In [47]:
getStatsForColumn(map_of_colleges, 'Location')

Unique values: ['(37.72391332, -122.4801364)' '(37.72576996, -122.450972)'
 '(37.73744737, -122.3940977)' '(37.74179876, -122.3847078)'
 '(37.75484683, -122.4203042)' '(37.76052191, -122.3979702)'
 '(37.76292238, -122.4586196)' '(37.76574019, -122.4030029)'
 '(37.76768652, -122.3921299)' '(37.77413509, -122.3992706)'
 '(37.77429933, -122.4470124)' '(37.77456069, -122.4161598)'
 '(37.77554947, -122.4202696)' '(37.77589188, -122.3987812)'
 '(37.77618928, -122.4509351)' '(37.77705101, -122.4070272)'
 '(37.77936284, -122.4103312)' '(37.77950511, -122.4143194)'
 '(37.78105093, -122.4149724)' '(37.78115545, -122.4156701)'
 '(37.7811643, -122.4140251)' '(37.78122201, -122.4166513)'
 '(37.78322474, -122.4200359)' '(37.78330745, -122.3920745)'
 '(37.78439716, -122.4048526)' '(37.78450932, -122.404624)'
 '(37.78482062, -122.4393106)' '(37.78603672, -122.4065154)'
 '(37.78625102, -122.3995208)' '(37.78709023, -122.4047315)'
 '(37.78782523, -122.4103705)' '(37.78789864, -122.4005462)'
 '(37.788726

### Map of Schools

In [28]:
map_of_schools = pd.read_csv('data/schools_map_data.csv')

In [50]:
map_of_schools.head()

Unnamed: 0,Campus Name,CCSF Entity,Lower Grade,Upper Grade,Grade Range,Category,Map Label,Lower Age,Upper Age,General Type,CDS Code,Campus Address,Supervisor District,County FIPS,County Name,Location 1
0,Alamo Elementary School,SFUSD,0,5,K-5,USD Grades K-5,PS001,5,10,PS,38684786040695,"250 23RD AVE, San Francisco, CA 94121",1,6075,SAN FRANCISCO,"CA\n(37.7830048, -122.4822998)"
1,Alvarado Elementary School,SFUSD,0,5,K-5,USD Grades K-5,PS002,5,10,PS,38684786040703,"625 DOUGLASS ST, San Francisco, CA 94114",8,6075,SAN FRANCISCO,"CA\n(37.7536812, -122.4381943)"
2,Aptos Middle School,SFUSD,6,8,6-8,USD Grades 6-8,PS003,11,13,PS,38684786062020,"105 APTOS AVE, San Francisco, CA 94127",7,6075,SAN FRANCISCO,"CA\n(37.7296715, -122.4657822)"
3,Argonne Early Education School,SFUSD,-2,0,PK-TK,USD PreK/TK,PS004,3,5,PS,384000981,"750 16TH AVE, San Francisco, CA 94118",1,6075,SAN FRANCISCO,"CA\n(37.7739677, -122.4740601)"
4,Argonne Elementary School,SFUSD,0,5,K-5,USD Grades K-5,PS005,5,10,PS,38684786040737,"680 18TH AVE, San Francisco, CA 94121",1,6075,SAN FRANCISCO,"CA\n(37.7753067, -122.4763107)"


In [51]:
print('Rows: %d, Columns: %d' % (map_of_schools.shape[0], map_of_schools.shape[1]))
print(map_of_schools.columns.values)

Rows: 445, Columns: 16
['Campus Name' 'CCSF Entity' 'Lower Grade' 'Upper Grade' 'Grade Range'
 'Category' 'Map Label' 'Lower Age' 'Upper Age' 'General Type' 'CDS Code'
 'Campus Address' 'Supervisor District' 'County FIPS' 'County Name'
 'Location 1']


In [52]:
print('Null values: %s.' % identifyNullValues(map_of_schools))

Null values: {}.


###### Campus Name

In [53]:
getStatsForColumn(map_of_schools, 'Campus Name')

Unique values: ['ABC Preschool' 'Adda Clevenger School' 'Alamo Elementary School'
 'Alt School - Alamo Square' 'Alt School - Dogpatch 1'
 'Alt School - Dogpatch 2' 'Alt School - Fort Mason'
 'Alt School - North Beach' 'Alt School - Potrero Hill'
 'Alt School - Soma' 'Alt School - Yerba Buena' 'Alta Plaza Preschool'
 'Alta Vista School' 'Alvarado Elementary School' 'Amici World School'
 'Angelas Children Center' 'Angelinas Preschool'
 'Angelinas School Day Care' 'Aptos Middle School'
 'Archbishop Riordan High School' 'Argonne Early Education School'
 'Argonne Elementary School' 'Ark Christian Preschool'
 'Asawa, Ruth Asawa San Francisco School Of The Arts\n\n / Academy Of Arts And Sciences'
 'Bais Menachem Yeshiva Day School' 'Balboa High School'
 'Balboa Preschool' 'Bay School Of San Francisco'
 'Big City Montessori School' 'Brandeis Hillel School Of San Francisco'
 'Bright Horizons - 2Nd St' 'Bright Horizons - Kansas St'
 'Bright Horizons - Letterman Digital Arts Child Care Center'
 '

###### CCSF Entity

In [54]:
getStatsForColumn(map_of_schools, 'CCSF Entity')

Unique values: ['Private' 'SFCCD' 'SFUSD'] 
Unique counts: 3.


###### Lower Grade

In [55]:
getStatsForColumn(map_of_schools, 'Lower Grade')

Unique values: [-4 -3 -2 -1  0  1  2  3  4  5  6  7  8  9 13] 
Unique counts: 15.


###### Upper Grade

In [56]:
getStatsForColumn(map_of_schools, 'Upper Grade')

Unique values: [-2 -1  0  1  2  4  5  6  8 12 14] 
Unique counts: 11.


###### Grade Range

In [None]:
getStatsForColumn(map_of_schools, 'Grade Range')

###### Category

In [57]:
getStatsForColumn(map_of_schools, 'Category')

Unique values: ['Community College District' 'Independent / Private' 'USD Charter School'
 'USD Charter School; PreK-5' 'USD County School' 'USD Grades 6-12'
 'USD Grades 6-8' 'USD Grades 9-12' 'USD Grades K-4' 'USD Grades K-5'
 'USD Grades K-5, PreK-5' 'USD Grades K-8' 'USD Grades PK-8' 'USD PreK'
 'USD PreK, Grades K-5' 'USD PreK-4' 'USD PreK-4, Grades K-5' 'USD PreK-5'
 'USD PreK-5                                   ' 'USD PreK-5, Grades K-5'
 'USD PreK-8' 'USD PreK/TK' 'USD PreK/TK-4' 'USD PreK/TK-5'] 
Unique counts: 24.


###### Map Label

In [58]:
getStatsForColumn(map_of_schools, 'Map Label')

Unique values: ['CC01' 'CC02' 'CC03' 'CC04' 'CC05' 'CC06' 'CC07' 'CC08' 'CC09' 'CC10'
 'CDC001' 'CDC002' 'CDC003' 'CDC004' 'CDC005' 'CDC006' 'CDC007' 'CDC008'
 'CDC009' 'CDC010' 'CDC011' 'CDC012' 'CDC013' 'CDC014' 'CDC015' 'CDC016'
 'CDC017' 'CDC018' 'CDC019' 'CDC020' 'CDC021' 'CDC022' 'CDC023' 'CDC024'
 'CDC025' 'CDC026' 'CDC027' 'CDC028' 'CDC029' 'CDC030' 'CDC031' 'CDC032'
 'CDC033' 'CDC034' 'CDC035' 'CDC036' 'CDC037' 'CDC038' 'CDC039' 'CDC040'
 'CDC041' 'CDC042' 'CDC043' 'CDC044' 'CDC045' 'CDC046' 'CDC047' 'CDC048'
 'CDC049' 'CDC050' 'CDC051' 'CDC052' 'CDC053' 'CDC054' 'CDC055' 'CDC056'
 'CDC057' 'CDC058' 'CDC059' 'CDC060' 'CDC061' 'CDC062' 'CDC063' 'CDC064'
 'CDC065' 'CDC066' 'CDC067' 'CDC068' 'CDC069' 'CDC070' 'CDC071' 'CDC072'
 'CDC073' 'CDC074' 'CDC075' 'CDC076' 'CDC077' 'CDC078' 'CDC079' 'CDC080'
 'CDC081' 'CDC082' 'CDC083' 'CDC084' 'CDC085' 'CDC086' 'CDC087' 'CDC088'
 'CDC089' 'CDC090' 'CDC091' 'CDC092' 'CDC093' 'CDC094' 'CDC095' 'CDC096'
 'CDC097' 'CDC098' 'CDC099' 'CDC100' '

###### Lower Age

In [59]:
getStatsForColumn(map_of_schools, 'Lower Age')

Unique values: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 18] 
Unique counts: 15.


###### Upper Age

In [60]:
getStatsForColumn(map_of_schools, 'Upper Age')

Unique values: [ 3  4  5  6  7  9 10 11 13 17 19] 
Unique counts: 11.


###### General Type

In [61]:
getStatsForColumn(map_of_schools, 'General Type')

Unique values: ['CC' 'CDC' 'IND' 'PS'] 
Unique counts: 4.


###### CDS Code

In [62]:
getStatsForColumn(map_of_schools, 'CDS Code')

Unique values: ['380500086' '380500099' '380500240' '380500254' '380500308' '380500371'
 '380500487' '380500491' '380500512' '380500619' '380501318' '380503028'
 '380503540' '380503588' '380503970' '380504016' '380504038' '380504052'
 '380504057' '380504073' '380504229' '380504276' '380504308' '380504311'
 '380504314' '380504346' '380504356' '380504362' '380504369' '380504379'
 '380504390' '380504419' '380504445' '380504451' '380504523' '380504535'
 '380504700' '380504714' '380504742' '380504776' '380504783' '380504793'
 '380504876' '380504889' '380504982' '380505027' '380505035' '380505039'
 '380505138' '380505246' '380505268' '380505313' '380505485' '380505563'
 '380505671' '380505707' '380505730' '380505779' '380505898' '380506088'
 '380506198' '380506270' '380506275' '380506320' '380506340' '380506430'
 '380506485' '380506487' '380506527' '38103893830445' '384000037'
 '384000060' '384000093' '384000125' '384000227' '384000303' '384000322'
 '384000345' '384000353' '384000406' '38400

###### Campus Address

In [63]:
getStatsForColumn(map_of_schools, 'Campus Address')

Unique values: ['1 CASHMERE ST, SAN FRANCISCO, CA 94124'
 '1 DR CARLTON B GOODLETT PL, RM 68, SAN FRANCISCO, CA 94102'
 '1 LETTERMAN DR BLDG B, SAN FRANCISCO, CA 94129'
 '1 TRENTON ST, San Francisco, CA 94108'
 '1 WEBSTER ST, SAN FRANCISCO, CA 94123'
 '10 KIRKHAM ST, SAN FRANCISCO, CA 94143'
 '100 WHITNEY YOUNG CIR, SAN FRANCISCO, CA 94124'
 '1000 CAYUGA AVE, San Francisco, CA 94112'
 '101 HOWARD ST, SAN FRANCISCO, CA 94105'
 '1010 MONTGOMERY ST, SAN FRANCISCO, CA 94133'
 '102 SAN JOSE AVE, SAN FRANCISCO, CA 94110'
 '1025 14TH ST, San Francisco, CA 94114'
 '1025 LAGUNA ST, SAN FRANCISCO CA 94115'
 '1035 GILMAN AVE, San Francisco, CA 94124'
 '1042 JAMESTOWN AVE, SAN FRANCISCO, CA 94124'
 '105 APTOS AVE, San Francisco, CA 94127'
 '1050 KIRKHAM ST, SAN FRANCISCO, CA 94122'
 '1050 YORK ST, SAN FRANCISCO, CA 94110'
 '1055 ELLIS ST, SAN FRANCISCO, CA 94109'
 '1060 KEY AVE, SAN FRANCISCO CA 94124'
 '1060 TENNESSEE ST STE A, SAN FRANCISCO, CA 94107'
 '1090 QUINTARA ST, SAN FRANCISCO, CA 94116'

###### County FIPS

In [64]:
getStatsForColumn(map_of_schools, 'County FIPS')

Unique values: [6075] 
Unique counts: 1.


###### County Name

In [65]:
getStatsForColumn(map_of_schools, 'County Name')

Unique values: ['SAN FRANCISCO'] 
Unique counts: 1.


###### Location 1

In [66]:
getStatsForColumn(map_of_schools, 'Location 1')

Unique values: ['CA\n(37.7091217, -122.4095459)' 'CA\n(37.7100372, -122.4477539)'
 'CA\n(37.7101822, -122.4180984)' 'CA\n(37.7101822, -122.4340286)'
 'CA\n(37.7104225, -122.4467392)' 'CA\n(37.7122993, -122.4733887)'
 'CA\n(37.7125244, -122.4098969)' 'CA\n(37.7131233, -122.4752960)'
 'CA\n(37.7131310, -122.4231873)' 'CA\n(37.7133789, -122.4095535)'
 'CA\n(37.7134209, -122.4756393)' 'CA\n(37.7138405, -122.4783173)'
 'CA\n(37.7143364, -122.4358292)' 'CA\n(37.7144775, -122.4596329)'
 'CA\n(37.7144966, -122.4655762)' 'CA\n(37.7157249, -122.4345322)'
 'CA\n(37.7157440, -122.4120789)' 'CA\n(37.7167816, -122.4666748)'
 'CA\n(37.7174644, -122.4719696)' 'CA\n(37.7178993, -122.3892212)'
 'CA\n(37.7182655, -122.4809113)' 'CA\n(37.7186584, -122.4078522)'
 'CA\n(37.7187843, -122.4246674)' 'CA\n(37.7191620, -122.3942413)'
 'CA\n(37.7195320, -122.3965607)' 'CA\n(37.7195358, -122.4251480)'
 'CA\n(37.7203407, -122.4291916)' 'CA\n(37.7209511, -122.4296951)'
 'CA\n(37.7211418, -122.4413986)' 'CA\n(37.7213

### Commuter Shuttle Stops

In [26]:
map_of_commutershuttles = pd.read_csv('data/commutershuttles_stops_data.csv')

In [69]:
map_of_commutershuttles.head()

Unnamed: 0,the_geom,OBJECTID,FID_,STOP_ID,LOCATION,BUS_ZONE_T,LENGTH,POSITION,NUB_PARKIN,PARKINGTYP,...,CREATIONDA,LABEL_PLAC,DECAL_WORK,SIGN_WORK,PAINT_WORK,WORK_ORDER,CREATED_USER,CREATED_DATE,LAST_EDITED_USER,LAST_EDITED_DATE
0,POINT (-122.450600008436 37.744389948705),55,,5829,"100 O'Shaughnessy Blvd east side, mid-block",MuniZone,FLAG STOP,Mid-block,0,Bus,...,Pilot,Shelter (old),Shelter,,,03/10/2016 12:00:00 AM +0000,,,AJONLIN,06/30/2017 10:38:44 PM +0000
1,POINT (-122.427289307526 37.751661489099),643,,20049,"24th St & Church St SE corner, far-side, 6-10am",WhiteZone,113,Farside,5,Meter,...,9/25/17,,,,,,AJONLIN,09/25/2017 06:28:09 PM +0000,AJONLIN,09/25/2017 06:30:06 PM +0000
2,POINT (-122.423569008696 37.788519947211),100,,20036,"Franklin St&Bush St NE corner, white zone, 4-8pm",WhiteZone,100,Farside,5,Meter,...,6/6/16,,,,,,,,AJONLIN,06/30/2017 10:38:46 PM +0000
3,POINT (-122.458770009206 37.781079948155),25,,3650,"Arguello Blvd&Geary Blvd SE corner, near-side ...",MuniZone,106,Nearside,0,Bus,...,Pilot,Shelter,Shelter,,,03/10/2016 12:00:00 AM +0000,,,AJONLIN,06/30/2017 10:38:42 PM +0000
4,POINT (-122.414270009673 37.77826994755),86,,20015,"8th St & Market St SW corner, white zone, 6-10...",WhiteZone,85,Farside,4,Meter,...,Pilot,ExistingPole,,,,03/10/2016 12:00:00 AM +0000,,,AJONLIN,09/25/2017 06:19:37 PM +0000


In [70]:
print('Rows: %d, Columns: %d' % (map_of_commutershuttles.shape[0], map_of_commutershuttles.shape[1]))
print(map_of_commutershuttles.columns.values)

Rows: 159, Columns: 27
['the_geom' 'OBJECTID' 'FID_' 'STOP_ID' 'LOCATION' 'BUS_ZONE_T' 'LENGTH'
 'POSITION' 'NUB_PARKIN' 'PARKINGTYP' 'LATITUDE' 'LONGITUDE' 'ARTERIAL'
 'ZONE_HOURS' 'STATUS' 'CURRENTSTO' 'PILOTSTOPE' 'CREATIONDA' 'LABEL_PLAC'
 'DECAL_WORK' 'SIGN_WORK' 'PAINT_WORK' 'WORK_ORDER' 'CREATED_USER'
 'CREATED_DATE' 'LAST_EDITED_USER' 'LAST_EDITED_DATE']


In [71]:
print('Null values: %s.' % identifyNullValues(map_of_commutershuttles))

Null values: {'WORK_ORDER': 26, 'FID_': 159, 'PARKINGTYP': 28, 'PAINT_WORK': 148, 'CURRENTSTO': 10, 'PILOTSTOPE': 12, 'ZONE_HOURS': 107, 'CREATED_USER': 98, 'LENGTH': 1, 'LABEL_PLAC': 33, 'CREATED_DATE': 98, 'DECAL_WORK': 56, 'SIGN_WORK': 159}.


In [73]:
getStatsForColumn(map_of_commutershuttles, 'WORK_ORDER')

Unique values: [nan '03/10/2016 12:00:00 AM +0000' '03/18/2016 12:00:00 AM +0000'
 '04/19/2016 12:00:00 AM +0000'] 
Unique counts: 4.


In [74]:
getStatsForColumn(map_of_commutershuttles, 'FID_')

Unique values: [nan] 
Unique counts: 1.


###### Parking Type

In [75]:
getStatsForColumn(map_of_commutershuttles, 'PARKINGTYP')

Unique values: [nan '24hr' 'Bus' 'Meter' 'RPP' 'Unreg'] 
Unique counts: 6.


In [76]:
getStatsForColumn(map_of_commutershuttles, 'CURRENTSTO')

Unique values: [  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  13.  14.  16.
  17.  18.  19.  23.  25.  27.  28.  30.  31.  32.  33.  34.  35.  36.
  37.  38.  39.  40.  41.  42.  43.  44.  47.  48.  51.  56.  58.  59.
  61.  64.  69.  70.  74.  77.  81.  82.  85.  89.  90.  92. 100. 101.
 104. 117. 118.  nan] 
Unique counts: 60.


In [77]:
getStatsForColumn(map_of_commutershuttles, 'PILOTSTOPE')

Unique values: [  0.   1.   2.   3.   4.   6.   7.   8.   9.  10.  11.  12.  13.  14.
  16.  17.  18.  19.  20.  21.  22.  23.  24.  26.  27.  28.  29.  30.
  31.  34.  35.  37.  39.  40.  42.  44.  46.  47.  48.  50.  56.  58.
  60.  61.  64.  65.  66.  69.  70.  71.  72.  78.  85.  86.  97. 102.
 103. 142.  nan] 
Unique counts: 59.


###### Zone hours

In [78]:
getStatsForColumn(map_of_commutershuttles, 'ZONE_HOURS')

Unique values: [nan 'AM' 'AM PM' 'PM'] 
Unique counts: 4.


###### Created User

In [79]:
getStatsForColumn(map_of_commutershuttles, 'CREATED_USER')

Unique values: [nan 'AJONLIN'] 
Unique counts: 2.


###### Length

In [80]:
getStatsForColumn(map_of_commutershuttles, 'LENGTH')

Unique values: [nan '100' '100-BI' '106' '107' '110' '112' '113' '116' '118' '119' '120'
 '125' '129' '134' '135' '141' '144' '145' '15' '150' '156' '160'
 '23 to 130 feet south' '244 to 440 feet south' '245'
 '32 to 153 feet west' '40' '45' '50' '55' '55 to 181 feet north' '60'
 '65' '67' '70' '70-115' '73' '75' '75 to 205 feet south' '75-130' '78'
 '80' '81' '82' '85' '88' '89' '90' '90-134' '92' '95' '99' 'FLAG STOP'] 
Unique counts: 54.


###### Label Place

In [81]:
getStatsForColumn(map_of_commutershuttles, 'LABEL_PLAC')

Unique values: [nan 'ExistingPole' 'Landor' 'Landor (vandalized)' 'New' 'NewLandor'
 'Shelter' 'Shelter (old)' 'Shelter + Landor'] 
Unique counts: 9.


###### Created Date

In [82]:
getStatsForColumn(map_of_commutershuttles, 'CREATED_DATE')

Unique values: [nan '01/25/2018 10:18:57 PM +0000' '01/25/2018 10:22:03 PM +0000'
 '01/25/2018 10:23:31 PM +0000' '01/25/2018 10:27:01 PM +0000'
 '01/25/2018 10:46:12 PM +0000' '02/24/2017 02:07:25 AM +0000'
 '06/21/2017 04:52:30 PM +0000' '09/25/2017 06:26:35 PM +0000'
 '09/25/2017 06:28:09 PM +0000' '11/20/2017 04:52:49 PM +0000'
 '12/01/2017 05:44:06 PM +0000' '12/01/2017 05:44:33 PM +0000'
 '12/01/2017 11:09:14 PM +0000'] 
Unique counts: 14.


###### Decal Work

In [84]:
getStatsForColumn(map_of_commutershuttles, 'DECAL_WORK')

Unique values: [nan 'Landor' 'Remove' 'Shelter'] 
Unique counts: 4.


###### Sign Work

In [86]:
getStatsForColumn(map_of_commutershuttles, 'SIGN_WORK')

Unique values: [nan] 
Unique counts: 1.


###### Cleaning the null values

In [87]:
map_of_commutershuttles = map_of_commutershuttles.replace(np.nan, 'N/A')

### OffStreet Parking

In [10]:
offstreetparking = pd.read_csv('data/offstreetparking_data.csv')

In [11]:
offstreetparking.head()

Unnamed: 0,the_geom,OBJECTID,OSP_ID,FACILITY_NAME,STREET_ADDRESS,LOCATION,PHONE,PM_DISTRICT_ID,AREA_TYPE,BLOCKFACE_ID,...,MAIN_ENTRANCE_LAT,CREATED_DT,LAST_UPD_DT,LAST_UPD_USER,LAST_UPD_PGM,GLOBALID,CREATED_USER,CREATED_DATE,LAST_EDITED_USER,LAST_EDITED_DATE
0,POINT (-122.406529411678 37.758368903393),4177,920,SF General Hospital Lot,993 Potrero Ave,Potrero between 21st St & 22nd St,,29.0,-,615091,...,37.758364,02/08/2011 06:51:00 PM +0000,04/20/2011 06:15:13 PM +0000,Randy/Eric,SQL Dev-Upd GEOM from TEST,{87D5C91D-10A8-4B59-91AF-1DB7EB26FCE2},MTA,03/16/2018 11:13:55 PM +0000,MTA,03/16/2018 11:13:55 PM +0000
1,POINT (-122.463870720108 37.763353000101),4171,913,7th and Irving Lot,1340 07th Avenue,7th Ave between Irving & Judah,,24.0,-,107132,...,37.763348,02/08/2011 06:51:00 PM +0000,04/20/2011 06:15:13 PM +0000,Randy/Eric,SQL Dev-Upd GEOM from TEST,{A938FEDB-96C7-4150-9FB5-F1D9A1F6BA81},MTA,03/16/2018 11:13:55 PM +0000,MTA,03/16/2018 11:13:55 PM +0000
2,POINT (-122.432879384429 37.784959687448),4187,936,Japan Center Annex Garage,1680 Fillmore St,Fillmore between Geary & Post,(415) 567-4573,5.0,Pilot,415162,...,37.784955,02/08/2011 06:51:00 PM +0000,10/14/2016 04:27:23 PM +0000,ddunham2,OSP Manager Tool,{6041F578-88EE-4BF7-8908-B11BAC0E7E66},MTA,03/16/2018 11:13:55 PM +0000,MTA,03/16/2018 11:13:55 PM +0000
3,POINT (-122.417780328727 37.780251512173),4182,931,Civic Center Garage,355 McAllister Street,McAllister between Larkin & Polk,(415) 863-1537,3.0,Pilot,563031,...,37.780247,02/08/2011 06:51:00 PM +0000,10/14/2016 12:38:54 PM +0000,ddunham2,OSP Manager Tool,{09446F0D-4E9D-425A-B2E5-A246525EA3D1},MTA,03/16/2018 11:13:55 PM +0000,MTA,03/16/2018 11:13:55 PM +0000
4,POINT (-122.387063310206 37.775318866354),3886,890,Pier 48 Lot,Terry A Francois Blvd,Terry Francois @ Pier 48,,2.0,Pilot,869011,...,37.775314,02/08/2011 06:51:00 PM +0000,04/20/2011 06:15:13 PM +0000,Randy/Eric,SQL Dev-Upd GEOM from TEST,{7FF1C047-093F-4844-B52C-DA4265527CEE},MTA,03/16/2018 11:13:55 PM +0000,MTA,03/16/2018 11:13:55 PM +0000


In [12]:
print('Rows: %d, Columns: %d' % (offstreetparking.shape[0], offstreetparking.shape[1]))
print(offstreetparking.columns.values)

Rows: 45, Columns: 43
['the_geom' 'OBJECTID' 'OSP_ID' 'FACILITY_NAME' 'STREET_ADDRESS'
 'LOCATION' 'PHONE' 'PM_DISTRICT_ID' 'AREA_TYPE' 'BLOCKFACE_ID'
 'STREET_SEG_CTRLN_ID' 'FACILITY_TYPE' 'OWNER' 'SENSOR_FLAG' 'METER_FLAG'
 'DATA_FEED_FLAG' 'SERVICES' 'WEB_SITE' 'VEH_ENTRY_LANES' 'MC_ENTRY_LANES'
 'VEH_EXIT_LANES' 'MC_EXIT_LANES' 'SYSTEM' 'HI_VOL_DISC_FLAG'
 'VALIDATION_PGM' 'SP_EVT_RATES' 'ACTIVATION_FEE' 'CARD_REPLACE_FEE'
 'LATE_FEE' 'REOPEN_FEE' 'NO_KEY_VALET_FEE' 'CAPACITY'
 'MAIN_ENTRANCE_LONG' 'MAIN_ENTRANCE_LAT' 'CREATED_DT' 'LAST_UPD_DT'
 'LAST_UPD_USER' 'LAST_UPD_PGM' 'GLOBALID' 'CREATED_USER' 'CREATED_DATE'
 'LAST_EDITED_USER' 'LAST_EDITED_DATE']


In [13]:
print('Null values: %s.' % identifyNullValues(offstreetparking))

Null values: {'WEB_SITE': 39, 'SYSTEM': 25, 'PM_DISTRICT_ID': 1, 'PHONE': 25, 'VALIDATION_PGM': 39, 'SERVICES': 25, 'STREET_ADDRESS': 1}.


In [14]:
getStatsForColumn(offstreetparking, 'the_geom')

Unique values: ['POINT (-122.385983209193 37.769634946593)'
 'POINT (-122.386004109541 37.770187116626)'
 'POINT (-122.387063310206 37.775318866354)'
 'POINT (-122.394536795306 37.796378795)'
 'POINT (-122.398616414123 37.795446487676)'
 'POINT (-122.399492713075 37.784528736752)'
 'POINT (-122.404532710042 37.754192043535)'
 'POINT (-122.404560814648 37.792004216999)'
 'POINT (-122.404819608283 37.729769391774)'
 'POINT (-122.405051942706 37.794870494286)'
 'POINT (-122.405550773113 37.783282619205)'
 'POINT (-122.405649379362 37.775347195944)'
 'POINT (-122.406529411678 37.758368903393)'
 'POINT (-122.406868834313 37.78978786967)'
 'POINT (-122.407167272538 37.786397491172)'
 'POINT (-122.407461161246 37.787683860832)'
 'POINT (-122.409500685065 37.79838644797)'
 'POINT (-122.409768460947 37.798676927151)'
 'POINT (-122.417519912833 37.752094292733)'
 'POINT (-122.417780328727 37.780251512173)'
 'POINT (-122.420113816683 37.78852168545)'
 'POINT (-122.420446776851 37.756844984844)'
 

###### object id

In [15]:
getStatsForColumn(offstreetparking, 'OBJECTID')

Unique values: [3886 3887 3888 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171
 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185
 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199
 4200 4201 4202] 
Unique counts: 45.


###### Facility name

In [17]:
getStatsForColumn(offstreetparking, 'FACILITY_NAME')

Unique values: ['16th and Hoff Garage' '18th and Collingwood Lot' '18th and Geary Lot'
 '19th and Ocean Lot' '20th and Irving Lot' '21st and Geary Lot'
 '24th and Capp Lot' '24th and Noe Lot' '7th & Harrison Lot'
 '7th and Irving Lot' '8th and Clement Lot' '8th and Irving Lot'
 '9th and Clement Lot' 'California and Steiner Lot' 'Castro Theater Lot'
 'Civic Center Garage' 'Claremont and Ulloa Lot' 'Ellis OFarrell Garage'
 'Felton and San Bruno Lot' 'Fifth and Mission Garage'
 'Golden Gateway Garage' 'Japan Center Annex Garage' 'Japan Center Garage'
 'Junipero Serra and Ocean Lot' 'Lombard Street Garage'
 'Mission Bartlett Garage' 'Mission and Norton Lot'
 'Moscone Center Garage' 'North Beach Garage' 'Performing Arts Garage'
 'Phelan Loop Lot' 'Pier 1/2 Motorcycle Lot' 'Pier 48 Lot' 'Pier 52 Lot'
 'Pier 52 Lot B' 'Pierce Street Garage' 'Polk Bush Garage'
 'Portsmouth Square Garage' 'SF General Hospital Garage'
 'SF General Hospital Lot' 'St. Marys Square Garage'
 'Sutter Stockton Garage'

###### Street Address

In [20]:
getStatsForColumn(offstreetparking, 'STREET_ADDRESS')

Unique values: [nan '1000 Ocean Avenue' "123 O'Farrell Street" '1275 20th Avenue'
 '1325 08th Avenue' '1340 07th Avenue' '1399 Bush Street'
 '1610 Geary Blvd' '1680 Fillmore St' '174 West Portal Avenue'
 '20 Norton Street' '2055 Lombard Street' '2450 California Street'
 '25 Felton Street' '250 Clay Street' '2500 Ocean Avenue'
 '2501 23rd Street' '255 3rd Street' '3000 19th Avenue' '324 08th Avenue'
 '3252 Pierce Street' '3255 21st Street' '3255 24th Street'
 '330 09th Avenue' '333 Post Street' '355 McAllister Street'
 '360 Grove Street' '4061 24th Street' '4116 18th Street' '415 7th Street'
 '421 18th Avenue' '433 Kearny Street' '44 Hoff Street'
 '444 Stockton Street' '457 Castro Street' '5732 Geary Blvd'
 '601 Terry A Francois Blvd' '733 Kearny Street' '735 Vallejo Street'
 '766 Vallejo Street' '807 Ulloa' '833 Mission Street' '993 Potrero Ave'
 'Terry A Francois Blvd'] 
Unique counts: 44.


###### Location

In [19]:
getStatsForColumn(offstreetparking, 'LOCATION')

Unique values: ['18th Ave between Geary & Anza' '18th between Castro & Collingwood'
 '20th Ave between Lincoln & Irving' '21st St between Mission & Valencia'
 '24th St between Noe & Castro' '24th St between San Bruno & Utah'
 '3rd St between Folsom & Howard' '7th Ave between Irving & Judah'
 '7th St between Harrison & Bryant' '8th Ave between Clement & Geary'
 '8th Ave between Irving & Judah' '9th Ave between Clement & Geary'
 'Bush between Polk & Larkin' 'Bush between Stockton & Grant'
 'California between Fillmore & Steiner' 'Castro between 17th & 18th'
 'Clay between Front & Davis' 'Felton between San Bruno & Girard'
 'Fillmore between Geary & Post' 'Geary between 21st Ave & 22nd Ave'
 'Geary between Laguna & Webster' 'Geary between Stockton & Powell'
 'Grove between Franklin & Gough' 'Hoff between 16th St & 17th St'
 'Kearny between Clay & Washington' 'Kearny between Pine & California'
 'Lilac @ 24th St' 'McAllister between Larkin & Polk'
 'Mission between 5th and 4th Streets'
 'Mo

###### Phone

In [21]:
getStatsForColumn(offstreetparking, 'PHONE')

Unique values: [nan '(415) 206-4907' '(415) 252-8238' '(415) 397-0631' '(415) 399-9564'
 '(415) 433-4722' '(415) 440-1984' '(415) 558-9052' '(415) 567-4573'
 '(415) 567-7816' '(415) 777-2782' '(415) 821-6715' '(415) 861-4048'
 '(415) 863-1537' '(415) 956-8106' '(415) 982-6353' '(415) 982-8370'
 '(415) 982-8522' '(415) 986-4800' '(415) 989-4490'] 
Unique counts: 20.


######  district id

In [22]:
getStatsForColumn(offstreetparking, 'PM_DISTRICT_ID')

Unique values: [ 1.  2.  3.  4.  5.  7.  9. 10. 12. 15. 16. 21. 24. 28. 29. 31. 33. 35.
 36. nan] 
Unique counts: 20.


######  area type

In [23]:
getStatsForColumn(offstreetparking, 'AREA_TYPE')

Unique values: ['-' 'Control' 'Pilot'] 
Unique counts: 3.


######  blockface id

In [24]:
getStatsForColumn(offstreetparking, 'BLOCKFACE_ID')

Unique values: [107132 108032 108131 109032 118041 120121 203021 207042 218412 221321
 223251 224401 336131 350242 352041 359022 415162 416001 440162 440572
 446032 473002 520041 520071 549201 563031 568081 584002 591102 591252
 591261 593011 607322 614031 615091 664041 690081 705071 705072 725012
 798001 830012 869011 869051] 
Unique counts: 44.


###### street segment

In [25]:
getStatsForColumn(offstreetparking, 'STREET_SEG_CTRLN_ID')

Unique values: [  168000   360000   374000   389000   396000   425000   830000   892000
  1005000  1111000  1250000  1362000  3443000  3558000  3794000  4081000
  5462000  5547000  6040201  6078201  6556000  6935000  7754000  7758000
  8331000  8459101  8893000  9096000  9714000  9722000  9819201  9847000
  9849000 10445000 10628000 10667101 12199001 12531000 12532004 12553201
 12784000 13095000 13096000 13578201] 
Unique counts: 44.


###### facility type

In [26]:
getStatsForColumn(offstreetparking, 'FACILITY_TYPE')

Unique values: ['G' 'L'] 
Unique counts: 2.


###### owner

In [27]:
getStatsForColumn(offstreetparking, 'OWNER')

Unique values: ['CALTRANS' 'Port' 'RPD' 'SFMTA'] 
Unique counts: 4.


######  sensor flag

In [28]:
getStatsForColumn(offstreetparking, 'SENSOR_FLAG')

Unique values: ['N' 'Y'] 
Unique counts: 2.


### Public park open space data

In [29]:
park_open_space_map = pd.read_csv('data/public_park_and_open_space_data.csv')

In [30]:
park_open_space_map.head()

Unnamed: 0,ParkName,ParkType,ParkServiceArea,PSAManager,email,Number,Zipcode,Acreage,SupDist,ParkID,Location 1,Lat
0,ParkName,ParkType,ParkServiceArea,PSAManager,email,Number,,,,,,
1,10TH AVE/CLEMENT MINI PARK,Mini Park,PSA 1,"Elder, Steve",steven.elder@sfgov.org,(415) 601-6501,94118.0,0.66,1.0,156.0,"351 9th Ave\nSan Francisco, CA\n(37.78184397, ...",
2,15TH AVENUE STEPS,Mini Park,PSA 4,"Sheehy, Chuck",charles.sheehy@sfgov.org,(415) 218-2226,94122.0,0.26,7.0,185.0,"15th Ave b w Kirkham\nSan Francisco, CA\n(37.7...",
3,24TH/YORK MINI PARK,Mini Park,PSA 6,"Field, Adrian",adrian.field@sfgov.org,(415) 717-2872,94110.0,0.12,9.0,51.0,"24th\nSan Francisco, CA\n(37.75306042, -122.40...",
4,29TH/DIAMOND OPEN SPACE,Neighborhood Park or Playground,PSA 5,"O'Brien, Teresa",teresa.o'brien@sfgov.org,(415) 819-2699,94131.0,0.82,8.0,194.0,"Diamond\nSan Francisco, CA\n(37.74360211, -122...",


In [96]:
print('Rows: %d, Columns: %d' % (park_open_space_map .shape[0], park_open_space_map.shape[1]))

Rows: 230, Columns: 12


In [97]:
print(park_open_space_map.columns.values)

['ParkName' 'ParkType' 'ParkServiceArea' 'PSAManager' 'email' 'Number'
 'Zipcode' 'Acreage' 'SupDist' 'ParkID' 'Location 1' 'Lat']


In [98]:
print('Null values: %s.' % identifyNullValues(park_open_space_map))

Null values: {'SupDist': 3, 'Zipcode': 7, 'Acreage': 1, 'ParkID': 1, 'Lat': 230, 'Location 1': 28}.


######  Parkname

In [31]:
getStatsForColumn(park_open_space_map, 'ParkName')

Unique values: ['10TH AVE/CLEMENT MINI PARK' '15TH AVENUE STEPS' '24TH/YORK MINI PARK'
 '29TH/DIAMOND OPEN SPACE' 'ADAM ROGERS PARK' 'ALAMO SQUARE'
 'ALICE CHALMERS PLAYGROUND' 'ALICE MARBLE TENNIS COURTS'
 'ALIOTO MINI PARK' 'ALLYNE PARK' 'ALTA PLAZA'
 'ANGELO J. ROSSI PLAYGROUND' 'APTOS PLAYGROUND' 'ARGONNE PLAYGROUND'
 'Arkansas Friendship Garden' 'Arlington Community Garden'
 'BALBOA NATURAL AREA' 'BALBOA PARK' 'BAY VIEW PARK' 'BAY VIEW PLAYGROUND'
 "BEIDEMAN/O'FARRELL MINI PARK" 'BERKELEY WAY OPEN SPACE'
 'BERNAL HEIGHTS PARK' 'BERNAL HEIGHTS RECREATION CENTER'
 'BILLY GOAT HILL' 'BROADWAY TUNNEL EAST MINI PARK'
 'BROADWAY TUNNEL WEST MINI PARK' 'BROOKS PARK'
 'BROTHERHOOD/CHESTER MINI PARK' 'BUCHANAN STREET MALL' 'BUENA VISTA PARK'
 'BUSH/BRODERICK MINI PARK' 'CABRILLO PLAYGROUND' 'CAMP MATHER'
 'CANDLESTICK PARK' 'CARL LARSEN PARK' 'CAYUGA PLAYGROUND'
 'CAYUGA/LAMARTINE MINI PARK' 'CHESTNUT/KEARNY OPEN SPACE'
 'CHINESE RECREATION CENTER' 'COLERIDGE MINI PARK'
 'COLLIS P. HUNTING

######  park type

In [32]:
getStatsForColumn(park_open_space_map, 'ParkType')

Unique values: ['Civic Plaza or Square' 'Community Garden' 'Concession' 'Family Camp'
 'Mini Park' 'Neighborhood Park or Playground' 'ParkType' 'Parkway'
 'Regional Park' 'Zoological Garden'] 
Unique counts: 10.


###### Park Service Area

In [33]:
getStatsForColumn(park_open_space_map, 'ParkServiceArea' )

Unique values: ['Candlestick Park Stadium' 'Golden Gate Park' 'Outside SF' 'PSA 1'
 'PSA 2' 'PSA 3' 'PSA 4' 'PSA 5' 'PSA 6' 'ParkServiceArea'] 
Unique counts: 10.


######  PSA Manager

In [34]:
getStatsForColumn(park_open_space_map, 'PSAManager' )

Unique values: ['Castile, Steve' 'Cleveland, Maggie' 'Deasy, Jon' 'Dennis, Brent'
 'Elder, Steve' 'Field, Adrian' 'Figone, Joe' 'Gay, Mike'
 'Giammattei, Joe' 'Hill, Eric' 'Koch-Gonzalez, Gloria'
 'Lockwood, Darlene' 'Martin, York (Acting)' 'McCormick, James'
 'Miller, John' "O'Brien, Teresa" "O'Connor, Tom" 'PSAManager'
 'Scott, Ronnie' 'Sheehy, Chuck' 'Sheets, Robert' 'Stone, Andy'
 'Taylor, Zack' 'Watkins, Robert' 'Wayne, Lisa'] 
Unique counts: 25.


######  EMail

In [35]:
getStatsForColumn(park_open_space_map, 'email' )

Unique values: ['adrian.field@sfgov.org' 'andy.stone@sfgov.org' 'brent.dennis@sfgov.org'
 'charles.sheehy@sfgov.org' 'darlene.lockwood@sfgov.org' 'email'
 'eric.hill@sfgov.org' 'gloria.koch-gonzalez@sfgov.org'
 'james.mccormick@sfgov.org' 'joe.giammattei@sfgov.org'
 'john.miller@sfgov.org' 'jon.deasy@sfgov.org' 'joseph.figone@sfgov.org'
 'lisa.wayne@sfgov.org' 'maggie.cleveland@sfgov.org'
 'michael.gay@sfgov.org' 'robert.sheets@sfgov.org'
 'robert.watkins@sfgov.org' 'ronnie.scott@sfgov.org'
 'steve.castile@sfgov.org' 'steven.elder@sfgov.org'
 "teresa.o'brien@sfgov.org" "tom.o'connor@sfgov.org"
 'york.martin@sfgov.org' 'zack.taylor@sfgov.org'] 
Unique counts: 25.


###### NUmber

In [36]:
getStatsForColumn(park_open_space_map, 'Number')

Unique values: ['(415) 218-0259' '(415) 218-2226' '(415) 218-4786' '(415) 235-4576'
 '(415) 254-8014' '(415) 254-8030' '(415) 504-7923' '(415) 513-2577'
 '(415) 601-6501' '(415) 601-7277' '(415) 666-7002' '(415) 666-7003'
 '(415) 666-7004' '(415) 666-7092' '(415) 695-5004' '(415) 713-4997'
 '(415) 717-2872' '(415) 753-7040' '(415) 753-7094' '(415) 753-7271'
 '(415) 819-2699' '(415) 819-6138' '(415) 831-6307' '(415) 841-0856'
 'Number'] 
Unique counts: 25.


###### Zipcode

In [37]:
getStatsForColumn(park_open_space_map, 'Zipcode')

Unique values: [94044. 94102. 94103. 94105. 94107. 94108. 94109. 94110. 94111. 94112.
 94114. 94115. 94116. 94117. 94118. 94121. 94122. 94123. 94124. 94127.
 94131. 94132. 94133. 94134. 95321.    nan] 
Unique counts: 26.


###### acreage

In [38]:
getStatsForColumn(park_open_space_map, 'Acreage')

Unique values: [0.00000e+00 3.00000e-02 5.00000e-02 6.00000e-02 7.00000e-02 8.00000e-02
 9.00000e-02 1.00000e-01 1.10000e-01 1.20000e-01 1.30000e-01 1.40000e-01
 1.50000e-01 1.60000e-01 1.80000e-01 2.10000e-01 2.20000e-01 2.30000e-01
 2.50000e-01 2.60000e-01 2.90000e-01 3.00000e-01 3.10000e-01 3.20000e-01
 3.40000e-01 3.50000e-01 3.70000e-01 4.10000e-01 4.40000e-01 4.50000e-01
 5.10000e-01 5.30000e-01 5.50000e-01 5.90000e-01 6.10000e-01 6.30000e-01
 6.50000e-01 6.60000e-01 7.10000e-01 7.40000e-01 7.50000e-01 7.80000e-01
 7.90000e-01 8.00000e-01 8.10000e-01 8.20000e-01 8.30000e-01 8.40000e-01
 8.90000e-01 9.30000e-01 9.60000e-01 9.70000e-01 1.02000e+00 1.03000e+00
 1.07000e+00 1.11000e+00 1.12000e+00 1.20000e+00 1.23000e+00 1.29000e+00
 1.31000e+00 1.40000e+00 1.47000e+00 1.50000e+00 1.52000e+00 1.53000e+00
 1.57000e+00 1.61000e+00 1.68000e+00 1.74000e+00 1.81000e+00 1.83000e+00
 1.84000e+00 1.91000e+00 1.93000e+00 2.01000e+00 2.03000e+00 2.05000e+00
 2.13000e+00 2.20000e+00 2.21000e+00

In [39]:
getStatsForColumn(park_open_space_map, 'SupDist')

Unique values: [ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. nan] 
Unique counts: 12.


######  Park id

In [40]:
getStatsForColumn(park_open_space_map, 'ParkID')

Unique values: [1.00000e+00 2.00000e+00 3.00000e+00 4.00000e+00 5.00000e+00 6.00000e+00
 7.00000e+00 8.00000e+00 9.00000e+00 1.00000e+01 1.20000e+01 1.30000e+01
 1.40000e+01 1.50000e+01 1.60000e+01 1.70000e+01 1.80000e+01 1.90000e+01
 2.00000e+01 2.10000e+01 2.20000e+01 2.30000e+01 2.40000e+01 2.50000e+01
 2.60000e+01 2.70000e+01 2.80000e+01 2.90000e+01 3.00000e+01 3.10000e+01
 3.20000e+01 3.30000e+01 3.40000e+01 3.50000e+01 3.60000e+01 3.70000e+01
 3.80000e+01 3.90000e+01 4.00000e+01 4.10000e+01 4.20000e+01 4.30000e+01
 4.40000e+01 4.50000e+01 4.60000e+01 4.70000e+01 4.80000e+01 4.90000e+01
 5.00000e+01 5.10000e+01 5.20000e+01 5.30000e+01 5.40000e+01 5.50000e+01
 5.60000e+01 5.70000e+01 5.80000e+01 5.90000e+01 6.00000e+01 6.10000e+01
 6.20000e+01 6.30000e+01 6.40000e+01 6.50000e+01 6.60000e+01 6.70000e+01
 6.80000e+01 6.90000e+01 7.00000e+01 7.10000e+01 7.20000e+01 7.30000e+01
 7.40000e+01 7.50000e+01 7.60000e+01 7.70000e+01 7.80000e+01 7.90000e+01
 8.00000e+01 8.10000e+01 8.20000e+01

###### Location 1

In [41]:
getStatsForColumn(park_open_space_map, 'Location 1')

Unique values: [nan '1 11th Ave\nSan Francisco, CA\n(37.78724257, -122.4690454)'
 '1 Miley St\nSan Francisco, CA\n(37.79733829, -122.44485143)'
 '1 Sharp Park Rd\nPacifica, CA\n(37.62439438, -122.48189132)'
 '1 Zoo Rd\nSan Francisco, CA\n(37.73158916, -122.50380857)'
 '100 Collingwood St\nSan Francisco, CA\n(37.75978957, -122.43648069)'
 '100 Cortland Ave\nSan Francisco, CA\n(37.74002499, -122.42097734)'
 '1016 Laguna\nSan Francisco, CA\n(37.78084683, -122.425427)'
 '1101 Washington St\nSan Francisco, CA\n(37.79420965, -122.41178577)'
 '1180 Stanyan St\nSan Francisco, CA\n(37.76259314, -122.45147654)'
 '1201 Broadway\nSan Francisco, CA\n(37.79643576, -122.41742319)'
 '12th Ave (off Pacheco\nSan Francisco, CA\n(37.74998538, -122.46940674)'
 '1395 Mendell St\nSan Francisco, CA\n(37.73453491, -122.389412)'
 '1398 Hudson St\nSan Francisco, CA\n(37.73876792, -122.38450221)'
 '14th Ave Rivera\nSan Francisco, CA\n(37.74657651, -122.46872941)'
 '15th Ave\nSan Francisco, CA\n(37.75507227, -122.

In [42]:
getStatsForColumn(park_open_space_map, 'Lat')

Unique values: [nan] 
Unique counts: 1.


In [43]:
park_open_space_map = park_open_space_map .replace(np.nan, 'N/A')

### Privately owned Public open spaces

In [44]:
spaces = pd.read_csv('data/privately_owned_public_open_spaces_data.csv')


In [45]:

spaces.head()


Unnamed: 0,NAME,POPOS_ADDR,DNTWN_PLAN,TYPE,CASE_NO,HOURS,LANDSCAPIN,SEATING_No,FOOD_SERVI,Art,...,LOCATION,YEAR,SOURCE,Food,Seating,Restroom,Descriptio,Seating_an,Hours_Type,the_geom
0,343 Sansome St,343 Sansome St,,"View Terrace, Public Sitting Area",1985.079X,10:00am - 5:00pm; Mon-Friday,Trees and plants,"18 chairs, 50 linear seats on planter box sizes.",Food provided along Leidesdorff Street,Yes,...,"15th floor terrace, and on top of the ""old off...",1990,"DT Plan 2009, SPUR, POPOS Mailing List",,Y,,This fifteenth floor open space has excellent ...,,Open Business Hours,"(37.79374000000007, -122.40145999999999)"
1,120 Howard St,120 Howard St,,"Uban Garden, Snippet",2006.0616,8AM - 6PM,plants,None.,Cart,,...,interior courtyard between the building and th...,"existed before, updates conditioned 2006",POPOS Mailing List,Y,,,This space features plants along the entirety ...,,Open Business Hours,"(37.79107000000005, -122.39304999999996)"
2,100 1st St,100 1st St,,"Sun Terrace, Indoor Park, Snippet",1983.331,Ground level arcade space accessible at all ti...,Trees and plants,30 seats at 10 tables plus numerous areas to s...,At ground level,Yes,...,Sun Terrace is on second floor above adjoining...,1985,"SPUR, DT Plan 2009, Motion",Y,Y,,The large Sun Terrace features extensive lands...,Y,Open At All Times,"(37.789100000000076, -122.39796999999999)"
3,Commercial Street (235 Pine Entitlement),235 Pine St,,lunchtime conversion. Commission was satisfied...,84.432X,,Plants,,,Yes,...,commercial street between montgomery and sansome,,POPOS Mailing List,,,,Linear park improvements along Commercial Stre...,,Unknown,"(37.79439000000008, -122.40227999999996)"
4,555 California St,555 California St,,Urban Garden,,Open at all times.,,Yes- 24 small wooden benches located at elevat...,,,...,Located at the elevated plaza along California...,1969,,,,,,,Open At All Times,"(37.79245979800004, -122.40395113799997)"


In [46]:


print('Rows: %d, Columns: %d' % (spaces.shape[0], spaces.shape[1]))
print(spaces.columns.values)


Rows: 78, Columns: 22
['NAME' 'POPOS_ADDR' 'DNTWN_PLAN' 'TYPE' 'CASE_NO' 'HOURS' 'LANDSCAPIN'
 'SEATING_No' 'FOOD_SERVI' 'Art' 'RESTROOMS' 'Accessibil' 'LOCATION'
 'YEAR' 'SOURCE' 'Food' 'Seating' 'Restroom' 'Descriptio' 'Seating_an'
 'Hours_Type' 'the_geom']


In [47]:

print('Null values: %s.' % identifyNullValues(spaces))



Null values: {'LOCATION': 27, 'Seating': 34, 'DNTWN_PLAN': 78, 'Art': 46, 'Accessibil': 48, 'Food': 43, 'Descriptio': 6, 'Seating_an': 56, 'CASE_NO': 36, 'RESTROOMS': 27, 'HOURS': 29, 'SOURCE': 37, 'LANDSCAPIN': 36, 'FOOD_SERVI': 21, 'YEAR': 9, 'Restroom': 65, 'TYPE': 2, 'SEATING_No': 25}.


In [48]:
getStatsForColumn(spaces, 'LOCATION')


Unique values: [nan
 '"ground level exterior open space in the form of plazas and walkways"'
 '15th floor terrace, and on top of the "old office tower" (could be one and the same). Also behind the building Leidesdorff Street, the street is closed during lunch hours'
 '4th floor, turn left from elevator bay, walk beyond convention/meeting rooms, turn left'
 '6th Floor' '6th floor, entrance next to elevator bay'
 'Along commercial street and at 642 commercial st' 'Below street level'
 'Bench next to entrance setback 15 feet on 2nd Street.  Per Thayer:  Benches on sidewalk setback on Folsom.'
 'Commercial Street between Drumm and Sansome'
 'Entrance from Stevenson Street' 'Ground floor' 'Ground floor lobby'
 'Ground level.' 'Ground/Mezz level at 2nd/mission'
 'In front of building facing Second Street'
 'In front of building on Stevenson Street and through a pedestrianized alley to Jessie Street'
 'In front of building, sidewalk widening' 'Inside the lobby'
 'Lobby entrance'
 "Located alo

In [49]:
getStatsForColumn(spaces, 'Seating')

Unique values: [nan 'N' 'Y'] 
Unique counts: 3.


In [50]:
getStatsForColumn(spaces, 'DNTWN_PLAN')

Unique values: [nan] 
Unique counts: 1.


In [145]:
getStatsForColumn(spaces, 'Art')

Unique values: [nan 'No' 'Yes'] 
Unique counts: 3.


In [146]:
getStatsForColumn(spaces,  'Accessibil')

Unique values: [nan '13080' 'Direct' 'Direct for plaza. Indirect for indoor park'
 'Direct, indirect' 'Indirect' 'Indirect for Atrium. Unknown for Plaza'
 'Indirect through lobby elevator?' 'direct'] 
Unique counts: 9.


In [147]:
getStatsForColumn(spaces, 'Food')

Unique values: [nan 'N' 'Y'] 
Unique counts: 3.


In [148]:
getStatsForColumn(spaces, 'Descriptio')

Unique values: [nan 'A cul-de-sac walkway with benches at the end.'
 'A shady open space area with art in the form of sculptures. This snippet found on the northwest side of the building, up the stairs. It adjoins the open space of the adjacent building.'
 'An enclosed open space, with operable floor to ceiling windows and doors; located within the street-level lobby.'
 'An enclosed open space, with operable sliding glass doors that open up to street frontage; located within the street-level lobby.'
 'An open plaza between buildings with benches, trees and some landscape materials.'
 'An open plaza with some seating, landscaping and planters.'
 'Brick plaza with some trees, artwork and seating on brick structures.'
 'Built with marble, and covered with glass, this greenhouse provides ample seating and tables.'
 'Commercial Street throughout Embarcadero 1, 2, and 3 is a pedestrian mall on ground floor and 2nd level, which is accessed by an elaborate staircase system, flanked by waterfal

In [149]:
getStatsForColumn(spaces, 'CASE_NO')

Unique values: [nan '1981.183ED' '1981.249' '1982.463C' '1983.075' '1983.222' '1983.331'
 '1984.397' '1985.079X' '1986.085' '1986.223' '1988.530Q' '1989.589'
 '1997.215' '1997.484X' '1997.689X' '1998.084X' '1998.090B' '1998.144'
 '1998.321X' '1998.902' '1999.176X' '2000.074' '2000.790X' '2001.0798'
 '2004.0165' '2006.0616' '2006.066' '2006.1106' '2006.1524' '2008.0001'
 '84.432X' 'CU75.32' 'CU81.005'
 'modified: 2005.0198X, original 2001.0792X'] 
Unique counts: 35.


In [150]:
getStatsForColumn(spaces, 'Seating_an')

Unique values: [nan 'N' 'Y'] 
Unique counts: 3.


In [151]:
getStatsForColumn(spaces, 'RESTROOMS')

Unique values: [nan 'Hotel. Could use the lobby restrooms'
 'Hotel. Could use the lobby restrooms etc'
 'Located in hallway between lobby and indoor park' 'No'
 'No signs for any' 'None' 'None marked' 'None observed' 'None visible'
 'None, but public restrooms downstairs' 'Restaurant' 'Yes' 'Yes, hotel'
 'Yes, inside One Market Plaza at Mission St. entrance'
 'Yes, through main lobby at reception desk (ID required)'] 
Unique counts: 16.


In [152]:
getStatsForColumn(spaces, 'HOURS')

Unique values: [nan '10:00am - 5:00pm; Mon-Friday' '8:00am - 5:00pm' '8:00am - 6:00pm'
 '8AM - 6PM' '8am-5pm, M-F' '8am-6pm, M-F' '9:00am - 5:00pm'
 '9:00am - 6:00pm'
 'Ground level arcade space accessible at all times, Sun Terrace open during daylight hours.'
 'M-F 8:00AM-6:00PM' 'Open at all times' 'Open at all times.'] 
Unique counts: 13.


In [153]:
getStatsForColumn(spaces, 'SOURCE')

Unique values: [nan 'DT Plan 2009, POPOS Mailing List'
 'DT Plan 2009, SPUR, POPOS Mailing List'
 'DT Plan 2009, SPUR, POPOS Mailing List, Motion' 'POPOS Mailing List'
 'SPUR' 'SPUR, DT Plan 2009' 'SPUR, DT Plan 2009, Motion'] 
Unique counts: 8.


In [154]:
getStatsForColumn(spaces, 'LANDSCAPIN')

Unique values: [nan 'Hedges and other types of plants.' 'No' 'None' 'Planter: trees'
 'Planting and flowers' 'Plants' 'Plants and flowers'
 'Plants, trees, grass' 'Pots of flowers' 'Redwood trees.'
 'Street trees, planters with bushes, flowers' 'Trees' 'Trees and flowers'
 'Trees and plants' 'Trees, ferns, arbor, planters'
 'Water fountain and lots of plants and shade.'
 'Willow Trees and planting.' 'bamboo, other trees.'
 'bamboo, street trees. Indoor park has bushes.' 'flowers' 'planter box'
 'planter boxes, trees art sculpture' 'plants' 'trees'
 'trees and waterfall' 'trees plants, boulders'] 
Unique counts: 27.


In [155]:
getStatsForColumn(spaces, 'FOOD_SERVI')

Unique values: [nan 'At ground level' 'Caf\xc3\xa9' 'Caf\xc3\xa9 on one snippet.' 'Cart'
 'Empty retail on the plaza' 'Food provided along Leidesdorff Street' 'No'
 'No caf\xc3\xa9' 'None' 'None, but food on the premises' 'None.'
 'Restaurant service nearby, starbucks outside'
 'Restaurants located adjacent to Open Space.' 'Sandwich shop'
 'Several restaurants and cafes located within One Market Plaza, open M-F business hours'
 'There is food service serving the commercial street POPOS, but not directly serving that of Empire park.'
 'Yes' 'Yes - Caf\xc3\xa9' 'Yes caf\xc3\xa9' 'Yes, cafe'
 'Yes, caf\xc3\xa9/deli' 'Yes- Caf\xc3\xa9' 'Yes.'
 'Yes; all along the alley' 'caf\xc3\xa9'] 
Unique counts: 26.


In [156]:
getStatsForColumn(spaces, 'YEAR')

Unique values: [nan '1959' '1960' '1965' '1968' '1969' '1970' '1972' '1973' '1974' '1976'
 '1979' '1980' '1981' '1982' '1983' '1984' '1985' '1986' '1988' '1989'
 '1990' '2000' '2001' '2002' '2003' '2005' '2006' '2008' '2016'
 'existed before, updates conditioned 2006'] 
Unique counts: 31.


In [157]:
getStatsForColumn(spaces, 'Restroom')

Unique values: [nan 'Y'] 
Unique counts: 2.


In [158]:
getStatsForColumn(spaces, 'TYPE')

Unique values: [nan 'Atrium' 'Atrium, Plaza' 'Greenhouse' 'Indoor Park'
 'Interior courtyard area - does not appear to be public (see notes)'
 'Interior open space at ground level, specific type not specified in motion.'
 'Lobby' 'Outdoor Plaza and "Indoor Park"' 'Pedestrian Walkway'
 'Pedestrian Walkways' 'Pedestrian walkway on ground and 2nd levels'
 'Pedestrian-only alleyway' 'Plaza' 'Plaza, Indoor Park'
 'Public Sitting Area in Pedestrian Walkway' 'Snippet' 'Sun Terrace'
 'Sun Terrace, Indoor Park, Snippet' 'Uban Garden, Snippet' 'Urban Gardem'
 'Urban Garden' 'Urban Park' 'View Terrace'
 'View Terrace, Public Sitting Area'
 'lunchtime conversion. Commission was satisfied that it met the guidelins.'] 
Unique counts: 26.


In [159]:
getStatsForColumn(spaces, 'SEATING_No')

Unique values: [nan '100 linear seats and benches.' '12 chairs w/ tables'
 '122 linear seats' '15 chairs'
 '15 chairs w/ tables, 8 bench seats in the park, 8 along commercial.'
 '15 chairs, 24 linear seat on  ledge, 45 linear seats on planter boxes sides.'
 '16 small benches' '18 chairs, 50 linear seats on planter box sizes.'
 '20 chairs' '20 linear seats' '23 linear seats.' '24 linear'
 '24 linear seats'
 '25 linear seats on bench/  ledge. 23 tables, 69 chairs'
 '30 seats at 10 tables plus numerous areas to sit along built-in benches and planters, grassy areas.'
 '30 tables, 107 chairs, plus planter-side seating, benches.'
 "40 chairs w/ 10 tables, Linear Seating: only 10'' deep, but approx 82 linear seats"
 '42 chairs with tables'
 '44 linear seating on ledges, 21 seating on planter box, 7 tables 28 chairs'
 '45 chairs with 15 tables. 134 linear seating on ledges. 10 linear seats on benches'
 '49 chairs, 6 chaise lounge' '59 chairs, 10 linear seats'
 '62 chairs and 15 tables' '76 sea

In [160]:

spaces = spaces.replace(np.nan, 'N/A')

### Public park data

In [31]:
parkinfo = pd.read_csv('data/public_park_data.csv')

In [32]:
parkinfo.head()


Unnamed: 0,ParkName,ParkType,ParkServiceArea,PSAManager,email,Number,Zipcode,Acreage,SupDist,ParkID,Location 1,Lat
0,ParkName,ParkType,ParkServiceArea,PSAManager,email,Number,,,,,,
1,10TH AVE/CLEMENT MINI PARK,Mini Park,PSA 1,"Elder, Steve",steven.elder@sfgov.org,(415) 601-6501,94118.0,0.66,1.0,156.0,"351 9th Ave\nSan Francisco, CA\n(37.78184397, ...",
2,15TH AVENUE STEPS,Mini Park,PSA 4,"Sheehy, Chuck",charles.sheehy@sfgov.org,(415) 218-2226,94122.0,0.26,7.0,185.0,"15th Ave b w Kirkham\nSan Francisco, CA\n(37.7...",
3,24TH/YORK MINI PARK,Mini Park,PSA 6,"Field, Adrian",adrian.field@sfgov.org,(415) 717-2872,94110.0,0.12,9.0,51.0,"24th\nSan Francisco, CA\n(37.75306042, -122.40...",
4,29TH/DIAMOND OPEN SPACE,Neighborhood Park or Playground,PSA 5,"O'Brien, Teresa",teresa.o'brien@sfgov.org,(415) 819-2699,94131.0,0.82,8.0,194.0,"Diamond\nSan Francisco, CA\n(37.74360211, -122...",


In [33]:
print('Rows: %d, Columns: %d' % (parkinfo.shape[0], parkinfo.shape[1]))
print(parkinfo.columns.values)

Rows: 230, Columns: 12
['ParkName' 'ParkType' 'ParkServiceArea' 'PSAManager' 'email' 'Number'
 'Zipcode' 'Acreage' 'SupDist' 'ParkID' 'Location 1' 'Lat']


In [127]:
print('Null values: %s.' % identifyNullValues(parkinfo))

Null values: {'SupDist': 3, 'Zipcode': 7, 'Acreage': 1, 'ParkID': 1, 'Lat': 230, 'Location 1': 28}.


###### Park name

In [128]:
getStatsForColumn(parkinfo, 'ParkName' )

Unique values: ['10TH AVE/CLEMENT MINI PARK' '15TH AVENUE STEPS' '24TH/YORK MINI PARK'
 '29TH/DIAMOND OPEN SPACE' 'ADAM ROGERS PARK' 'ALAMO SQUARE'
 'ALICE CHALMERS PLAYGROUND' 'ALICE MARBLE TENNIS COURTS'
 'ALIOTO MINI PARK' 'ALLYNE PARK' 'ALTA PLAZA'
 'ANGELO J. ROSSI PLAYGROUND' 'APTOS PLAYGROUND' 'ARGONNE PLAYGROUND'
 'Arkansas Friendship Garden' 'Arlington Community Garden'
 'BALBOA NATURAL AREA' 'BALBOA PARK' 'BAY VIEW PARK' 'BAY VIEW PLAYGROUND'
 "BEIDEMAN/O'FARRELL MINI PARK" 'BERKELEY WAY OPEN SPACE'
 'BERNAL HEIGHTS PARK' 'BERNAL HEIGHTS RECREATION CENTER'
 'BILLY GOAT HILL' 'BROADWAY TUNNEL EAST MINI PARK'
 'BROADWAY TUNNEL WEST MINI PARK' 'BROOKS PARK'
 'BROTHERHOOD/CHESTER MINI PARK' 'BUCHANAN STREET MALL' 'BUENA VISTA PARK'
 'BUSH/BRODERICK MINI PARK' 'CABRILLO PLAYGROUND' 'CAMP MATHER'
 'CANDLESTICK PARK' 'CARL LARSEN PARK' 'CAYUGA PLAYGROUND'
 'CAYUGA/LAMARTINE MINI PARK' 'CHESTNUT/KEARNY OPEN SPACE'
 'CHINESE RECREATION CENTER' 'COLERIDGE MINI PARK'
 'COLLIS P. HUNTING

In [129]:
getStatsForColumn(parkinfo, 'ParkType' )

Unique values: ['Civic Plaza or Square' 'Community Garden' 'Concession' 'Family Camp'
 'Mini Park' 'Neighborhood Park or Playground' 'ParkType' 'Parkway'
 'Regional Park' 'Zoological Garden'] 
Unique counts: 10.


In [130]:
getStatsForColumn(parkinfo, 'ParkServiceArea' )

Unique values: ['Candlestick Park Stadium' 'Golden Gate Park' 'Outside SF' 'PSA 1'
 'PSA 2' 'PSA 3' 'PSA 4' 'PSA 5' 'PSA 6' 'ParkServiceArea'] 
Unique counts: 10.


In [131]:
getStatsForColumn(parkinfo, 'PSAManager' )

Unique values: ['Castile, Steve' 'Cleveland, Maggie' 'Deasy, Jon' 'Dennis, Brent'
 'Elder, Steve' 'Field, Adrian' 'Figone, Joe' 'Gay, Mike'
 'Giammattei, Joe' 'Hill, Eric' 'Koch-Gonzalez, Gloria'
 'Lockwood, Darlene' 'Martin, York (Acting)' 'McCormick, James'
 'Miller, John' "O'Brien, Teresa" "O'Connor, Tom" 'PSAManager'
 'Scott, Ronnie' 'Sheehy, Chuck' 'Sheets, Robert' 'Stone, Andy'
 'Taylor, Zack' 'Watkins, Robert' 'Wayne, Lisa'] 
Unique counts: 25.


In [132]:
getStatsForColumn(parkinfo, 'email' )

Unique values: ['adrian.field@sfgov.org' 'andy.stone@sfgov.org' 'brent.dennis@sfgov.org'
 'charles.sheehy@sfgov.org' 'darlene.lockwood@sfgov.org' 'email'
 'eric.hill@sfgov.org' 'gloria.koch-gonzalez@sfgov.org'
 'james.mccormick@sfgov.org' 'joe.giammattei@sfgov.org'
 'john.miller@sfgov.org' 'jon.deasy@sfgov.org' 'joseph.figone@sfgov.org'
 'lisa.wayne@sfgov.org' 'maggie.cleveland@sfgov.org'
 'michael.gay@sfgov.org' 'robert.sheets@sfgov.org'
 'robert.watkins@sfgov.org' 'ronnie.scott@sfgov.org'
 'steve.castile@sfgov.org' 'steven.elder@sfgov.org'
 "teresa.o'brien@sfgov.org" "tom.o'connor@sfgov.org"
 'york.martin@sfgov.org' 'zack.taylor@sfgov.org'] 
Unique counts: 25.


In [133]:
getStatsForColumn(parkinfo, 'Number')

Unique values: ['(415) 218-0259' '(415) 218-2226' '(415) 218-4786' '(415) 235-4576'
 '(415) 254-8014' '(415) 254-8030' '(415) 504-7923' '(415) 513-2577'
 '(415) 601-6501' '(415) 601-7277' '(415) 666-7002' '(415) 666-7003'
 '(415) 666-7004' '(415) 666-7092' '(415) 695-5004' '(415) 713-4997'
 '(415) 717-2872' '(415) 753-7040' '(415) 753-7094' '(415) 753-7271'
 '(415) 819-2699' '(415) 819-6138' '(415) 831-6307' '(415) 841-0856'
 'Number'] 
Unique counts: 25.


In [134]:
getStatsForColumn(parkinfo, 'Zipcode' )

Unique values: [94044. 94102. 94103. 94105. 94107. 94108. 94109. 94110. 94111. 94112.
 94114. 94115. 94116. 94117. 94118. 94121. 94122. 94123. 94124. 94127.
 94131. 94132. 94133. 94134. 95321.    nan] 
Unique counts: 26.


In [135]:
getStatsForColumn(parkinfo, 'Acreage' )

Unique values: [0.00000e+00 3.00000e-02 5.00000e-02 6.00000e-02 7.00000e-02 8.00000e-02
 9.00000e-02 1.00000e-01 1.10000e-01 1.20000e-01 1.30000e-01 1.40000e-01
 1.50000e-01 1.60000e-01 1.80000e-01 2.10000e-01 2.20000e-01 2.30000e-01
 2.50000e-01 2.60000e-01 2.90000e-01 3.00000e-01 3.10000e-01 3.20000e-01
 3.40000e-01 3.50000e-01 3.70000e-01 4.10000e-01 4.40000e-01 4.50000e-01
 5.10000e-01 5.30000e-01 5.50000e-01 5.90000e-01 6.10000e-01 6.30000e-01
 6.50000e-01 6.60000e-01 7.10000e-01 7.40000e-01 7.50000e-01 7.80000e-01
 7.90000e-01 8.00000e-01 8.10000e-01 8.20000e-01 8.30000e-01 8.40000e-01
 8.90000e-01 9.30000e-01 9.60000e-01 9.70000e-01 1.02000e+00 1.03000e+00
 1.07000e+00 1.11000e+00 1.12000e+00 1.20000e+00 1.23000e+00 1.29000e+00
 1.31000e+00 1.40000e+00 1.47000e+00 1.50000e+00 1.52000e+00 1.53000e+00
 1.57000e+00 1.61000e+00 1.68000e+00 1.74000e+00 1.81000e+00 1.83000e+00
 1.84000e+00 1.91000e+00 1.93000e+00 2.01000e+00 2.03000e+00 2.05000e+00
 2.13000e+00 2.20000e+00 2.21000e+00

In [136]:
getStatsForColumn(parkinfo, 'SupDist' )

Unique values: [ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. nan] 
Unique counts: 12.


In [137]:
getStatsForColumn(parkinfo, 'ParkID' )

Unique values: [1.00000e+00 2.00000e+00 3.00000e+00 4.00000e+00 5.00000e+00 6.00000e+00
 7.00000e+00 8.00000e+00 9.00000e+00 1.00000e+01 1.20000e+01 1.30000e+01
 1.40000e+01 1.50000e+01 1.60000e+01 1.70000e+01 1.80000e+01 1.90000e+01
 2.00000e+01 2.10000e+01 2.20000e+01 2.30000e+01 2.40000e+01 2.50000e+01
 2.60000e+01 2.70000e+01 2.80000e+01 2.90000e+01 3.00000e+01 3.10000e+01
 3.20000e+01 3.30000e+01 3.40000e+01 3.50000e+01 3.60000e+01 3.70000e+01
 3.80000e+01 3.90000e+01 4.00000e+01 4.10000e+01 4.20000e+01 4.30000e+01
 4.40000e+01 4.50000e+01 4.60000e+01 4.70000e+01 4.80000e+01 4.90000e+01
 5.00000e+01 5.10000e+01 5.20000e+01 5.30000e+01 5.40000e+01 5.50000e+01
 5.60000e+01 5.70000e+01 5.80000e+01 5.90000e+01 6.00000e+01 6.10000e+01
 6.20000e+01 6.30000e+01 6.40000e+01 6.50000e+01 6.60000e+01 6.70000e+01
 6.80000e+01 6.90000e+01 7.00000e+01 7.10000e+01 7.20000e+01 7.30000e+01
 7.40000e+01 7.50000e+01 7.60000e+01 7.70000e+01 7.80000e+01 7.90000e+01
 8.00000e+01 8.10000e+01 8.20000e+01

In [93]:
#getStatsForColumn(parkinfo, 'Location 1')
lat = parkinfo["Location 1"].str.split("\n").str.get(2).str.split(",")
.str.get(0)
print lat.str.slice(1,15)
print lat

0                 NaN
1         37.78184397
2         37.75956493
3         37.75306042
4         37.74360211
5         37.73101645
6         37.77634875
7          37.7098271
8         37.80142776
9         37.75890196
10        37.79746066
11        37.79117333
12        37.77874186
13         37.7285246
14          37.779415
15                NaN
16        37.73704325
17        37.77591285
18        37.72494861
19                NaN
20        37.72593906
21        37.78238894
22        37.73956697
23                NaN
24        37.73844636
25        37.74140787
26        37.79699682
27        37.79643576
28        37.71727397
29        37.71238249
            ...      
200       37.79205617
201       37.76380044
202       37.79592597
203       37.73193576
204       37.73366412
205       37.75658875
206       37.75994801
207               NaN
208       37.78482103
209               NaN
210       37.78274031
211               NaN
212       37.78793122
213       37.74243091
214       

In [140]:
parkinfo = parkinfo.replace(np.nan, 'N/A')

### Restaurants

In [161]:
restaurants = pd.read_csv('data/Restaurant_Scores_-_LIVES_Standard.csv')

In [162]:
restaurants.head()

Unnamed: 0,business_id,business_name,business_address,business_city,business_state,business_postal_code,business_latitude,business_longitude,business_location,business_phone_number,inspection_id,inspection_date,inspection_score,inspection_type,violation_id,violation_description,risk_category
0,1757,Dar Bar Pakistani/Indian Cusine,1412 Polk St,San Francisco,CA,94109,37.789784,-122.420455,"(37.789784, -122.420455)",,1757_20170928,09/28/2017 12:00:00 AM,86.0,Routine - Unscheduled,1757_20170928_103131,Moderate risk vermin infestation,Moderate Risk
1,4864,DRAGON CITY BAKERY & CAFE,2367 MISSION St,San Francisco,CA,94110,37.759174,-122.419066,"(37.759174, -122.419066)",14155830000.0,4864_20161206,12/06/2016 12:00:00 AM,84.0,Routine - Unscheduled,4864_20161206_103157,Food safety certificate or food handler card n...,Low Risk
2,79782,Deli 23,2449 23rd St,San Francisco,CA,94110,,,,,79782_20160503,05/03/2016 12:00:00 AM,92.0,Routine - Unscheduled,79782_20160503_103120,Moderate risk food holding temperature,Moderate Risk
3,73840,L'acajou Bakery and Cafe,498 09th St Ste. C,San Francisco,CA,94103,,,,,73840_20171207,12/07/2017 12:00:00 AM,71.0,Routine - Unscheduled,73840_20171207_103105,Improper cooling methods,High Risk
4,76437,Sweetheart Cafe,909 Grant Ave,San Francisco,CA,94108,,,,,76437_20160329,03/29/2016 12:00:00 AM,76.0,Routine - Unscheduled,76437_20160329_103113,Sewage or wastewater contamination,High Risk


In [167]:
print('Rows: %d, Columns: %d' % (restaurants.shape[0], restaurants.shape[1]))
print(restaurants.columns.values)

Rows: 50630, Columns: 17
['business_id' 'business_name' 'business_address' 'business_city'
 'business_state' 'business_postal_code' 'business_latitude'
 'business_longitude' 'business_location' 'business_phone_number'
 'inspection_id' 'inspection_date' 'inspection_score' 'inspection_type'
 'violation_id' 'violation_description' 'risk_category']


In [168]:
print('Null values: %s.' % identifyNullValues(restaurants))

Null values: {'risk_category': 12321, 'business_latitude': 20548, 'inspection_score': 13026, 'business_longitude': 20548, 'business_location': 20548, 'business_phone_number': 35188, 'violation_description': 12321, 'business_postal_code': 1284, 'violation_id': 12321}.


In [169]:
getStatsForColumn(restaurants, 'business_id')

Unique values: [   19    24    31 ... 95256 95312 95398] 
Unique counts: 6010.


In [170]:
getStatsForColumn(restaurants, 'business_name')

Unique values: ['100% Dessert Cafe' '111 Minna Gallery' '1300 on Fillmore' ...
 'flourChylde Bakery' 'iNoodles' 'vive la tarte'] 
Unique counts: 5502.


In [171]:
getStatsForColumn(restaurants, 'business_address')

Unique values: ['001 WEST PORTAL Ave' '0044 Montgomery St  LL1' '022 Battery St' ...
 'Treasure Island' 'Treasure Island Flea Market' 'Various Farmers Markets'] 
Unique counts: 5452.


In [172]:
getStatsForColumn(restaurants, 'business_name')

Unique values: ['100% Dessert Cafe' '111 Minna Gallery' '1300 on Fillmore' ...
 'flourChylde Bakery' 'iNoodles' 'vive la tarte'] 
Unique counts: 5502.


In [173]:
getStatsForColumn(restaurants, 'business_city')

Unique values: ['San Francisco'] 
Unique counts: 1.


In [174]:
getStatsForColumn(restaurants, 'business_state')

Unique values: ['CA'] 
Unique counts: 1.


In [175]:
getStatsForColumn(restaurants, 'business_postal_code')

Unique values: [nan '00000' '92672' '94013' '94014' '94080' '941' '94101' '94102' '94103'
 '941033148' '94104' '94105' '94107' '94108' '94109' '94110' '941102019'
 '94111' '94112' '94114' '94115' '94116' '94117' '94118' '94120' '94121'
 '94122' '94123' '94124' '94127' '94129' '94130' '94131' '94132' '94133'
 '94134' '94143' '94158' '94188' '94301' '94544' '94602' '94901' '95105'
 'CA' 'Ca'] 
Unique counts: 47.


In [176]:
getStatsForColumn(restaurants, 'business_latitude')

Unique values: [ 0.       37.668824 37.681741 ... 37.817451 37.824494       nan] 
Unique counts: 2553.


In [177]:
getStatsForColumn(restaurants, 'business_longitude')

Unique values: [-122.510896 -122.510053 -122.510043 ... -122.368257    0.
         nan] 
Unique counts: 2599.


In [178]:
getStatsForColumn(restaurants, 'business_location')

Unique values: [nan '(0, 0)' '(37.668824, -122.409411)' ... '(37.816422, -122.370584)'
 '(37.817451, -122.368257)' '(37.824494, -122.371798)'] 
Unique counts: 2660.


In [179]:
getStatsForColumn(restaurants, 'business_phone_number')

Unique values: [1.41502049e+10 1.41502086e+10 1.41502088e+10 ... 1.41598689e+10
 1.41598814e+10            nan] 
Unique counts: 1634.


In [180]:
getStatsForColumn(restaurants, 'inspection_id')

Unique values: ['1000_20150422' '1000_20150902' '1000_20160919' ... '99_20150811'
 '99_20150818' '99_20171207'] 
Unique counts: 24252.


In [181]:
getStatsForColumn(restaurants, 'violation_description')

Unique values: [nan 'Consumer advisory not provided for raw or undercooked foods'
 'Contaminated or adulterated food'
 'Discharge from employee nose mouth or eye' 'Employee eating or smoking'
 'Food in poor condition'
 'Food safety certificate or food handler card not available'
 'Foods not protected from contamination'
 'High risk food holding temperature' 'High risk vermin infestation'
 'Improper cooking time or temperatures' 'Improper cooling methods'
 'Improper food labeling or menu misrepresentation'
 'Improper food storage' 'Improper or defective plumbing'
 'Improper reheating of food'
 'Improper storage of equipment utensils or linens'
 'Improper storage use or identification of toxic substances'
 'Improper thawing methods'
 'Improperly displayed mobile food permit or signage'
 'Improperly washed fruits and vegetables'
 'Inadequate HACCP plan record keeping'
 'Inadequate and inaccessible handwashing facilities'
 'Inadequate dressing rooms or improper storage of personal items'
 

In [182]:
restaurants = restaurants.replace(np.nan, 'N/A')