# Parking Meters in Boston

#### Import Dataset from API:
**Note:** The data is loaded with a custom import function from `Dana_Library`.

In [3]:
import sys
import os
import pandas as pd
# Make the sibling ``Dana_Library`` folder (one level above the current working
# directory) importable, then load the custom Boston open-data API helper from it.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'Dana_Library')))
from bos_data_importer import bos_data_api_import as importer

# Fetch the dataset identified by this resource id through the helper.
resource_id = "df6dac6b-e484-470d-8757-6ecae4e04f2a"
df = importer(resource_id)
# Summarise what was loaded: the column labels, then the (rows, columns) shape.
print('Attributes:', df.columns)
print('Database Dimension:', df.shape)

Attributes: Index(['_id', 'X', 'Y', 'OBJECTID', 'METER_ID', 'VENDOR', 'PAY_POLICY',
       'PRE_PAY', 'PARK_NO_PAY', 'GREEN_DOME', 'TOW_AWAY', 'STREET_CLEANING',
       'DIR', 'BLK_NO', 'STREET', 'LOCK_', 'LOCK__', 'LONGITUDE', 'LATITUDE',
       'TRAVEL_DIRECTION', 'FROM_INTERSECTION', 'TO_INTERSECTION',
       'SPACE_NUMBER', 'NUMBEROFSPACES', 'METER_TYPE', 'HAS_SENSOR',
       'G_DISTRICT', 'G_PASSPORT_ZONES', 'G_PM_ZONE', 'G_SUBZONE', 'G_ZONE',
       'BASE_RATE', 'POLE_MOUNT', 'YOKE', 'HOUSING_TYPE',
       'HOUSING_MANUFACTURER', 'SIDEWALKGE', 'COIN_SLOTLE', 'METER_CONDITION',
       'PERMIT_RATE', 'INSTALLED_ON', 'PURCHASED_DATE', 'METER_STATE',
       'SPACE_STATE'],
      dtype='object')
Database Dimension: (6955, 44)


#### Generating Profile Report:
**Note:** Based on the profiling report, we can reorganize and clean the dataset accordingly.

In [None]:
from ydata_profiling import ProfileReport

# Build a profiling report for the loaded dataset and save it as an HTML file.
# NOTE: generating the report took around 7 minutes.
#
# The report usually contains the following information:
#     Overview; Distinct; Missing; Infinite; Mean; Minimum; Maximum; Zeros; Negative; Memory Size
profile = ProfileReport(df, title="Profiling Report")  # title typo ("Reprot") fixed
profile.to_file("Profiling_Report.html")

#### Data Cleaning:

In [5]:
# Preview the first five rows to see what the raw columns look like.
df.head(n=5)

Unnamed: 0,_id,X,Y,OBJECTID,METER_ID,VENDOR,PAY_POLICY,PRE_PAY,PARK_NO_PAY,GREEN_DOME,...,HOUSING_TYPE,HOUSING_MANUFACTURER,SIDEWALKGE,COIN_SLOTLE,METER_CONDITION,PERMIT_RATE,INSTALLED_ON,PURCHASED_DATE,METER_STATE,SPACE_STATE
0,1,771671.6127543,2953700.43340772,1,450001,Parkeon,08:00AM-08:00PM MON-SAT $0.25 120,,"00:00AM-24:00AM SUN, 00:00AM-08:00AM MON-SAT, ...",,...,,,,,,,1/1/2017 12:00:00 AM,,ACTIVE,ACTIVE
1,2,771908.989248052,2953745.35523389,2,450002,Parkeon,08:00AM-08:00PM MON-SAT $0.25 120,,"00:00AM-24:00AM SUN, 00:00AM-08:00AM MON-SAT, ...",,...,,,,,,,1/1/2017 12:00:00 AM,,ACTIVE,ACTIVE
2,3,771592.285813227,2953671.61063072,3,450003,Parkeon,08:00AM-08:00PM MON-SAT $0.25 120,,"00:00AM-24:00AM SUN, 00:00AM-08:00AM MON-SAT, ...",,...,,,,,,,1/1/2017 12:00:00 AM,,ACTIVE,ACTIVE
3,4,771816.443829477,2953711.36350398,4,450004,Parkeon,08:00AM-08:00PM MON-SAT $0.25 120,,"00:00AM-24:00AM SUN, 00:00AM-08:00AM MON-SAT, ...",,...,,,,,,,1/1/2017 12:00:00 AM,,ACTIVE,ACTIVE
4,5,771502.970375061,2953640.18746522,5,450005,Parkeon,08:00AM-08:00PM MON-SAT $0.25 120,,"00:00AM-24:00AM SUN, 00:00AM-08:00AM MON-SAT, ...",,...,,,,,,,1/1/2017 12:00:00 AM,,ACTIVE,ACTIVE


**Used features:**
- `METER_ID`
- `PAY_POLICY`
- `PARK_NO_PAY`
- `TOW_AWAY`
- `DIR`: Direction
- `BLK_NO`: Block Number
- `STREET`: Street Name
- `LOCK_`: undefined
- `LONGITUDE`
- `LATITUDE`
- `SPACE_NUMBER`: 98.2% missing
- `NUMBEROFSPACES`
- `METER_TYPE`
- `HAS_SENSOR`: No meter has a sensor
- `G_ZONE`
- `BASE_RATE`
- `INSTALLED_ON`
- `METER_STATE`
- `SPACE_STATE`

In [10]:
# Show only the rows whose TRAVEL_DIRECTION value is exactly 'S'.
df[df['TRAVEL_DIRECTION'].eq('S')]

Unnamed: 0,_id,X,Y,OBJECTID,METER_ID,VENDOR,PAY_POLICY,PRE_PAY,PARK_NO_PAY,GREEN_DOME,...,HOUSING_TYPE,HOUSING_MANUFACTURER,SIDEWALKGE,COIN_SLOTLE,METER_CONDITION,PERMIT_RATE,INSTALLED_ON,PURCHASED_DATE,METER_STATE,SPACE_STATE
1584,1585,775461.363376886,2954065.20925298,1585,,IPS,08:00AM-08:00PM MON-SAT $0.25 120,,"00:00AM-24:00AM SUN, 00:00AM-08:00AM MON-SAT, ...",,...,,,,,,,4/1/2017 1:00:00 AM,,ACTIVE,ACTIVE
1585,1586,775437.838161469,2954066.90970889,1586,,IPS,08:00AM-08:00PM MON-SAT $0.25 120,,"00:00AM-24:00AM SUN, 00:00AM-08:00AM MON-SAT, ...",,...,,,,,,,4/1/2017 1:00:00 AM,,ACTIVE,ACTIVE


In [8]:
# List the distinct STREET values, in order of first appearance.
pd.unique(df['STREET'])

array(['NEWBURY ST A-B', 'NEWBURY ST B-C', 'NEWBURY ST C-D',
       'NEWBURY ST D-E', 'NEWBURY ST E-F', 'NEWBURY ST F-G',
       'FAIRFIELD ST B-N', 'NEWBURY ST H-M', 'BOYLSTON ST M-H',
       'GLOUCESTER ST N-B', 'HEREFORD ST B-N', 'NEWBURY ST G-H',
       'BOYLSTON ST H-G', 'BOYLSTON ST G-F', 'BOYLSTON ST ST C-D',
       'BOYLSTON ST F-E', 'EXETER ST B-B', 'EXETER ST N-B',
       'BOYLSTON ST D-C', 'BOYLSTON ST E-D', 'CLARENDON ST B-ST JAMES',
       'DARTMOUTH ST B-N', 'BOYLSTON ST C-B', 'CLARENDON ST N-B',
       'BERKELEY ST ST J-B', 'BOYLSTON ST B-A', 'BERKELEY ST B-N',
       'BOYLSTON ST A-C ST SO', 'STUART ST', 'CHURCH ST', 'COLUMBUS AV',
       'CONGRESS ST D-A', 'BEVERLY ST C-V', 'STANIFORD ST O-C',
       'STANIFORD ST L-O', 'LOMASNEY WY N-S', 'CANAL ST NC-V',
       'CANAL ST V-C', 'PORTLAND ST V-C', 'BROAD ST S-F', 'BROAD ST F-C',
       'BROAD ST C-M', 'BROAD ST M-C', 'BROAD ST C-S', 'BROAD ST S-W',
       'BROAD ST F-W', 'TREMONT ST W-TP', 'TREMONT ST TP-W',
       'TRE