In [1]:
# Import all the necessary packages.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Optional - Ignore warnings.
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the provided CSV file/data set.
nyc_counts = pd.read_csv('Bicycle_Counts.csv') 

# View the DataFrame, types and shape of the data.
print(nyc_counts.shape)
print(nyc_counts.dtypes)
nyc_counts.head()

(4167507, 4)
id         int64
date      object
counts     int64
status     int64
dtype: object


Unnamed: 0,id,date,counts,status
0,100009425,06/24/2022 12:00:00 AM,15,0
1,100009425,06/24/2022 12:15:00 AM,12,0
2,100009425,06/24/2022 12:30:00 AM,14,0
3,100009425,06/24/2022 12:45:00 AM,5,0
4,100009425,06/24/2022 01:00:00 AM,10,0


In [4]:
# Read the provided CSV file/data set.
nyc_parking = pd.read_csv('Bicycle_Parking.csv') 

# Print the output.
print(nyc_parking.info())
nyc_parking.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30178 entries, 0 to 30177
Data columns (total 11 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   BoroCode    30178 non-null  int64 
 1   BoroName    30178 non-null  object
 2   CounDist    30178 non-null  int64 
 3   AssemDist   30178 non-null  int64 
 4   StSenDist   30178 non-null  int64 
 5   CongDist    30178 non-null  int64 
 6   STREETON    17710 non-null  object
 7   XSTREET1    21881 non-null  object
 8   XSTREET2    21771 non-null  object
 9   SIDE        18395 non-null  object
 10  ASSETSUBTY  30178 non-null  object
dtypes: int64(5), object(6)
memory usage: 2.5+ MB
None


Unnamed: 0,BoroCode,BoroName,CounDist,AssemDist,StSenDist,CongDist,STREETON,XSTREET1,XSTREET2,SIDE,ASSETSUBTY
0,1,Manhattan,10,72,31,13,W 207 ST,BROADWAY,COOPER ST,S,Small Hoop
1,3,Brooklyn,35,57,25,9,,,,,Large Hoop
2,4,Queens,29,27,16,6,108 ST,63 RD,63 DR,W,Small Hoop
3,4,Queens,29,27,16,6,108 ST,63 RD,63 DR,W,Small Hoop
4,1,Manhattan,1,66,26,10,,BLEECKER ST,W HOUSTON ST,W,U-Rack


In [5]:
# Determine whether there are missing values.
nyc_parking.isnull().sum()

BoroCode          0
BoroName          0
CounDist          0
AssemDist         0
StSenDist         0
CongDist          0
STREETON      12468
XSTREET1       8297
XSTREET2       8407
SIDE          11783
ASSETSUBTY        0
dtype: int64

There are lots of information about locations missing.

In [8]:
# General statistics to check sanity of data.
nyc_parking.describe()

Unnamed: 0,BoroCode,CounDist,AssemDist,StSenDist,CongDist
count,30178.0,30178.0,30178.0,30178.0,30178.0
mean,2.526907,23.436576,56.7078,22.954901,10.286765
std,1.152441,15.186193,16.486713,6.594112,2.900433
min,1.0,1.0,23.0,10.0,3.0
25%,1.0,8.0,44.0,18.0,8.0
50%,3.0,26.0,55.0,25.0,10.0
75%,3.0,36.0,71.0,28.0,12.0
max,5.0,51.0,87.0,36.0,16.0


We can only determine there are 30,178 records avalable in the df.

In [4]:
# Check the number of different types of cycling parkings available in NYC.
nyc_parking['ASSETSUBTY'].value_counts()

Small Hoop           9201
U-Rack               8652
Large Hoop           8324
LARGE HOOP           1966
Wave Rack            1548
Bike Corral           408
GFI SLED (SILVER)      48
WAVE RACK (PARKS)      18
DOT SLED (BLACK)        7
WAVE RACK               5
STAPLE (PARKS)          1
Name: ASSETSUBTY, dtype: int64

In [6]:
# Determine cycle parking capacity by NYC borough name.
df = nyc_parking.groupby('BoroName') ['ASSETSUBTY'].count()
df.head()

BoroName
Bronx             3382
Brooklyn         11602
Manhattan         8877
Queens            5775
Staten Island      542
Name: ASSETSUBTY, dtype: int64

- Brooklyn has the highest number of parking racks avalable.
- Comparing to London (about 150,000 spaces available), NYC has less cycle parking capacity. 
- Decision was made not to investigate this example further.

In [7]:
# Read the provided XLSX file/data set.
nyc_routes = pd.read_excel('Bicycle_Routes.xlsx') 

# View the DataFrame, types and shape of the data.
print(nyc_routes.shape)
print(nyc_routes.dtypes)
nyc_routes.head()

(20824, 8)
segmentid     object
boro           int64
street        object
fromstreet    object
tostreet      object
bikedir       object
lanecount      int64
ft_facilit    object
dtype: object


Unnamed: 0,segmentid,boro,street,fromstreet,tostreet,bikedir,lanecount,ft_facilit
0,53,5,CONFERENCE HOUSE PARK GREENWAY,HYLAN BLVD,SWINNERTON ST,2,2,Greenway
1,57,5,CONFERENCE HOUSE PARK GREENWAY,HYLAN BLVD,SWINNERTON ST,2,2,Greenway
2,59,5,CONFERENCE HOUSE PARK GREENWAY,HYLAN BLVD,SWINNERTON ST,2,2,Greenway
3,61,5,CONFERENCE HOUSE PARK GREENWAY,HYLAN BLVD,SWINNERTON ST,2,2,Greenway
4,64,5,CONFERENCE HOUSE PARK GREENWAY,HYLAN BLVD,SWINNERTON ST,2,2,Greenway
