# Transportation Notebook <br> ~ Tobias Beidler-Shenk

In [216]:
import pandas as pd
import numpy as np
import geopandas as geo
%matplotlib inline
import matplotlib.pyplot as plt

# Load the per-neighborhood transportation table, using the neighborhood name as the row index.
transportation = pd.read_csv(
    "transportation.csv",
    index_col="Neighborhood",
)
transportation

Unnamed: 0_level_0,Sector #,Population (2010),Miles of Major Roads,Total Street Miles,Street Density (st. mi/area sq. mi),# Sets of Steps,# Step Treads,Res. Permit Parking Area(s),Total Working Pop. (Age 16+) (2010),Commute to Work: Drive Alone (2010),Commute to Work: Carpool/Vanpool (2010),Commute to Work: Public Transportation (2010),Commute to Work: Taxi (2010),Commute to Work: Motorcycle (2010),Commute to Work: Bicycle (2010),Commute to Work: Walk (2010),Commute to Work: Other (2010),Work at Home (2010)
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Allegheny Center,3,933,2.23,4.14,19.7,0,0,,386,27.7%,9.8%,16.8%,0.0%,0.0%,0.0%,43.8%,0.0%,1.8%
Allegheny West,3,462,1.47,4.87,34.5,0,0,G,151,23.2%,20.5%,15.9%,14.6%,0.0%,0.0%,25.8%,0.0%,0.0%
Allentown,6,2500,1.65,12.33,41.8,12,474,,824,47.0%,10.9%,38.0%,0.0%,0.8%,0.0%,0.0%,1.5%,1.8%
Arlington,7,1869,0.86,10.73,22.8,13,428,,673,74.0%,7.3%,13.5%,0.0%,0.0%,0.0%,1.9%,0.9%,2.4%
Arlington Heights,7,244,0.00,1.09,8.2,0,0,,72,11.1%,0.0%,88.9%,0.0%,0.0%,0.0%,0.0%,0.0%,0.0%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Upper Lawrenceville,13,2669,1.83,12.09,29.9,9,1121,,1163,56.7%,5.4%,25.0%,0.0%,0.0%,1.0%,5.2%,0.0%,6.5%
West End,4,254,4.17,7.04,35.2,5,311,,64,48.4%,17.2%,15.6%,0.0%,18.8%,0.0%,0.0%,0.0%,0.0%
West Oakland,14,2604,1.26,6.49,30.2,6,376,"B, C",834,35.5%,1.0%,22.1%,0.0%,0.0%,0.0%,40.8%,0.7%,0.0%
Westwood,4,3066,2.41,16.64,24.0,8,375,,1563,63.0%,11.4%,12.9%,0.0%,1.2%,0.0%,8.7%,0.0%,2.9%


# Public Transportation

First, I'll analyze the strength of the public transportation offered in a neighborhood. The higher the percentage of people who take public transportation to work, the stronger we consider that system to be.

In [111]:
# Convert the "16.8%"-style strings to floats so the column orders numerically.
# Casting to str first keeps this cell safe to re-run after the column already holds floats.
transit_col = 'Commute to Work: Public Transportation (2010)'
transportation[transit_col] = (
    transportation[transit_col].astype(str).str.rstrip('%').astype(float)
)

# Displays the top 8 neighborhoods by public-transportation commute share
# (raw percentages -- the survey weight has not been applied here):
publictransit = transportation[[transit_col]].sort_values(by=transit_col, ascending = False)
publictransit.head(8)

Unnamed: 0_level_0,Commute to Work: Public Transportation (2010)
Neighborhood,Unnamed: 1_level_1
Arlington Heights,88.9
Northview Heights,77.4
St. Clair,55.0
Middle Hill,48.3
Terrace Village,45.2
Beltzhoover,43.1
Manchester,41.7
California-Kirkbride,41.0


**TODO:** insert geopandas map here.

# Car-Friendliness

Next, we'll look at the car-friendliness of each neighborhood.

In [124]:
# Rank neighborhoods from the densest street network to the sparsest,
# keeping only the street-density column:
street_density = (
    transportation[['Street Density (st. mi/area sq. mi)']]
    .sort_values('Street Density (st. mi/area sq. mi)', ascending=False)
)
street_density.head(8)

Unnamed: 0_level_0,Street Density (st. mi/area sq. mi)
Neighborhood,Unnamed: 1_level_1
East Allegheny,71.8
Bluff,50.4
Central Business District,48.2
Knoxville,47.7
Central Northside,47.0
Bloomfield,45.2
North Shore,44.9
Middle Hill,43.5


In [215]:
# Convert the "27.7%"-style strings to floats so the column orders numerically.
# Casting to str first keeps this cell safe to re-run after the column already holds floats.
driving_col = 'Commute to Work: Drive Alone (2010)'
transportation[driving_col] = (
    transportation[driving_col].astype(str).str.rstrip('%').astype(float)
)

# Displays the neighborhoods with the highest share of commuters who drive alone
driving_percentages = transportation[[driving_col]].sort_values(by=driving_col, ascending = False)
driving_percentages.head(8)

Unnamed: 0_level_0,Commute to Work: Drive Alone (2010)
Neighborhood,Unnamed: 1_level_1
East Carnegie,100.0
Summer Hill,94.6
New Homestead,88.9
Lincoln Place,83.9
Hays,81.9
Banksville,81.5
Ridgemont,80.4
Oakwood,78.4


In [171]:
def _count_permit_areas(value):
    """Return how many residential permit parking areas one cell lists.

    Missing values and the literal "None" count as 0. A comma-separated list
    of area codes such as "B, C" counts its entries. Values that are already
    numeric (or numeric strings) are passed through as counts, so the cell
    can be re-run after the column has been converted.
    """
    if isinstance(value, str):
        text = value.strip()
        if text in ("", "None"):
            return 0
        if text.isdigit():
            return int(text)
        return sum(1 for code in text.split(",") if code.strip())
    if pd.isna(value):
        return 0
    return int(value)


# Replaces the raw area codes with a per-neighborhood count ("None"/missing -> 0)
# so the column holds only numbers and can be sorted and compared.
parking_col = 'Res. Permit Parking Area(s)'
transportation[parking_col] = transportation[parking_col].map(_count_permit_areas)

# Neighborhoods with the most permit parking areas first
parking_available = transportation[[parking_col]].sort_values(by=parking_col, ascending = False)
parking_available.head(8)

Unnamed: 0_level_0,Res. Permit Parking Area(s)
Neighborhood,Unnamed: 1_level_1
Shadyside,5
Squirrel Hill North,3
South Oakland,2
Mount Washington,2
West Oakland,2
Beechview,2
North Oakland,2
Bloomfield,2


From here, we create a single car-friendliness score (our "parking score") by scaling these three values to a common 0–100 range and averaging them; the result is then to be multiplied by our weight from the survey.

In [221]:
# Rescales each metric so its largest value becomes 100 (like a percentage),
# which lets all three contribute an equal part of the score.
# The maximum is taken explicitly rather than relying on each frame's sort order.
parking_available = parking_available.astype(float)
parking_available = parking_available * (100 / parking_available.max())
street_density = street_density.astype(float)
street_density = street_density * (100 / street_density.max())
driving_percentages = driving_percentages.astype(float)
driving_percentages = driving_percentages * (100 / driving_percentages.max())

# Averages the three rescaled values for each neighborhood (street density,
# parking areas available and driving percentages). The frames are sorted
# differently, so concat matches rows by neighborhood name, not by position.
scores = pd.concat([street_density, parking_available, driving_percentages], axis = 1, join='inner')
# skipna=False: a neighborhood missing any of the three values gets NaN rather than a partial average
scores["Scores"] = scores.mean(axis = 1, skipna = False)
# TODO: still need to multiply by the weight from the survey

# Displays top 8 scores (not yet weighted):
scores.sort_values(by=['Scores'], ascending = False).head(8)

Unnamed: 0_level_0,Street Density (st. mi/area sq. mi),Res. Permit Parking Area(s),Commute to Work: Drive Alone (2010),Scores
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shadyside,41.225627,100,44.8,62.008542
East Allegheny,100.0,20,44.3,54.766667
North Shore,62.534819,20,72.0,51.511606
Mount Washington,41.78273,40,65.0,48.927577
Crawford-Roberts,59.888579,20,65.4,48.429526
Bloomfield,62.952646,40,42.0,48.317549
South Oakland,52.64624,40,46.6,46.415413
Squirrel Hill North,30.222841,60,47.5,45.907614


# Walking/Biking

Finally, we will consider the strength of a neighborhood in the context of walking/biking.

In [218]:
# Convert the "43.8%"-style strings to floats so the column orders numerically.
# Casting to str first keeps this cell safe to re-run after the column already holds floats.
walking_col = 'Commute to Work: Walk (2010)'
transportation[walking_col] = (
    transportation[walking_col].astype(str).str.rstrip('%').astype(float)
)

# Displays the neighborhoods with the highest share of commuters who walk
walking = transportation[[walking_col]].sort_values(by=walking_col, ascending = False)
walking.head(8)

Unnamed: 0_level_0,Commute to Work: Walk (2010)
Neighborhood,Unnamed: 1_level_1
Central Oakland,62.4
North Oakland,52.6
Allegheny Center,43.8
Bluff,43.1
Central Business District,42.0
West Oakland,40.8
Terrace Village,31.2
South Oakland,30.2


In [219]:
# Convert the "7.5%"-style strings to floats so the column orders numerically.
# Casting to str first keeps this cell safe to re-run after the column already holds floats.
biking_col = 'Commute to Work: Bicycle (2010)'
transportation[biking_col] = (
    transportation[biking_col].astype(str).str.rstrip('%').astype(float)
)

# Displays the neighborhoods with the highest share of commuters who bike
biking = transportation[[biking_col]].sort_values(by=biking_col, ascending = False)
biking.head(8)

Unnamed: 0_level_0,Commute to Work: Bicycle (2010)
Neighborhood,Unnamed: 1_level_1
Lower Lawrenceville,7.5
Bloomfield,6.4
Spring Hill-City View,6.3
Spring Garden,5.2
Friendship,4.9
Greenfield,3.1
Swisshelm Park,3.1
North Oakland,2.9


In [223]:
# Rescales walking and biking so the largest value in each becomes 100,
# which lets both contribute an equal part of the score.
# The maximum is taken explicitly rather than relying on each frame's sort order.
walking = walking.astype(float)
walking = walking * (100 / walking.max())
biking = biking.astype(float)
biking = biking * (100 / biking.max())

# Averages the two rescaled values for each neighborhood (walking and biking percentages).
# The frames are sorted differently, so concat matches rows by neighborhood name, not by position.
scores = pd.concat([walking, biking], axis = 1, join='inner')
# skipna=False: a neighborhood missing either value gets NaN rather than a partial average
scores["Scores"] = scores.mean(axis = 1, skipna = False)
# TODO: still need to multiply by the weight from the survey

# Displays the top 8 scores (not yet weighted):
scores.sort_values(by=['Scores'], ascending = False).head(8)