# Transportation Notebook <br> ~ Tobias Beidler-Shenk
- - -

In [2]:
import pandas as pd
import numpy as np
import geopandas as geo
%matplotlib inline
import matplotlib.pyplot as plt

transportation = pd.read_csv("transportation.csv", index_col = "Neighborhood")
weights = pd.read_csv("weights.csv", index_col = "Timestamp")

# Public Transportation
- - -

First, I'll analyze the strength of the public transportation offered in a neighborhood. The higher percentage of people who take public transportation to work, the stronger we consider that system to be.

In [3]:
# Removes percentage signs, so we can order numerically:
count = 0
for x in transportation['Commute to Work: Public Transportation (2010)']:
    x = str(x).rstrip('%')
    transportation.iloc[count, 11] = float(x)
    count+=1

# Displays top 8 scores, after weight:
pt_scores = transportation.sort_values(by=['Commute to Work: Public Transportation (2010)'], ascending = False).iloc[0:, 11:12]
pt_scores["Public Transit Scores"] = np.nan
#transit_scale = 100/pt_scores.iloc[0,0]
for x in range (len(pt_scores)):
    pt_scores.iloc[x,1] = pt_scores.iloc[x,0] * weights.iloc[len(weights)-1, 0]
print(weights.iloc[len(weights)-1, 0])
pt_scores.head(8)

4.344827586


Unnamed: 0_level_0,Commute to Work: Public Transportation (2010),Public Transit Scores
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1
Arlington Heights,88.9,386.255172
Northview Heights,77.4,336.289655
St. Clair,55.0,238.965517
Middle Hill,48.3,209.855172
Terrace Village,45.2,196.386207
Beltzhoover,43.1,187.262069
Manchester,41.7,181.17931
California-Kirkbride,41.0,178.137931


insert geopandas here

# Car-Friendliness
- - -

Next, we'll look at the car-friendliness of each neighborhood.

In [4]:
# Sorting by street density:
street_density = transportation.sort_values(by=['Street Density (st. mi/area sq. mi)'], ascending = False).iloc[0:, 4:5]
street_density.head(8)

Unnamed: 0_level_0,Street Density (st. mi/area sq. mi)
Neighborhood,Unnamed: 1_level_1
East Allegheny,71.8
Bluff,50.4
Central Business District,48.2
Knoxville,47.7
Central Northside,47.0
Bloomfield,45.2
North Shore,44.9
Middle Hill,43.5


In [5]:
# Removes percentage signs, so we can order numerically:
count = 0
for x in transportation['Commute to Work: Drive Alone (2010)']:
    x = str(x).rstrip('%')
    transportation.iloc[count, 9] = float(x)
    count+=1

# Displays driving percentages per neighborhood
driving_percentages = transportation.sort_values(by=['Commute to Work: Drive Alone (2010)'], ascending = False).iloc[0:, 9:10]
driving_percentages.head(8)

Unnamed: 0_level_0,Commute to Work: Drive Alone (2010)
Neighborhood,Unnamed: 1_level_1
East Carnegie,100.0
Summer Hill,94.6
New Homestead,88.9
Lincoln Place,83.9
Hays,81.9
Banksville,81.5
Ridgemont,80.4
Oakwood,78.4


In [6]:
# Sets "Nones" to 0s, and when areas are specificed, counts the number (ex. "A, B" = 2 areas):
for x in range (len(transportation)):
    if (transportation.iloc[x, 7] == "None"):
        transportation.iloc[x, 7] = 0;
    else:
        transportation.iloc[x, 7] = len(str(transportation.iloc[x, 7]).split(','))
    
# Displays number of parking areas per neighborhood
parking_available = transportation.sort_values(by=['Res. Permit Parking Area(s)'], ascending = False).iloc[0:, 7:8]
parking_available.head(8)

Unnamed: 0_level_0,Res. Permit Parking Area(s)
Neighborhood,Unnamed: 1_level_1
Shadyside,5
Squirrel Hill North,3
South Oakland,2
Mount Washington,2
West Oakland,2
Beechview,2
North Oakland,2
Bloomfield,2


From here, we create one "parking score" by combining these values and multiplying the result by our weight for the survey.

In [7]:
# Scales up the values so they each represent an equal part of the score (each out of 100, like percentages)
parking_scale = 100/parking_available.iloc[0]
density_scale = 100/street_density.iloc[0]
driving_scale = 100/driving_percentages.iloc[0]
for x in range (len(parking_available)):
    parking_available.iloc[x] = parking_available.iloc[x] * parking_scale
    street_density.iloc[x] = street_density.iloc[x] * density_scale
    driving_percentages.iloc[x] = driving_percentages.iloc[x] * driving_scale

# Averages the three values for each neighborhood (street density, parking areas available and driving percentages) 
# and multiplies the result by the weight from the survey
driving_scores = pd.concat([street_density, parking_available, driving_percentages], axis = 1, join='inner')
driving_scores["Driving Scores"] = np.nan 
for x in range(len(driving_scores)):
    driving_scores.iloc[x,3] = 0
    for y in range(3):
        driving_scores.iloc[x,3] += driving_scores.iloc[x,y]
    driving_scores.iloc[x,3] = (driving_scores.iloc[x,3]/3) * weights.iloc[len(weights)-1, 1]
print(weights.iloc[len(weights)-1,0])
    
# Displays top 8 scores, after weight and scaling:
driving_scores.sort_values(by=['Driving Scores'], ascending = False).head(8)

4.344827586


Unnamed: 0_level_0,Street Density (st. mi/area sq. mi),Res. Permit Parking Area(s),Commute to Work: Drive Alone (2010),Driving Scores
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shadyside,41.225627,100,44.8,233.06659
East Allegheny,100.0,20,44.3,205.847126
North Shore,62.534819,20,72.0,193.612589
Mount Washington,41.78273,40,65.0,183.900202
Crawford-Roberts,59.888579,20,65.4,182.02822
Bloomfield,62.952646,40,42.0,181.607338
South Oakland,52.64624,40,46.6,174.457932
Squirrel Hill North,30.222841,60,47.5,172.549307


# Walking/Biking
- - -

Finally, we will consider the strength of a neighborhood in the context of walking/biking.

In [8]:
# Removes percentage signs so we can order numerically:
count = 0
for x in transportation['Commute to Work: Walk (2010)']:
    x = str(x).rstrip('%')
    transportation.iloc[count, 15] = float(x)
    count+=1

# Displays walking percentages for each neighborhood:
walking = transportation.sort_values(by=['Commute to Work: Walk (2010)'], ascending = False).iloc[0:, 15:16]
walking.head(8)

Unnamed: 0_level_0,Commute to Work: Walk (2010)
Neighborhood,Unnamed: 1_level_1
Central Oakland,62.4
North Oakland,52.6
Allegheny Center,43.8
Bluff,43.1
Central Business District,42.0
West Oakland,40.8
Terrace Village,31.2
South Oakland,30.2


In [9]:
# Removes percentage signs
count = 0
for x in transportation['Commute to Work: Bicycle (2010)']:
    x = str(x).rstrip('%')
    transportation.iloc[count, 14] = float(x)
    count+=1
  
# displays biking percentages
biking = transportation.sort_values(by=['Commute to Work: Bicycle (2010)'], ascending = False).iloc[0:, 14:15]
biking.head(8)

Unnamed: 0_level_0,Commute to Work: Bicycle (2010)
Neighborhood,Unnamed: 1_level_1
Lower Lawrenceville,7.5
Bloomfield,6.4
Spring Hill-City View,6.3
Spring Garden,5.2
Friendship,4.9
Greenfield,3.1
Swisshelm Park,3.1
North Oakland,2.9


In [10]:
# Scales up the values for walking/biking so they each represent an equal part of the score:
walking_scale = 100/walking.iloc[0]
biking_scale = 100/biking.iloc[0]
for x in range (len(walking)):
    walking.iloc[x] = walking.iloc[x] * walking_scale
    biking.iloc[x] = biking.iloc[x] * biking_scale

# Averages the two values for each neighborhood (biking and walking percentages) and multiplies the result by the weight from the survey:
bw_scores = pd.concat([walking, biking], axis = 1, join='inner')
bw_scores["Biking/Walking Scores"] = np.nan 
for x in range(len(bw_scores)):
    bw_scores.iloc[x,2] = 0
    for y in range(2):
        bw_scores.iloc[x,2] += bw_scores.iloc[x,y]
    bw_scores.iloc[x,2] = (bw_scores.iloc[x,2]/2) * weights.iloc[len(weights)-1, 2]
    # Still need the weight dont forget

# Displays the top 8 scores, after weight:
print(weights.iloc[len(weights)-1, 2])
bw_scores.sort_values(by=['Biking/Walking Scores'], ascending = False).head(8)

4.333333333


Unnamed: 0_level_0,Commute to Work: Walk (2010),Commute to Work: Bicycle (2010),Biking/Walking Scores
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
North Oakland,84.2949,38.6667,266.416667
Lower Lawrenceville,21.9551,100.0,264.236111
Bloomfield,30.1282,85.3333,250.166667
Central Oakland,100.0,0.0,216.666667
Friendship,32.2115,65.3333,211.347222
Spring Hill-City View,7.05128,84.0,197.277778
Spring Garden,20.0321,69.3333,193.625
Allegheny Center,70.1923,0.0,152.083333


# Final Scores
- - -

Finally, we take the scores for the three categories (public transit, driving, and walking/biking) and combine them to create one overall transportation score.

In [11]:
overall_scores = pd.concat([pt_scores.iloc[0:,1:2], driving_scores.iloc[0:,3:4], bw_scores.iloc[0:,2:3]], axis=1, join='inner')
overall_scores["Overall"] = np.nan
for x in range (len(overall_scores)):
    overall_scores.iloc[x,3] = 0
    for y in range (3):
        overall_scores.iloc[x,3] += overall_scores.iloc[x,y]
overall_scores.sort_values(by=['Overall'], ascending = False).head(8)
    

Unnamed: 0_level_0,Public Transit Scores,Driving Scores,Biking/Walking Scores,Overall
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bloomfield,86.896552,181.607338,250.166667,518.670557
Friendship,123.827586,117.268268,211.347222,452.443076
Lower Lawrenceville,79.075862,107.403371,264.236111,450.715345
North Oakland,49.965517,128.44711,266.416667,444.829294
Shadyside,116.875862,233.06659,94.736111,444.678563
Arlington Heights,386.255172,28.21548,0.0,414.470653
Central Oakland,57.786207,129.516066,216.666667,403.96894
Northview Heights,336.289655,42.805699,19.791667,398.887021


# Analysis of Results/Conclusion
- - -

Bloomfield, Friendship and Lower Lawrenceville take the top 3 places! Some other noteable results:
1. Arlington Heights and Northview Heights both place in the top 8 due to their very good transportation systems. However, they severely lack in other areas.
2. Although Bloomfield wins over friendship by quite a few points, an argument can be made that Friendship is more balanced overall (all three categories over 100)
3. For each individual category, our winners were:<br>
   __Public Transit:__ Arlington Heights<br>
   __Driving:__ Shadyside<br>
   __Walking/Biking:__ North Oakland