In [1]:
# Dependencies and Setup
import pandas as pd
from pathlib import Path
import numpy as np
from scipy import stats
from scipy.stats import linregress
import scipy.stats as st
import sys

In [2]:
# Load the data file, read and display it with Pandas
bridge_data = Path("Philadelphia Cleaned DOT Data.csv")

In [3]:
bridge_df = pd.read_csv(bridge_data)

bridge_df

Unnamed: 0,Latitude,Longitude,Year Built,Owner,Maintenance Responsibility,Bridge Condition,Bridge Improvement Cost,Roadway Improvement Cost,Total Improvement Cost,Year Reconstructed,Deck Width,Structure Length
0,40.103450,-74.961092,1990,4,4,G,0.0,0.0,0.0,0.0,13.0,18.3
1,39.989944,-75.250461,1935,1,27,F,86.0,252.0,1156.0,0.0,19.1,28.7
2,40.000700,-75.227539,1910,1,1,P,25.0,72.0,332.0,0.0,25.3,18.9
3,40.006661,-75.199589,1953,1,1,F,0.0,0.0,0.0,1989.0,14.9,50.3
4,40.007103,-75.192278,1960,1,1,F,605.0,1782.0,8168.0,,13.6,587.0
...,...,...,...,...,...,...,...,...,...,...,...,...
588,39.971314,-75.123447,2019,1,1,G,,,,,22.3,238.4
589,39.942597,-75.211397,2020,27,27,G,,,,,24.2,15.2
590,40.058022,-75.195347,2020,1,1,G,,,,,15.2,16.8
591,39.998933,-75.086278,2021,1,1,G,,,,,29.5,56.7


In [4]:
# Add a square ft column to the data frame
bridge_df.loc[:,"SqFt"] = bridge_df["Deck Width"] * bridge_df["Structure Length"]
bridge_df

Unnamed: 0,Latitude,Longitude,Year Built,Owner,Maintenance Responsibility,Bridge Condition,Bridge Improvement Cost,Roadway Improvement Cost,Total Improvement Cost,Year Reconstructed,Deck Width,Structure Length,SqFt
0,40.103450,-74.961092,1990,4,4,G,0.0,0.0,0.0,0.0,13.0,18.3,237.90
1,39.989944,-75.250461,1935,1,27,F,86.0,252.0,1156.0,0.0,19.1,28.7,548.17
2,40.000700,-75.227539,1910,1,1,P,25.0,72.0,332.0,0.0,25.3,18.9,478.17
3,40.006661,-75.199589,1953,1,1,F,0.0,0.0,0.0,1989.0,14.9,50.3,749.47
4,40.007103,-75.192278,1960,1,1,F,605.0,1782.0,8168.0,,13.6,587.0,7983.20
...,...,...,...,...,...,...,...,...,...,...,...,...,...
588,39.971314,-75.123447,2019,1,1,G,,,,,22.3,238.4,5316.32
589,39.942597,-75.211397,2020,27,27,G,,,,,24.2,15.2,367.84
590,40.058022,-75.195347,2020,1,1,G,,,,,15.2,16.8,255.36
591,39.998933,-75.086278,2021,1,1,G,,,,,29.5,56.7,1672.65


In [5]:
# Add column for cost to repair by multipling the SqFt by the cost to repair figures calculated by the US DOT FHA
bridge_df.loc[:,"Repair Cost Est"] = bridge_df["SqFt"] * 314
bridge_df

Unnamed: 0,Latitude,Longitude,Year Built,Owner,Maintenance Responsibility,Bridge Condition,Bridge Improvement Cost,Roadway Improvement Cost,Total Improvement Cost,Year Reconstructed,Deck Width,Structure Length,SqFt,Repair Cost Est
0,40.103450,-74.961092,1990,4,4,G,0.0,0.0,0.0,0.0,13.0,18.3,237.90,74700.60
1,39.989944,-75.250461,1935,1,27,F,86.0,252.0,1156.0,0.0,19.1,28.7,548.17,172125.38
2,40.000700,-75.227539,1910,1,1,P,25.0,72.0,332.0,0.0,25.3,18.9,478.17,150145.38
3,40.006661,-75.199589,1953,1,1,F,0.0,0.0,0.0,1989.0,14.9,50.3,749.47,235333.58
4,40.007103,-75.192278,1960,1,1,F,605.0,1782.0,8168.0,,13.6,587.0,7983.20,2506724.80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
588,39.971314,-75.123447,2019,1,1,G,,,,,22.3,238.4,5316.32,1669324.48
589,39.942597,-75.211397,2020,27,27,G,,,,,24.2,15.2,367.84,115501.76
590,40.058022,-75.195347,2020,1,1,G,,,,,15.2,16.8,255.36,80183.04
591,39.998933,-75.086278,2021,1,1,G,,,,,29.5,56.7,1672.65,525212.10


In [6]:
# Add column for cost to replace by multipling the SqFt by the cost to replace (cost to replace * 1.32) figures calculated by the US DOT FHA
bridge_df.loc[:,"Replacement Cost Est"] = bridge_df["SqFt"] * 414.48
bridge_df

Unnamed: 0,Latitude,Longitude,Year Built,Owner,Maintenance Responsibility,Bridge Condition,Bridge Improvement Cost,Roadway Improvement Cost,Total Improvement Cost,Year Reconstructed,Deck Width,Structure Length,SqFt,Repair Cost Est,Replacement Cost Est
0,40.103450,-74.961092,1990,4,4,G,0.0,0.0,0.0,0.0,13.0,18.3,237.90,74700.60,9.860479e+04
1,39.989944,-75.250461,1935,1,27,F,86.0,252.0,1156.0,0.0,19.1,28.7,548.17,172125.38,2.272055e+05
2,40.000700,-75.227539,1910,1,1,P,25.0,72.0,332.0,0.0,25.3,18.9,478.17,150145.38,1.981919e+05
3,40.006661,-75.199589,1953,1,1,F,0.0,0.0,0.0,1989.0,14.9,50.3,749.47,235333.58,3.106403e+05
4,40.007103,-75.192278,1960,1,1,F,605.0,1782.0,8168.0,,13.6,587.0,7983.20,2506724.80,3.308877e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
588,39.971314,-75.123447,2019,1,1,G,,,,,22.3,238.4,5316.32,1669324.48,2.203508e+06
589,39.942597,-75.211397,2020,27,27,G,,,,,24.2,15.2,367.84,115501.76,1.524623e+05
590,40.058022,-75.195347,2020,1,1,G,,,,,15.2,16.8,255.36,80183.04,1.058416e+05
591,39.998933,-75.086278,2021,1,1,G,,,,,29.5,56.7,1672.65,525212.10,6.932800e+05


In [7]:
#Replace all the number 4s in the 'Maintenance Responsibility' column to 'City or Municipal Highway Agency'
integer_to_string = {4: "City or Municipal Highway Agency"}

bridge_df["Maintenance Responsibility"] = bridge_df["Maintenance Responsibility"].map(integer_to_string)
bridge_df

Unnamed: 0,Latitude,Longitude,Year Built,Owner,Maintenance Responsibility,Bridge Condition,Bridge Improvement Cost,Roadway Improvement Cost,Total Improvement Cost,Year Reconstructed,Deck Width,Structure Length,SqFt,Repair Cost Est,Replacement Cost Est
0,40.103450,-74.961092,1990,4,City or Municipal Highway Agency,G,0.0,0.0,0.0,0.0,13.0,18.3,237.90,74700.60,9.860479e+04
1,39.989944,-75.250461,1935,1,,F,86.0,252.0,1156.0,0.0,19.1,28.7,548.17,172125.38,2.272055e+05
2,40.000700,-75.227539,1910,1,,P,25.0,72.0,332.0,0.0,25.3,18.9,478.17,150145.38,1.981919e+05
3,40.006661,-75.199589,1953,1,,F,0.0,0.0,0.0,1989.0,14.9,50.3,749.47,235333.58,3.106403e+05
4,40.007103,-75.192278,1960,1,,F,605.0,1782.0,8168.0,,13.6,587.0,7983.20,2506724.80,3.308877e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
588,39.971314,-75.123447,2019,1,,G,,,,,22.3,238.4,5316.32,1669324.48,2.203508e+06
589,39.942597,-75.211397,2020,27,,G,,,,,24.2,15.2,367.84,115501.76,1.524623e+05
590,40.058022,-75.195347,2020,1,,G,,,,,15.2,16.8,255.36,80183.04,1.058416e+05
591,39.998933,-75.086278,2021,1,,G,,,,,29.5,56.7,1672.65,525212.10,6.932800e+05


In [8]:
#Filter down the 'Maintenance Responsibility' column to bridges only maintained by the city of Philadelphia

bridge_df  = bridge_df.loc[bridge_df["Maintenance Responsibility"]== "City or Municipal Highway Agency"]
bridge_df

Unnamed: 0,Latitude,Longitude,Year Built,Owner,Maintenance Responsibility,Bridge Condition,Bridge Improvement Cost,Roadway Improvement Cost,Total Improvement Cost,Year Reconstructed,Deck Width,Structure Length,SqFt,Repair Cost Est,Replacement Cost Est
0,40.103450,-74.961092,1990,4,City or Municipal Highway Agency,G,0.0,0.0,0.0,0.0,13.0,18.3,237.90,74700.60,9.860479e+04
215,40.017092,-75.211428,1928,1,City or Municipal Highway Agency,F,19.0,55.0,253.0,0.0,19.2,32.9,631.68,198347.52,2.618187e+05
221,39.963467,-75.186778,1964,4,City or Municipal Highway Agency,P,133.0,390.0,1790.0,0.0,16.1,167.0,2688.70,844251.80,1.114412e+06
222,39.963861,-75.185025,1964,4,City or Municipal Highway Agency,P,27.0,78.0,359.0,0.0,16.2,27.1,439.02,137852.28,1.819650e+05
258,40.035356,-75.208878,1957,1,City or Municipal Highway Agency,F,95.0,280.0,1285.0,2012.0,20.7,115.2,2384.64,748776.96,9.883856e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
562,39.958900,-75.170372,2017,4,City or Municipal Highway Agency,G,,,,,19.8,29.9,592.02,185894.28,2.453804e+05
572,39.959400,-75.173906,2018,4,City or Municipal Highway Agency,G,,,,,20.4,39.0,795.60,249818.40,3.297603e+05
573,39.959500,-75.175500,2018,4,City or Municipal Highway Agency,G,,,,,25.5,32.9,838.95,263430.30,3.477280e+05
577,39.960194,-75.159694,1892,4,City or Municipal Highway Agency,P,,,,2018.0,4.8,17.1,82.08,25773.12,3.402052e+04


In [9]:
bridge_df.columns

Index(['Latitude', 'Longitude', 'Year Built', 'Owner',
       'Maintenance Responsibility', 'Bridge Condition',
       'Bridge Improvement Cost', 'Roadway Improvement Cost',
       'Total Improvement Cost', 'Year Reconstructed', 'Deck Width',
       'Structure Length', 'SqFt', 'Repair Cost Est', 'Replacement Cost Est'],
      dtype='object')

In [10]:
# Trim the columns of data to the one we need
bridge_df = bridge_df.drop(['Owner','Bridge Improvement Cost', 'Roadway Improvement Cost',
       'Total Improvement Cost', 'Year Reconstructed','Deck Width',
       'Structure Length'], axis=1)
bridge_df

Unnamed: 0,Latitude,Longitude,Year Built,Maintenance Responsibility,Bridge Condition,SqFt,Repair Cost Est,Replacement Cost Est
0,40.103450,-74.961092,1990,City or Municipal Highway Agency,G,237.90,74700.60,9.860479e+04
215,40.017092,-75.211428,1928,City or Municipal Highway Agency,F,631.68,198347.52,2.618187e+05
221,39.963467,-75.186778,1964,City or Municipal Highway Agency,P,2688.70,844251.80,1.114412e+06
222,39.963861,-75.185025,1964,City or Municipal Highway Agency,P,439.02,137852.28,1.819650e+05
258,40.035356,-75.208878,1957,City or Municipal Highway Agency,F,2384.64,748776.96,9.883856e+05
...,...,...,...,...,...,...,...,...
562,39.958900,-75.170372,2017,City or Municipal Highway Agency,G,592.02,185894.28,2.453804e+05
572,39.959400,-75.173906,2018,City or Municipal Highway Agency,G,795.60,249818.40,3.297603e+05
573,39.959500,-75.175500,2018,City or Municipal Highway Agency,G,838.95,263430.30,3.477280e+05
577,39.960194,-75.159694,1892,City or Municipal Highway Agency,P,82.08,25773.12,3.402052e+04


In [None]:
english_movies_df.to_csv("English_Movies_Data.csv", index=False)