In [9]:
import pandas as pd 
import numpy as np

deaths = pd.read_csv('Multiple Cause of Death, 1999-2014.csv')

# Replace the spaces in every column name with underscores.
# One rename() call with a callable covers all columns at once, instead of
# rebuilding the whole frame once per column inside a loop.
deaths = deaths.rename(columns=lambda name: name.replace(" ", "_"))


In [10]:
# Clean the data and convert to numbers.
# Rows whose Deaths value is "Suppressed" or whose Crude_Rate is "Unreliable"
# are removed. A single boolean mask replaces the iterrows()/drop() loop,
# which copied the entire frame once for every row it removed.
unusableRows = (deaths["Deaths"] == "Suppressed") | (deaths["Crude_Rate"] == "Unreliable")

# assign() returns a fresh frame, so the filtered result is never modified in
# place. errors='raise' makes any other non-numeric value fail loudly rather
# than silently turning into NaN.
deaths = deaths.loc[~unusableRows].assign(
    Deaths=lambda frame: pd.to_numeric(frame["Deaths"], errors='raise'),
    Crude_Rate=lambda frame: pd.to_numeric(frame["Crude_Rate"], errors='raise'),
)


In [11]:
# Find all states and years where deaths were greater than 2000.
# A single .loc call applies the filter once and selects both columns,
# instead of filtering the frame twice and concatenating the two Series.
deathsOver2000 = deaths.loc[deaths["Deaths"] > 2000, ["State", "Year"]]
print(deathsOver2000.to_string(index = False) + "\n") #ONLY show the State and Year columns

     State  Year
California  2009
California  2010
California  2011
California  2013
California  2014
      Ohio  2014



In [12]:
# Add up the Deaths column per state; the summed column is labelled "Total"
deathsByState = deaths.groupby("State")["Deaths"]
totalDeaths = deathsByState.sum().rename("Total").reset_index()
print(totalDeaths.to_string(index = False), end="\n\n")

               State  Total
             Alabama   2173
              Alaska    644
             Arizona   7298
            Arkansas   2178
          California  27044
            Colorado   5155
         Connecticut   3755
            Delaware   1091
District of Columbia    693
             Florida  19919
             Georgia   6052
              Hawaii    955
               Idaho    971
            Illinois  13072
             Indiana   3758
                Iowa   1642
              Kansas   1794
            Kentucky   6283
           Louisiana   2419
               Maine   1589
            Maryland   9403
       Massachusetts   9923
            Michigan   8748
           Minnesota   3158
         Mississippi   1048
            Missouri   6460
             Montana    752
            Nebraska    576
              Nevada   5954
       New Hampshire   1859
          New Jersey   6961
          New Mexico   4002
            New York  16156
      North Carolina  10413
        North Dakota

In [13]:
# Keep only the rows whose State value begins with a capital "A"
startsWithA = deaths["State"].str.startswith("A")
aStates = deaths.loc[startsWithA]
print(aStates.to_string(index = False), end="\n\n")

   State  Year  Deaths  Population  Crude_Rate Crude_Rate_Lower_95%_Confidence_Interval Crude_Rate_Upper_95%_Confidence_Interval
 Alabama  1999      39     4430141         0.9                                      0.6                                      1.2
 Alabama  2000      46     4447100         1.0                                      0.8                                      1.4
 Alabama  2001      67     4467634         1.5                                      1.2                                      1.9
 Alabama  2002      75     4480089         1.7                                      1.3                                      2.1
 Alabama  2003      54     4503491         1.2                                      0.9                                      1.6
 Alabama  2004      92     4530729         2.0                                      1.6                                      2.5
 Alabama  2005      83     4569805         1.8                                      1.4          

In [14]:
def getTopFivePopulatedStates(dataframe: pd.DataFrame, year: int):
    """Return the rows with the five largest Population values for one year.

    Args:
        dataframe: frame that has "Year" and "Population" columns.
        year: value matched against the "Year" column.

    Returns:
        The rows of ``dataframe`` whose "Year" equals ``year``, limited to the
        five largest "Population" values, largest first. Fewer than five rows
        come back when the year has fewer than five rows.
    """
    rowsForYear = dataframe.loc[dataframe["Year"] == year]
    return rowsForYear.nlargest(5, "Population")

# Any year present in the CSV may be passed; the five rows with the highest
# population for that year come back, and only their State names are printed.
topFive = getTopFivePopulatedStates(deaths, 2014)
print(topFive["State"].to_string(index = False), end="\n\n")

California
     Texas
   Florida
  New York
  Illinois



In [18]:
# topFive is ordered by Population (largest first), so its first row is the
# most populated state; show that state's five rows with the most deaths.
topState = topFive["State"].iloc[0]
topStateRows = deaths.loc[deaths["State"] == topState]
topFromtopState = topStateRows.nlargest(5, "Deaths")
print(topFromtopState.loc[:, ["Year", "Deaths"]].to_string(index=False), end="\n\n")

 Year  Deaths
 2014    2159
 2009    2128
 2013    2088
 2010    2059
 2011    2057



### Did deaths in New York increase immediately after 9/11 due to health effects?
Compared to the 2 years before the attacks (1999-2000 versus 2002-2003; 2001 itself is excluded)

In [32]:
# Mean New York deaths for 1999-2000 versus 2002-2003; 2001 itself falls in
# neither window.
newYork = deaths.loc[deaths["State"] == "New York"]
yearOfRow = newYork["Year"]
twoYearsBefore = newYork.loc[yearOfRow.ge(1999) & yearOfRow.lt(2001)]
twoYearsAfter = newYork.loc[yearOfRow.gt(2001) & yearOfRow.le(2003)]

meanBefore = twoYearsBefore["Deaths"].mean()
meanAfter = twoYearsAfter["Deaths"].mean()
print("Two Years Before 9/11:", meanBefore)
print("Two Years After 9/11:", meanAfter)


Two Years Before 9/11: 592.5
Two Years After 9/11: 600.0


Conclusion: There does not appear to be a noticeable increase in deaths due to 9/11 health effects within this time period (an average of 592.5 deaths before versus 600.0 after).