In [1]:
# Importing libraries
import pandas as pd
import numpy as np

In [47]:
# Defining function to find the hospital in the desired rank
def rankall(outcome, num = "best", path = "outcome-of-care-measures.csv"):
    """Find, for every state, the hospital at a given 30-day mortality rank.

    Hospitals are ranked within each state by ascending mortality rate for
    the chosen outcome; ties are broken alphabetically by hospital name.
    Rows whose rate cannot be parsed as a number are excluded from ranking.

    Parameters
    ----------
    outcome : str
        One of "heart attack", "heart failure" or "pneumonia".
    num : str or int, default "best"
        "best" (rank 1), "worst" (last ranked hospital of each state) or a
        positive integer rank (1-based).
    path : str, default "outcome-of-care-measures.csv"
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        One row per state (sorted alphabetically) with columns "hospital"
        and "state". "hospital" is NaN when the state has fewer ranked
        hospitals than the requested rank.

    Raises
    ------
    ValueError
        "Invalid outcome" if `outcome` is not supported; "Invalid rank" if
        `num` is neither "best"/"worst" nor a positive whole number.
    """
    # Mortality-rate column used for each supported outcome
    rate_columns = {
        "heart attack": "Hospital 30-Day Death (Mortality) Rates from Heart Attack",
        "heart failure": "Hospital 30-Day Death (Mortality) Rates from Heart Failure",
        "pneumonia": "Hospital 30-Day Death (Mortality) Rates from Pneumonia",
    }

    # Validate the arguments before touching the file so bad calls fail fast
    if not isinstance(outcome, str) or outcome not in rate_columns:
        raise ValueError("Invalid outcome")

    position = None
    if isinstance(num, str):
        if num not in ("best", "worst"):
            raise ValueError("Invalid rank")
    else:
        try:
            position = int(num)
        except (TypeError, ValueError, OverflowError):
            raise ValueError("Invalid rank") from None
        # Reject fractional ranks (e.g. 2.5) and ranks below 1
        if position != num or position < 1:
            raise ValueError("Invalid rank")

    # Read the data
    data = pd.read_csv(path)
    rate_col = rate_columns[outcome]

    # States are listed from the raw data, so a state whose hospitals all
    # lack a usable rate still appears in the result (with NaN)
    states = sorted( data["State"].unique() )

    # Convert the rates to numeric (unparseable values become NaN), drop the
    # NaN rows, then order by rate and hospital name
    ranked = (
        data.assign(**{rate_col: pd.to_numeric(data[rate_col], errors = "coerce")})
            .dropna(subset = [rate_col])
            .sort_values(by = [rate_col, "Hospital Name"])
    )

    # Ordered hospital names of each state, computed in a single pass
    hospitals_by_state = {
        state: group["Hospital Name"].tolist()
        for state, group in ranked.groupby("State")
    }

    # Collect one record per state and build the data frame once at the end
    records = []
    for state in states:
        names = hospitals_by_state.get(state, [])

        # Translate the requested rank into a 0-based position
        if num == "best":
            index = 0
        elif num == "worst":
            index = len(names) - 1
        else:
            index = position - 1

        # Out-of-range positions (including "worst" on an empty state) give NaN
        hospital = names[index] if 0 <= index < len(names) else np.nan
        records.append( {"hospital": hospital, "state": state} )

    # Return data frame as output of function
    return pd.DataFrame(records, columns = ["hospital", "state"])

In [48]:
# Test: numeric rank. Show the first 10 rows of the per-state result for the
# 20th-ranked hospital on heart attack mortality. States with fewer than 20
# ranked hospitals show NaN in the hospital column.
rankall("heart attack", 20).head(10) 

Unnamed: 0,hospital,state
0,,AK
1,D W MCMILLAN MEMORIAL HOSPITAL,AL
2,ARKANSAS METHODIST MEDICAL CENTER,AR
3,JOHN C LINCOLN DEER VALLEY HOSPITAL,AZ
4,SHERMAN OAKS HOSPITAL,CA
5,SKY RIDGE MEDICAL CENTER,CO
6,MIDSTATE MEDICAL CENTER,CT
7,,DC
8,,DE
9,SOUTH FLORIDA BAPTIST HOSPITAL,FL


In [49]:
# Test: "worst" rank. Show the last 3 rows of the per-state result for the
# hospital with the highest pneumonia mortality rate in each state.
rankall("pneumonia", "worst").tail(3) 

Unnamed: 0,hospital,state
51,"MAYO CLINIC HEALTH SYSTEM - NORTHLAND, INC",WI
52,PLATEAU MEDICAL CENTER,WV
53,NORTH BIG HORN HOSPITAL DISTRICT,WY


In [50]:
# Test: default rank (num = "best"). Show the last 10 rows of the per-state
# result for the hospital with the lowest heart failure mortality rate.
rankall("heart failure").tail(10) 

Unnamed: 0,hospital,state
44,WELLMONT HAWKINS COUNTY MEMORIAL HOSPITAL,TN
45,FORT DUNCAN MEDICAL CENTER,TX
46,VA SALT LAKE CITY HEALTHCARE - GEORGE E. WAHLE...,UT
47,SENTARA POTOMAC HOSPITAL,VA
48,GOV JUAN F LUIS HOSPITAL & MEDICAL CTR,VI
49,SPRINGFIELD HOSPITAL,VT
50,HARBORVIEW MEDICAL CENTER,WA
51,AURORA ST LUKES MEDICAL CENTER,WI
52,FAIRMONT GENERAL HOSPITAL,WV
53,CHEYENNE VA MEDICAL CENTER,WY
