#### Hospitals CSV File

##### From the CORGIS Dataset Project

###### By Dennis Kafura
###### Version 1.0.0, created 6/10/2019
###### Tags: hospitals, health care, medical, hospital costs, hospital quality

In [54]:
import pandas as pd

In [55]:
# Load the CORGIS hospitals data from 'hospitals.csv' (path is relative to the
# notebook's working directory) and show the column labels that are available.
df = pd.read_csv('hospitals.csv')
df.columns

Index(['Facility.Name', 'Facility.City', 'Facility.State', 'Facility.Type',
       'Rating.Overall', 'Rating.Mortality', 'Rating.Safety',
       'Rating.Readmission', 'Rating.Experience', 'Rating.Effectiveness',
       'Rating.Timeliness', 'Rating.Imaging', 'Procedure.Heart Attack.Cost',
       'Procedure.Heart Attack.Quality', 'Procedure.Heart Attack.Value',
       'Procedure.Heart Failure.Cost', 'Procedure.Heart Failure.Quality',
       'Procedure.Heart Failure.Value', 'Procedure.Pneumonia.Cost',
       'Procedure.Pneumonia.Quality', 'Procedure.Pneumonia.Value',
       'Procedure.Hip Knee.Cost', 'Procedure.Hip Knee.Quality',
       'Procedure.Hip Knee.Value'],
      dtype='object')

In [56]:
# Keep only the facility description, the overall rating and the two
# heart-attack columns used below, and swap the space in "Heart Attack"
# for an underscore so the column labels contain no spaces.
clean = df[[
    'Facility.Name',
    'Facility.City',
    'Facility.State',
    'Facility.Type',
    'Rating.Overall',
    'Procedure.Heart Attack.Cost',
    'Procedure.Heart Attack.Quality',
]].rename(columns={
    'Procedure.Heart Attack.Cost': 'Procedure.Heart_Attack.Cost',
    'Procedure.Heart Attack.Quality': 'Procedure.Heart_Attack.Quality',
})
clean

Unnamed: 0,Facility.Name,Facility.City,Facility.State,Facility.Type,Rating.Overall,Procedure.Heart_Attack.Cost,Procedure.Heart_Attack.Quality
0,Southeast Alabama Medical Center,Dothan,AL,Government,2,23394,Average
1,Marshall Medical Center South,Boaz,AL,Government,1,22579,Average
2,Eliza Coffee Memorial Hospital,Florence,AL,Government,3,24002,Average
3,Mizell Memorial Hospital,Opp,AL,Private,1,0,Unknown
4,Crenshaw Community Hospital,Luverne,AL,Proprietary,4,0,Unknown
...,...,...,...,...,...,...,...
4767,Providence Hospital Of North Houston Llc,Houston,TX,Proprietary,-1,0,Unknown
4768,The Hospitals Of Providence Transmountain Campus,El Paso,TX,Proprietary,-1,0,Unknown
4769,Saint Camillus Medical Center,Hurst,TX,Proprietary,-1,0,Unknown
4770,Houston Methodist The Woodlands Hospital,The Woodlands,TX,Private,3,0,Unknown


In [57]:
# A Procedure.Heart_Attack.Cost of 0 stands for an unknown cost.
# Report how many rows carry that value, then keep only the rows that do not.
matches = int((clean['Procedure.Heart_Attack.Cost'] == 0).sum())
print(f'Matches: {matches}')
clean = clean.loc[clean['Procedure.Heart_Attack.Cost'].ne(0)]
clean

Matches: 2504


Unnamed: 0,Facility.Name,Facility.City,Facility.State,Facility.Type,Rating.Overall,Procedure.Heart_Attack.Cost,Procedure.Heart_Attack.Quality
0,Southeast Alabama Medical Center,Dothan,AL,Government,2,23394,Average
1,Marshall Medical Center South,Boaz,AL,Government,1,22579,Average
2,Eliza Coffee Memorial Hospital,Florence,AL,Government,3,24002,Average
5,St Vincent's East,Birmingham,AL,Private,2,23940,Average
6,Dekalb Regional Medical Center,Fort Payne,AL,Proprietary,2,23853,Average
...,...,...,...,...,...,...,...
4741,Seton Medical Center Harker Heights,Harker Heights,TX,Proprietary,3,22619,Average
4742,Baylor Scott And White Medical Center Mckinney,Mc Kinney,TX,Private,3,24175,Average
4744,Texas Health Harris Methodist Hospital Alliance,Fort Worth,TX,Private,3,24649,Average
4746,Baylor Scott & White Medical Center- College S...,College Station,TX,Private,3,27904,Average


In [58]:
# A Rating.Overall of -1 stands for an unknown rating.
# Report how many rows carry that value, then keep only the rows that do not.
matches = int((clean['Rating.Overall'] == -1).sum())
print(f'Matches: {matches}')
clean = clean.loc[clean['Rating.Overall'].ne(-1)]
clean

Matches: 12


Unnamed: 0,Facility.Name,Facility.City,Facility.State,Facility.Type,Rating.Overall,Procedure.Heart_Attack.Cost,Procedure.Heart_Attack.Quality
0,Southeast Alabama Medical Center,Dothan,AL,Government,2,23394,Average
1,Marshall Medical Center South,Boaz,AL,Government,1,22579,Average
2,Eliza Coffee Memorial Hospital,Florence,AL,Government,3,24002,Average
5,St Vincent's East,Birmingham,AL,Private,2,23940,Average
6,Dekalb Regional Medical Center,Fort Payne,AL,Proprietary,2,23853,Average
...,...,...,...,...,...,...,...
4741,Seton Medical Center Harker Heights,Harker Heights,TX,Proprietary,3,22619,Average
4742,Baylor Scott And White Medical Center Mckinney,Mc Kinney,TX,Private,3,24175,Average
4744,Texas Health Harris Methodist Hospital Alliance,Fort Worth,TX,Private,3,24649,Average
4746,Baylor Scott & White Medical Center- College S...,College Station,TX,Private,3,27904,Average


In [59]:
# Report how many remaining rows have Procedure.Heart_Attack.Quality set to
# 'Unknown', then keep only the rows with a known quality label.
matches = int((clean['Procedure.Heart_Attack.Quality'] == 'Unknown').sum())
print(f'Matches: {matches}')
clean = clean.loc[clean['Procedure.Heart_Attack.Quality'].ne('Unknown')]
clean

Matches: 2


Unnamed: 0,Facility.Name,Facility.City,Facility.State,Facility.Type,Rating.Overall,Procedure.Heart_Attack.Cost,Procedure.Heart_Attack.Quality
0,Southeast Alabama Medical Center,Dothan,AL,Government,2,23394,Average
1,Marshall Medical Center South,Boaz,AL,Government,1,22579,Average
2,Eliza Coffee Memorial Hospital,Florence,AL,Government,3,24002,Average
5,St Vincent's East,Birmingham,AL,Private,2,23940,Average
6,Dekalb Regional Medical Center,Fort Payne,AL,Proprietary,2,23853,Average
...,...,...,...,...,...,...,...
4741,Seton Medical Center Harker Heights,Harker Heights,TX,Proprietary,3,22619,Average
4742,Baylor Scott And White Medical Center Mckinney,Mc Kinney,TX,Private,3,24175,Average
4744,Texas Health Harris Methodist Hospital Alliance,Fort Worth,TX,Private,3,24649,Average
4746,Baylor Scott & White Medical Center- College S...,College Station,TX,Private,3,27904,Average


In [60]:
# Write the cleaned data to 'hospitals_clean.csv'.
# index=False keeps the DataFrame's row labels out of the file: after the
# filtering steps they are a non-contiguous leftover of the original row
# numbers, and writing them would add an unnamed first column that shows up
# as 'Unnamed: 0' when the file is loaded again with pd.read_csv.

clean.to_csv('hospitals_clean.csv', index=False)