# Additional Survey Statistics

This notebook contains the code and analysis BuzzFeed News used to calculate some additional survey statistics presented in the story. Please read the original story for further context.

In [1]:
import pandas as pd
import glob

## Load and filter data

In [2]:
# Load every ownership CSV and stack them into a single facility table.
# glob.glob() returns paths in arbitrary, OS-dependent order, so the paths are
# sorted to make the row order of `facs` (and any preview of it) reproducible
# across machines and runs. The per-file row index is kept as-is.
facs = (
    pd
    .concat(
        [ pd.read_csv(f) for f in sorted(glob.glob("../../data/ownership/final/*.csv")) ]
    )
)

# Subset of the facility table whose `is_brightspring` flag is True.
brightspring_facs = facs[facs["is_brightspring"].eq(True)]

In [3]:
# Preview the first rows of the BrightSpring-only facility subset.
brightspring_facs.head()

Unnamed: 0,provider_id,name,type,region,state,address,phone,address.1,particip_date,certified_beds,hospital_based,ownership_type,termination_code,termination_date,legal_owner,is_brightspring
0,51G002,5TH AVENUE GROUP HOME,Intermediate Care Facilities for Individuals w...,(III) Philadelphia,WV,"916 FIFTH AVENUE\nSAINT ALBANS, WV 25177",304 720-0015,"916 FIFTH AVENUE\nSAINT ALBANS, WV 25177",1981-10-06,9,No,For Profit,,,"RSCR WEST VIRGINIA, INCORPORATED",True
2,51G007,CROSS LANES GROUP HOME,Intermediate Care Facilities for Individuals w...,(III) Philadelphia,WV,"5202 LINDA VISTA DRIVE\nCROSS LANES, WV 25313",304 776-2005,"5202 LINDA VISTA DRIVE\nCROSS LANES, WV 25313",1986-11-17,8,No,For Profit,,,"RSCR WEST VIRGINIA, INCORPORATED",True
3,51G008,EIGHTH AVENUE GROUP HOME,Intermediate Care Facilities for Individuals w...,(III) Philadelphia,WV,"1519 8TH AVENUE\nHUNTINGTON, WV 25701",304 523-0177,"1519 8TH AVENUE\nHUNTINGTON, WV 25701",1987-03-06,8,No,For Profit,,,"VOCA CORPORATION OF WEST VIRGINIA, INCORPORATED",True
4,51G009,VIRGINIA AVENUE GROUP HOME,Intermediate Care Facilities for Individuals w...,(III) Philadelphia,WV,"821 VIRGINIA AVENUE\nHUNTINGTON, WV 25701",304 523-0196,"821 VIRGINIA AVENUE\nHUNTINGTON, WV 25701",1987-03-06,8,No,For Profit,,,"VOCA CORPORATION OF WEST VIRGINIA, INCORPORATED",True
5,51G010,811 S. KANAWHA GROUP HOME,Intermediate Care Facilities for Individuals w...,(III) Philadelphia,WV,"811 S. KANAWHA STREET\nBECKLEY, WV 25801",304 252-5937,"811 S. KANAWHA STREET\nBECKLEY, WV 25801",1987-02-17,8,No,For Profit,,,"VOCA CORPORATION OF WEST VIRGINIA, INCORPORATED",True


In [4]:
# QCOR inspection data: deficiency citations and the surveys they belong to
# (the two tables share the `survey_key` column).
# Only the surveys' "date" column is parsed into datetimes here.
deficiencies = pd.read_csv("../../data/qcor/deficiencies.csv")
surveys = pd.read_csv("../../data/qcor/surveys.csv", parse_dates=["date"])

In [5]:
# Preview the first rows of the deficiency-level table.
deficiencies.head()

Unnamed: 0,survey_key,level,tag,desc,date
0,01G005-2010-0,Standard,K0211,Means of Egress - General,2010-11-24
1,01G005-2010-0,Standard,K0211,Means of Egress - General,2010-11-24
2,01G005-2010-0,Standard,K0211,Means of Egress - General,2010-11-24
3,01G005-2010-0,Standard,K0211,Means of Egress - General,2010-11-24
4,01G005-2010-0,Standard,K0211,Means of Egress - General,2010-11-24


In [6]:
# Preview the first rows of the survey-level table.
surveys.head()

Unnamed: 0,survey_key,provider_id,date,type,subtype,num_deficiencies
0,01G005-2010-0,01G005,2010-11-24,STANDARD,LIFE SAFETY,43
1,01G005-2010-1,01G005,2010-11-24,STANDARD,HEALTH,5
2,01G005-2011-0,01G005,2011-07-27,STANDARD,LIFE SAFETY,76
3,01G005-2011-1,01G005,2011-07-27,STANDARD,HEALTH,18
4,01G006-2010-0,01G006,2010-07-27,STANDARD,LIFE SAFETY,0


In [7]:
# Surveys in scope for the analysis, narrowed one condition at a time.
recent_surveys = (
    surveys
    # facility appears in the ownership data
    .loc[lambda s: s["provider_id"].isin(facs["provider_id"])]
    # on or after the date KKR takes over
    .loc[lambda s: s["date"].ge("2019-03-05")]
    # QCOR data is scraped until this date
    .loc[lambda s: s["date"].lt("2022-01-01")]
    # only analyzing Health surveys
    .loc[lambda s: s["subtype"].eq("HEALTH")]
    .copy()
)

## Number of open BrightSpring facilities on March 5, 2019

> "became the owner of more than 600 residential facilities serving people from California to West Virginia"

In [8]:
# BrightSpring facilities operating on March 5, 2019: participating on or
# before that date, and either never terminated or terminated on/after it.
open_bs_facs = (
    brightspring_facs
    .loc[lambda f: f["particip_date"].le("2019-03-05")]
    .loc[
        lambda f: f["termination_date"].isna()
        | f["termination_date"].ge("2019-03-05")
    ]
)

In [9]:
# Number of BrightSpring facilities open on the acquisition date.
open_bs_facs.shape[0]

615

## Overall statistic

> "The analysis focused on intermediate care facilities, the type of group home with the most comprehensive state inspection reports, in the seven states with the most for-profit homes. In those states, KKR owns only 16% of the homes but racked up 40% of the serious citations — more than 500 in total."

In [10]:
# Finding the percentage of facilities
# (distinct facilities with at least one in-scope survey, split by ownership)
(
    recent_surveys
    .merge(
        facs[["provider_id", "is_brightspring"]],
        how="left",
        on="provider_id"
    )
    .groupby(["is_brightspring"])["provider_id"]
    .nunique() # all facilities surveyed at least once during the time period
    .to_frame()
    .assign(
        # Vectorized share of the column total; avoids a row-wise apply that
        # recomputed the same sum for every row.
        percentage = lambda df: (df["provider_id"] / df["provider_id"].sum() * 100).round(2)
    )
)

Unnamed: 0_level_0,provider_id,percentage
is_brightspring,Unnamed: 1_level_1,Unnamed: 2_level_1
False,3143,83.59
True,617,16.41


In [11]:
# Finding the percentage of Conditions and the total
(
    deficiencies
    .loc[lambda x:
         (x["level"] == "Condition") & # only conditional deficiencies
         (x["survey_key"].isin(recent_surveys["survey_key"]))] # only surveys conducted post BrightSpring acquisition
    .merge(
        recent_surveys[["survey_key", "provider_id", "type"]],
        how="left",
        on="survey_key"
    )
    .merge(
        facs[["provider_id", "is_brightspring"]],
        how="left",
        on="provider_id"
    )
    .groupby(["is_brightspring", "type"])["survey_key"]
    .count() # counts deficiency rows (citations), not unique surveys
    .unstack(fill_value=0) # a missing ownership/type combination is zero citations, not NaN
    .assign(total = lambda x: x.sum(axis=1))
    .assign(
        # Vectorized share of the grand total; replaces a row-wise apply that
        # recomputed the same sum for every row.
        percentage = lambda df: (df["total"] / df["total"].sum() * 100).round(2)
    )
)

type,COMPLAINT,STANDARD,total,percentage
is_brightspring,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
False,371,422,793,59.89
True,267,264,531,40.11


In [12]:
# Finding the percentage of surveys
# to show the above conclusion is not because
# BrightSpring was surveyed at a much higher rate
survey_stats = (
    recent_surveys
    .merge(
        facs[["provider_id", "is_brightspring"]],
        how="left",
        on="provider_id"
    )
    .groupby(["is_brightspring", "type"])["survey_key"]
    .nunique() # each unique survey
    .unstack(fill_value=0) # a missing ownership/type combination is zero surveys, not NaN
    .assign(total = lambda x: x.sum(axis=1))
    .assign(
        # Vectorized share of the grand total; replaces a row-wise apply that
        # recomputed the same sum for every row.
        percentage = lambda df: (df["total"] / df["total"].sum() * 100).round(2)
    )
)

In [13]:
# Show survey counts by ownership and survey type, with each group's share of the total.
survey_stats

type,COMPLAINT,STANDARD,total,percentage
is_brightspring,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
False,7530,5656,13186,82.11
True,1667,1205,2872,17.89


## Staffing deficiencies

> "According to reports in the seven states analyzed by BuzzFeed News, inspectors found 118 instances of dangerously low staffing since KKR bought the company — double the rate of all other facilities."

This analysis looks at W code W186 (recorded in the data as tag `W0186`); see [this CMS document](https://www.cms.gov/regulations-and-guidance/guidance/manuals/downloads/som107ap_j_intermcare.pdf) for more information about W codes. The regulatory reference for W186 states: "The facility must provide sufficient direct care staff to manage and supervise clients in accordance with their individual program plans".

In [14]:
# W0186 (insufficient direct care staff) citations from in-scope surveys,
# enriched with survey and facility attributes.
# The rows are already restricted to a single tag, so no sort on "tag" is
# needed, and .merge() returns a new frame, so no explicit .copy() is needed.
staffing_deficiencies = (
    deficiencies
    .loc[
        lambda x: (x["tag"] == "W0186") & x["survey_key"].isin(recent_surveys["survey_key"])
    ]
    .merge(
        surveys[["survey_key", "provider_id", "type", "subtype"]],
        how="left",
        on="survey_key"
    )
    .merge(
        facs[["provider_id", "name", "state", "is_brightspring"]],
        how="left",
        on="provider_id"
    )
)

In [15]:
# Citation counts per ownership group, with one column per tag.
staff_def_counts = (
    staffing_deficiencies
    .groupby(["is_brightspring", "tag"])["survey_key"]
    .agg("count")
    .unstack(level="tag")
)

In [16]:
# Show the W0186 citation counts split by BrightSpring ownership.
staff_def_counts

tag,W0186
is_brightspring,Unnamed: 1_level_1
False,204
True,118


In [17]:
# W0186 citations per 100 surveys, for BrightSpring vs. all other facilities.
(
    staff_def_counts
    .join(
        survey_stats, # overall survey stats from the time period from earlier
        how="left"
    )
    # Plain column arithmetic instead of a row-wise apply.
    .assign(pct_186_violation = lambda df: (df["W0186"] / df["total"] * 100).round(2))
)

Unnamed: 0_level_0,W0186,COMPLAINT,STANDARD,total,percentage,pct_186_violation
is_brightspring,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
False,204,7530,5656,13186,82.11,1.55
True,118,1667,1205,2872,17.89,4.11


## Training deficiencies

> "More than 100 inspection reports, however, confirm what six managers and direct care workers told BuzzFeed News: many staff were not adequately trained, and patients were in danger."

This analysis looks at W code W189 (recorded in the data as tag `W0189`); see [this CMS document](https://www.cms.gov/regulations-and-guidance/guidance/manuals/downloads/som107ap_j_intermcare.pdf) for more information about W codes. The regulatory reference for W189 states: "The facility must provide each employee with initial and continuing training
that enables the employee to perform his or her duties effectively, efficiently, and
competently."

In [18]:
# W0189 (staff training) citations from in-scope surveys, enriched with
# survey and facility attributes.
# The rows are already restricted to a single tag, so no sort on "tag" is
# needed, and .merge() returns a new frame, so no explicit .copy() is needed.
training_deficiencies = (
    deficiencies
    .loc[
        lambda x: (x["tag"] == "W0189") & x["survey_key"].isin(recent_surveys["survey_key"])
    ]
    .merge(
        surveys[["survey_key", "provider_id", "type", "subtype"]],
        how="left",
        on="survey_key"
    )
    .merge(
        facs[["provider_id", "name", "state", "is_brightspring"]],
        how="left",
        on="provider_id"
    )
)

In [19]:
# Citation counts per ownership group, with one column per tag.
training_def_counts = (
    training_deficiencies
    .groupby(["is_brightspring", "tag"])["survey_key"]
    .agg("count")
    .unstack(level="tag")
)

In [20]:
# Show the W0189 citation counts split by BrightSpring ownership.
training_def_counts

tag,W0189
is_brightspring,Unnamed: 1_level_1
False,535
True,121


---

---

---