****#### **Import all necessary libraries**

In [1099]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

### **Data loading**

##### List first five samples

In [1100]:
df = pd.read_csv("../_data/tb-rsqa.csv", encoding="ISO-8859-1")

#### List of Columns

In [1101]:
for col in df.columns:
    print(col)

Name of Supervisor 1
Name of Supervisor 2
HF Name
GeneXpert-site
HF Type
HF TB Type
Province
District
Sub_District
Period of implementation
FY
Domaine d'activités du NSP 2013-2018
Summary group
Category of activity
No_Old
NO
Normes 
Element à verifier
Procedures 
Num
Den
% (don't use)
Commentaires
How many Times


### **Data Cleaning**

In [1102]:
df = df.copy()

#### 1. Extra relevant columns

In [1103]:
relevant_cols = [
    'HF Name',               # Health facility name
    'GeneXpert-site',        # Whether the site has GeneXpert
    'HF Type',               # Hospital or Health Center
    'HF TB Type',            # TB type (e.g., CTD(Center for Treatment & Diagnosis), CT(Center for Treatment))
    'FY',                    # Fiscal year
    'Province', 'District',  # Location info
    'Summary group',         # indicators sections
    'Normes ',                # question
    'Element à verifier',    # What was verified
    'Num',                   # Numerator of the indicator
    'Den',                   # Denominator of the indicator
];
new_df = df[relevant_cols].copy()
new_df.shape[0]

25647

#### 2. Handling Duplicates

In [1104]:
# Check for duplicates
print(f"Number of duplicates: {new_df.duplicated().sum()}")

# Remove duplicates
new_df = new_df.drop_duplicates()

# Verify
print(f"Data after removing duplicates: {new_df.shape}")


Number of duplicates: 176
Data after removing duplicates: (25471, 12)


#### 3. Handling missing values

In [1105]:
# Check for missing values
missing_values = new_df.isnull().sum()
print(f"\nMissing Values:\n{missing_values[missing_values > 0]}")



Missing Values:
Normes                9711
Element à verifier    1089
Num                   4613
Den                   2201
dtype: int64


In [1106]:
new_df["Element à verifier"] = new_df["Element à verifier"].fillna(new_df["Normes "])
print(f"Data after handling missing values: {new_df.shape}")

Data after handling missing values: (25471, 12)


#### 4. Rename columns

In [1107]:
# Rename columns to make them cleaner and consistent
new_df.columns = new_df.columns.str.strip().str.lower().str.replace(' ', '_')

new_df.rename(columns={
    'element_à_verifier': 'item_name',
    'genexpert-site': 'genexpert_site',
}, inplace=True)

# Verify the new column names
print(f"Renamed columns: {new_df.columns}")

Renamed columns: Index(['hf_name', 'genexpert_site', 'hf_type', 'hf_tb_type', 'fy', 'province',
       'district', 'summary_group', 'normes', 'item_name', 'num', 'den'],
      dtype='object')


##### 5. Data Types Validation

In [1108]:
new_df["num"] = pd.to_numeric(new_df["num"], errors="coerce")
new_df["den"] = pd.to_numeric(new_df["den"], errors="coerce")

#### **Standardize fiscal year variable**

In [1109]:
# Clean the 'FY' column by removing the 'FY' prefix and standardizing format
new_df['fy'] = new_df['fy'].str.replace('FY ', '', regex=False)

# Verify
print("\nUnique Fiscal Years after cleaning:")
print(new_df['fy'].unique())
print(new_df['fy'].value_counts())



Unique Fiscal Years after cleaning:
['2020-2021' '2021-2022' '2022-2023' '2023-2024']
fy
2023-2024    9293
2020-2021    6969
2021-2022    6486
2022-2023    2723
Name: count, dtype: int64


#### **Standardize hf_type Values**

In [1110]:
# Create a mapping to standardize 'hf_type' values
new_df.hf_type = new_df.hf_type.str.strip().str.lower()
hf_type_mapping = {
    'hospital (dh,ph,rh)': 'hospital',
}

# Apply the mapping to standardize values
new_df['hf_type'] = new_df['hf_type'].replace(hf_type_mapping)

# Verify
print("\nUnique 'hf_type' after cleaning:")
print(new_df['hf_type'].unique())



Unique 'hf_type' after cleaning:
['health center' 'hospital' 'prison' 'teaching hospital']


#### 7. Remove `Prison` and `Teaching Hospital` data

In [1111]:
# Filter out rows where 'hf_type' is either 'prison' or 'teaching hospital'
new_df = new_df[~new_df['hf_type'].isin(['prison', 'teaching hospital'])]

# Verify the removal
print("\nUnique 'hf_type' after removal:")
print(new_df['hf_type'].unique())

# Check the shape of the data after filtering
print("\nData shape after filtering:")
print(new_df.shape)



Unique 'hf_type' after removal:
['health center' 'hospital']

Data shape after filtering:
(24490, 12)


#### **Remove Specific Samples Based on HF Name**

In [1112]:
# Remove rows where 'HF Name' is 'Butare Chu Hnr (huye)' or 'kanombe RH'
new_df = new_df[~new_df['hf_name'].isin(['Butare Chu Hnr (huye)', 'kanombe RH'])]

# Verify the removal
print("\nHF Names after removal:")
print(new_df['hf_name'].unique())

# Check the shape of the data after filtering
print("\nData shape after filtering:")
print(new_df.shape)



HF Names after removal:
['Ruli CS' 'Nyange (ruli) CS' 'Rwankuba CS' 'Minazi CS' 'Ruli DH'
 'Kinigi CS' 'Nyakinama CS' 'Rwaza CS' 'Ruhengeri RH'
 'Muhoza (Ruhengeri) CS' 'Kirehe CS' 'Kirehe DH' 'Mushikiri CS'
 'Rusumo CS' 'Musaza CS' 'Kibungo RH' 'Rukira CS' 'Rukoma Sake CS'
 'Mutenderi CS' ' Gitwe DH' 'Gitwe CS' 'Gishweru CS' 'Muremure CS'
 ' Byimana CS' 'Ruhunda CS' 'Musha (rwamagana) CS' 'Munyiginya CS'
 'Rwamagana PH' 'Nyagatare CS' 'Ntoma CS' ' Nyagatare DH'
 'Nyagatare Prison' 'Cyabayaga CS' 'Rukara CS' 'Gahini DH' 'Gahini CS'
 'Rutare (kayonza) CS' 'Mukarange CS' 'Kamubuga CS' 'Karambo (gakenke) CS'
 'Mataba CS' 'Gatonde CS' 'Nemba DH' 'Kinihira PH' 'Tumba CS' 'Tare CS'
 'Kisaro CS' 'Kinihira (rulindo) CS' 'Rugarama (gatsibo) CS' 'Gitoki CS'
 'Kabarore CS' 'Gatsibo CS' 'Rwimitereri CS' 'Kiziguro DH' 'Simbi CS'
 'Mukura (Huye) CS' 'Kabutare DH' 'Agahabwa CS' 'Kibilizi (gisagara) CS'
 'Kibayi CS' 'Mugombwa CS' 'Kibilizi DH' 'Gihara CS'
 'Kamonyi (gacurabwenge) CS' 'Mugina CS' 'Kay

#### **Update hf_type for Specific Facility**

In [1113]:
# Update hf_type for 'kiziguro dh' from 'health center' to 'hospital'
new_df.loc[new_df['hf_name'] == 'kiziguro dh', 'hf_type'] = 'hospital'
new_df.loc[new_df['hf_name'] == 'gwinkwavu dh', 'hf_type'] = 'hospital'

# Verify the change
print("\nUpdated 'hf_type' for 'kiziguro dh':")
print(new_df[new_df['hf_name'] == 'kiziguro dh'][['hf_name', 'hf_type']])
print(new_df[new_df['hf_name'] == 'gwinkwavu dh'][['hf_name', 'hf_type']])

# Verify the unique hf_type values
print("\nUnique 'hf_type' after update:")
print(new_df['hf_type'].unique())



Updated 'hf_type' for 'kiziguro dh':
Empty DataFrame
Columns: [hf_name, hf_type]
Index: []
Empty DataFrame
Columns: [hf_name, hf_type]
Index: []

Unique 'hf_type' after update:
['health center' 'hospital']


#### **Correct hf_type for 'CT' Facilities Labeled as 'Hospital'**

In [1114]:
# Correct hf_type for facilities where hf_tb_type is 'CT' and hf_type is 'hospital'
new_df.loc[(new_df['hf_tb_type'] == 'CT') & (new_df['hf_type'] == 'hospital'), 'hf_type'] = 'health center'

# Verify the change
print("\nUpdated 'hf_type' for 'CT' facilities mistakenly labeled as 'hospital':")
print(new_df[(new_df['hf_tb_type'] == 'CT') & (new_df['hf_type'] == 'hospital')][['hf_name', 'hf_tb_type', 'hf_type']])

# Verify the unique hf_type values
print("\nUnique 'hf_type' after update:")
print(new_df['hf_type'].unique())
new_df.shape


Updated 'hf_type' for 'CT' facilities mistakenly labeled as 'hospital':
Empty DataFrame
Columns: [hf_name, hf_tb_type, hf_type]
Index: []

Unique 'hf_type' after update:
['health center' 'hospital']


(24349, 12)

#### **Update hf_type for Hospitals Ending with 'cs' or 'CS'**

In [1115]:
# Initial mask for mistakened hospitals
mask = new_df['hf_name'].str.endswith(('cs', 'CS')) & (new_df['hf_type'] == 'hospital')

print("Before updating hf_type to 'health center':", new_df[mask]['hf_name'].unique())

# Apply the fix
new_df.loc[mask, 'hf_type'] = 'health center'

# Recreate mask (optional: check if any were still missed)
new_mask = new_df['hf_name'].str.endswith(('cs', 'CS')) & (new_df['hf_type'] == 'hospital')

print("After updating hf_type, still marked as hospital:", new_df[new_mask]['hf_name'].unique())

Before updating hf_type to 'health center': ['Save CS' 'Ruhango CS']
After updating hf_type, still marked as hospital: []


#### **Update hf_type for Health Centers Ending with 'dh', 'ph', 'rh' or 'capitalized'**

In [1116]:
# Identify and correct hf_type for facilities whose hf_name ends with 'cs' or 'CS' but are marked as 'hospital'
mask = new_df['hf_name'].str.endswith(('dh', 'ph', 'rh', 'DH', 'PH', 'RH')) & (new_df['hf_type'] == 'health center')

print("Before updating hf_type to 'hosptials':", new_df[mask]['hf_name'].unique())

# Update the hf_type to 'health center' for those facilities
new_df.loc[mask, 'hf_type'] = 'hospital'

new_mask = new_df['hf_name'].str.endswith(('dh', 'ph', 'rh', 'DH', 'PH', 'RH')) & (new_df['hf_type'] == 'health center')

# Verify the change
print("Before updating hf_type to 'hosptials':", new_df[new_mask]['hf_name'].unique())


Before updating hf_type to 'hosptials': [' Nyagatare DH' 'Kiziguro DH' 'gwinkwavu DH']
Before updating hf_type to 'hosptials': []


#### **Extract Categorical and Numerical Columns**

In [1117]:
new_df["num"] = pd.to_numeric(new_df["num"], errors="coerce")
new_df["den"] = pd.to_numeric(new_df["den"], errors="coerce")

In [1118]:
# Extract categorical columns (typically object type or category dtype)
categorical_columns = new_df.select_dtypes(include=['object', 'category']).columns.tolist()

# Extract numerical columns (typically int64 or float64 dtype)
numerical_columns = new_df.select_dtypes(include=['number']).columns.tolist()

# Display results
print("\nCategorical Columns:")
print(categorical_columns)

print("\nNumerical Columns:")
print(numerical_columns)


Categorical Columns:
['hf_name', 'genexpert_site', 'hf_type', 'hf_tb_type', 'fy', 'province', 'district', 'summary_group', 'normes', 'item_name']

Numerical Columns:
['num', 'den']


#### **Transform Categorical Columns to Lowercase**

In [1119]:
# Loop through each categorical column and transform its values to lowercase
for col in categorical_columns:
    new_df[col] = new_df[col].str.strip().str.lower()

# Verify the changes by previewing the transformed categorical columns
print("\nPreview of Categorical Columns after Lowercasing:")
new_df.head()



Preview of Categorical Columns after Lowercasing:


Unnamed: 0,hf_name,genexpert_site,hf_type,hf_tb_type,fy,province,district,summary_group,normes,item_name,num,den
0,ruli cs,no,health center,ct,2020-2021,north,gakenke,"are algorithms for tb screening, tb diagnosis ...",toute fosa doit rendre disponible des algorith...,"les services vih/arv, consultations externes, ...",3.0,4.0
1,ruli cs,no,health center,ct,2020-2021,north,gakenke,"are tb monitoring tools existing, update and w...",les prestataires doivent avoir les connaissanc...,"les prestataires des services arv, consultatio...",4.0,4.0
2,ruli cs,no,health center,ct,2020-2021,north,gakenke,is active case finding conducted in tb high ri...,,sur 10 nouveaux prisonniers entrés au cours de...,,10.0
3,ruli cs,no,health center,ct,2020-2021,north,gakenke,is active case finding conducted in tb high ri...,,sur 10 nouveaux prisonniers sortants au cours ...,,10.0
4,ruli cs,no,health center,ct,2020-2021,north,gakenke,are tb diagnostics continuously functional (mi...,,sur les 10 derniers patients hospitalises( 5 p...,,10.0


#### **Check and Remove Duplicates**

In [1120]:
print("shape before removing duplicates:")
print(new_df.shape)
# Check for duplicates
duplicate_rows = new_df[new_df.duplicated()]

# Display the duplicate rows (if any)
print("\nDuplicate Rows:", duplicate_rows.shape)
duplicate_rows

# Remove duplicates
new_df = new_df.drop_duplicates()

# # Verify the shape of the DataFrame after removing duplicates
print("\nShape after removing duplicates:")
new_df.shape

shape before removing duplicates:
(24349, 12)

Duplicate Rows: (231, 12)

Shape after removing duplicates:


(24118, 12)

#### **version 2: Remove a few emerged teaching hospitals**

In [1121]:
# Create a boolean mask for 'hf_name' starting with specific prefixes and ending with specific suffixes
mask = new_df['hf_name'].str.startswith(("ch", "kabgayi", 'kibuye', 'kigali', 'ngoma', 'huye', 'ruhengeri', 'rwanda')) & \
       new_df['hf_name'].str.endswith(("dh", "ph", 'rh'))

# Apply the mask to remove the matching rows
new_df = new_df[~mask]

# Verify the changes
print("\nData after removing rows:")
print(new_df.head())



Data after removing rows:
   hf_name genexpert_site        hf_type hf_tb_type         fy province  \
0  ruli cs             no  health center         ct  2020-2021    north   
1  ruli cs             no  health center         ct  2020-2021    north   
2  ruli cs             no  health center         ct  2020-2021    north   
3  ruli cs             no  health center         ct  2020-2021    north   
4  ruli cs             no  health center         ct  2020-2021    north   

  district                                      summary_group  \
0  gakenke  are algorithms for tb screening, tb diagnosis ...   
1  gakenke  are tb monitoring tools existing, update and w...   
2  gakenke  is active case finding conducted in tb high ri...   
3  gakenke  is active case finding conducted in tb high ri...   
4  gakenke  are tb diagnostics continuously functional (mi...   

                                              normes  \
0  toute fosa doit rendre disponible des algorith...   
1  les prestataires

#### **Remove `Prison` related question in `screening` section**

In [1122]:
# Define the items to exclude
items_to_exclude = [
    "Sur 10 nouveaux prisonniers entrÃ©s au cours de la pÃ©riode evaluÃ©e, combien ont beneficiÃ© du screening TB Ã¡ l'entrÃ©e",
    "Sur 10 nouveaux prisonniers sortants au cours de la pÃ©riode evaluÃ©e, combien ont beneficiÃ© du screening TB Ã¡ la sortie"
]

# Filter out rows where item_name is in items_to_exclude
new_df = new_df[~new_df["item_name"].isin(items_to_exclude)]

#### **Feature engineering**: 
Derive a new 'section' column by mapping 'summary_group' to broader categories


In [1123]:
section_mapping = {
    "is active case finding conducted in tb high risk groups?[questions 2, 8 and 9]": "screening",
    "are tb diagnostics continuously functional (microscopy and expert) and their results available timely (microscopy, expert and culture)?[questions 3, 4, 5, 6 and 7]": "diagnosis",
    "are tb patients early initiated on tb treatment and on art (if indicated), and their bacteriological control performed according to guidelines?[questions 16, 17, 18 and 19]": "treatment",
    "does the bmi monitored for tb patients and nutritional support provided to the eligible patients?[questions new_70 and new_71]": "treatment",
}

new_df["section"] = new_df["summary_group"].map(section_mapping)

# Keep only rows where section is screening, diagnosis, or treatment
new_df = new_df[new_df["section"].isin(["screening", "diagnosis", "treatment"])]

#### **Remove on-applicable (N/A) questions**

filter out rows that meet one or more of these conditions, meaning the question wasn't applicable:

- den is missing → can't assess
- num is missing but den > 0 → incomplete data
- num == 0 and den == 0 → nothing to measure
- num > den → logically incorrect in most performance indicators

-- starts --

In [1124]:
# summary_item_scores = (
#     new_df.groupby(
#         ['province', 'section', 'summary_group', "genexpert_site", 'item_name', 'hf_name', 'hf_tb_type', 'hf_type', 'fy']
#     )
#     .agg({'num': 'sum', 'den': 'sum'})
#     .reset_index()
# )

In [1125]:
# summary_item_scores.section.unique()

-- ends --

In [1126]:
new_df = (
    new_df.groupby(
        ['province', 'section', 'summary_group', "genexpert_site", 'item_name', 'hf_name', 'hf_tb_type', 'hf_type', 'fy']
    )
    .agg({'num': 'sum', 'den': 'sum'})
    .reset_index()
)

In [1127]:
# Define valid cases
valid_df = new_df[
    (new_df["num"] <= new_df["den"]) &  # num must not be greater than den
    ~(new_df["num"].isna() & new_df["den"] > 0) &  # Exclude cases where num is NaN but den > 0
    ~((new_df["num"] == 0) & (new_df["den"] == 0)) &  # Exclude cases where both num and den are 0
    ~(new_df["den"].isna())  # Exclude cases where den is NaN (new fix)
]

invalid_df = new_df[
    (new_df["num"] > new_df["den"]) |  # Invalid if num > den
    ((new_df["num"].isna()) & (new_df["den"] > 0)) |  # Invalid if num is NaN but den > 0
    ((new_df["num"] == 0) & (new_df["den"] == 0)) |  # Invalid if both num and den are 0
    (new_df["den"].isna()) |  # Invalid if den is NaN
    ((new_df["num"].isna()) & (new_df["den"] == 0))  # NEW FIX: Invalid if num is NaN and den = 0
]

print("df size: ", new_df.shape)
print("valid size: ", valid_df.shape)
print("invalid size: ", invalid_df.shape)
new_df = valid_df

df size:  (6640, 11)
valid size:  (5950, 11)
invalid size:  (690, 11)


# `PART 2`

### **Approach #1**

`1`

In [1129]:
screening_hc = new_df[
    (new_df['section'] == 'screening') &
    (new_df['hf_type'] == 'health center')
]

# Group by fy and facility, aggregate num and den
facility_scores = screening_hc.groupby(['fy', 'hf_name']).agg(
    total_num=('num', 'sum'),
    total_den=('den', 'sum')
).reset_index()

# Recalculate score_out_of_100
facility_scores['score_out_of_100'] = 100 * facility_scores['total_num'] / facility_scores['total_den']

# Then average across all facilities per year
hc_result = facility_scores.groupby('fy').agg(
    average_score=('score_out_of_100', 'mean'),
    num_facilities=('hf_name', 'nunique')
).reset_index()
hc_result


Unnamed: 0,fy,average_score,num_facilities
0,2020-2021,81.552145,74
1,2021-2022,45.021951,69
2,2022-2023,39.695691,31
3,2023-2024,40.619488,69


In [None]:
facility_scores

Unnamed: 0,fy,hf_name,total_num,total_den,score_out_of_100
0,2020-2021,agahabwa cs,20.0,20.0,100.000000
1,2020-2021,buramba cs,15.0,15.0,100.000000
2,2020-2021,bushara cs,10.0,10.0,100.000000
3,2020-2021,busoro cs,18.0,20.0,90.000000
4,2020-2021,byimana cs,18.0,20.0,90.000000
...,...,...,...,...,...
237,2023-2024,rusoro cs,20.0,20.0,100.000000
238,2023-2024,rutonde cs,14.0,15.0,93.333333
239,2023-2024,rwahi cs,13.0,20.0,65.000000
240,2023-2024,rwankuba cs,20.0,20.0,100.000000


In [None]:
fy_summary = valid_df.groupby(
    ['section', 'hf_tb_type', 'hf_type', 'fy']
).apply(lambda x: (x["num"].sum() / x["den"].sum()) * 100).reset_index(name="overall_score")
fy_summary

Unnamed: 0,section,hf_tb_type,hf_type,fy,overall_score
0,diagnosis,cdt,health center,2020-2021,87.123116
1,diagnosis,cdt,health center,2021-2022,73.961562
2,diagnosis,cdt,health center,2022-2023,81.526718
3,diagnosis,cdt,health center,2023-2024,85.706278
4,diagnosis,cdt,hospital,2020-2021,86.382475
5,diagnosis,cdt,hospital,2021-2022,88.619718
6,diagnosis,cdt,hospital,2022-2023,83.281734
7,diagnosis,cdt,hospital,2023-2024,85.815603
8,diagnosis,ct,health center,2020-2021,62.602881
9,diagnosis,ct,health center,2021-2022,68.004169


`2`

In [None]:
STOP

NameError: name 'STOP' is not defined

### **Approach #2**

`1`

In [764]:
summary_item_scores['score_out_of_100'] = 100 * summary_item_scores['num'] / summary_item_scores['den']

screening_hc = summary_item_scores[
    (summary_item_scores['section'] == 'screening') &
    (summary_item_scores['hf_type'] == 'health center')
]

hc_result = screening_hc.groupby('fy').agg(
    average_score=('score_out_of_100', 'mean'),
    num_facilities=('hf_name', 'nunique')
).reset_index()

hc_result


Unnamed: 0,fy,average_score,num_facilities
0,2020-2021,79.565217,74
1,2021-2022,49.45723,69
2,2022-2023,46.54321,31
3,2023-2024,47.218915,69


`2`

In [758]:
# Step 1: Filter the DataFrame
screening_hc = summary_item_scores[
    (summary_item_scores['section'] == 'screening') &
    (summary_item_scores['hf_type'] == 'health center')
]

# step 2: Group by fiscal year
hc_result = screening_hc.groupby('fy').agg(
    average_score=('score_out_of_100', 'mean'),
    num_facilities=('hf_name', 'nunique'),
).reset_index()
hc_result

Unnamed: 0,fy,average_score,num_facilities
0,2020-2021,79.565217,74
1,2021-2022,49.45723,69
2,2022-2023,46.54321,31
3,2023-2024,47.218915,69


In [None]:
# First calculate the overall_score as you were doing
fy_summary = valid_df.groupby(["province", 'fy']).apply(
    lambda x: (x["num"].sum() / x["den"].sum()) * 100
).reset_index(name="overall_score")

# Now calculate the hf_count by counting unique health facilities in each group
hf_counts = valid_df.groupby(["province", 'fy'])["hf_name"].nunique().reset_index(name="hf_count")

# Merge the two dataframes to get all the columns you need
result_df = pd.merge(fy_summary, hf_counts, on=["province", 'fy'], how="left")
result_df 

Unnamed: 0,province,fy,overall_score,hf_count
0,eastern,2020-2021,73.58169,34
1,eastern,2021-2022,49.876758,41
2,eastern,2023-2024,49.062934,43
3,kigali,2023-2024,56.275862,17
4,northern,2020-2021,63.286945,25
5,northern,2023-2024,53.051714,34
6,southern,2020-2021,68.149211,36
7,southern,2021-2022,60.967796,49
8,southern,2023-2024,65.794907,9
9,western,2022-2023,44.524561,39


`Overall score and count by 'Health facility type'`

In [644]:
# First calculate the overall_score as you were doing
fy_summary = valid_df.groupby(["hf_type", 'fy']).apply(
    lambda x: (x["num"].sum() / x["den"].sum()) * 100
).reset_index(name="overall_score")

# Now calculate the hf_count by counting unique health facilities in each group
hf_counts = valid_df.groupby(["hf_type", 'fy'])["hf_name"].nunique().reset_index(name="hf_count")

# Merge the two dataframes to get all the columns you need
result_df = pd.merge(fy_summary, hf_counts, on=["hf_type", 'fy'], how="left")
result_df 

Unnamed: 0,hf_type,fy,overall_score,hf_count
0,health center,2020-2021,64.8971,74
1,health center,2021-2022,54.702987,69
2,health center,2022-2023,40.765172,31
3,health center,2023-2024,44.139361,69
4,"hospital (dh,ph,rh)",2020-2021,81.016334,21
5,"hospital (dh,ph,rh)",2021-2022,66.345062,21
6,"hospital (dh,ph,rh)",2022-2023,59.289415,8
7,"hospital (dh,ph,rh)",2023-2024,67.406334,42
