### --- Setup, Config, and Imports ---

In [None]:
# --- Setup ---
import pandas as pd
import os
import sys
import matplotlib.pyplot as plt
import seaborn as sns

 # To resolve relative imports
# Put the parent of the current working directory on the import path so that
# the `from scripts import ...` line below can be resolved.
# The membership check keeps this cell idempotent: re-running it does not
# append a duplicate entry to sys.path each time.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)
from scripts import fetch_cms_data, clean_column_names, drop_empty_columns, encode_categoricals, normalize_columns

# --- Notebook-wide settings ---
# Data locations. Both are relative paths, so they resolve against the
# current working directory at run time.
# NOTE(review): the import path configured earlier in this cell points at the
# parent of the working directory, while these paths stay inside it — confirm
# that this is the intended layout.
DATA_PATH, SAVE_PATH = "data/raw", "data/clean"

# Do not truncate the column list when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Create the output directory up front; an already-existing directory is fine.
os.makedirs(SAVE_PATH, exist_ok=True)

### --- Load Data ---

In [3]:
# Endpoint of the CMS dataset queried by this notebook.
API_URL = "https://data.cms.gov/data-api/v1/dataset/d086edc0-4953-4fb9-a663-b35526371add/data"

# Fetch the dataset through the project helper and report its dimensions.
# NOTE(review): the recorded run returned exactly 1000 rows, and every sampled
# row further down belongs to the same provider — this may be a single API
# page rather than the full dataset; confirm that fetch_cms_data paginates.
print("Fetching data from CMS API...")
df = fetch_cms_data(API_URL)
print(f"Data shape: {df.shape}")

Fetching data frmo CMS API...
Data shape: (1000, 16)


### --- Initial Cleaning ---

In [4]:
# Cleaning pipeline: each entry is (progress message, helper, keyword arguments).
# The steps run in order and each one rebinds `df` to the helper's return value.
# NOTE(review): the meaning of threshold=0.2 is defined by drop_empty_columns
# in the project's scripts package — confirm its semantics there.
cleaning_steps = (
    ("Cleaning column names...", clean_column_names, {}),
    ("Dropping empty columns...", drop_empty_columns, {"threshold": 0.2}),
)

for progress_message, cleaning_step, step_kwargs in cleaning_steps:
    print(progress_message)
    df = cleaning_step(df, **step_kwargs)

Cleaning column names...
Dropping empty columns...


### --- Inspecting Dataset ---

In [5]:
# Structural overview: column names, non-null counts and dtypes.
print("Basic Info:")
df.info()

# The ten columns with the most null entries.
# NOTE(review): the recorded output lists the displayed columns as object dtype
# and the sampled rows contain "." and "_" placeholder strings; isnull() does
# not count such strings, so this tally may understate missing data — confirm
# and convert placeholders before relying on it.
print("Missing values (top 10 columns):")
print(df.isnull().sum().sort_values(ascending=False).head(10))

# Spot-check a few rows. A fixed random_state makes the sample reproducible
# across "Restart & Run All", and capping n at len(df) avoids the ValueError
# that sample() raises when asked for more rows than the frame holds.
print("Sample rows:")
display(df.sample(n=min(5, len(df)), random_state=42))

Basic Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 16 columns):
 #   Column                         Non-Null Count  Dtype 
---  ------                         --------------  ----- 
 0   ccn                            1000 non-null   object
 1   provider_name                  1000 non-null   object
 2   city                           1000 non-null   object
 3   state                          1000 non-null   object
 4   zip_code                       1000 non-null   object
 5   fips_county_code               1000 non-null   object
 6   county_name                    1000 non-null   object
 7   report_date                    1000 non-null   object
 8   mds_item_question_description  1000 non-null   object
 9   mds_item_response              1000 non-null   object
 10  overall_percent                1000 non-null   object
 11  total_residents                1000 non-null   object
 12  short_stay_percent             1000 non-null   object

Unnamed: 0,ccn,provider_name,city,state,zip_code,fips_county_code,county_name,report_date,mds_item_question_description,mds_item_response,overall_percent,total_residents,short_stay_percent,short_stay_residents,long_stay_percent,long_stay_residents
762,15009,"BURNS NURSING HOME, INC.",RUSSELLVILLE,AL,35653,59,Franklin,"Q1, 2025",GG0170C5: Functional Abilities and Goals - Mob...,Resident refused,0,0,0,0,0,0
485,15009,"BURNS NURSING HOME, INC.",RUSSELLVILLE,AL,35653,59,Franklin,"Q1, 2025",GG0130B3: Functional Abilities and Goals - Sel...,Not attempted due to medical condition or safe...,0,0,0,0,.,0
105,15009,"BURNS NURSING HOME, INC.",RUSSELLVILLE,AL,35653,59,Franklin,"Q1, 2025",A2105: Discharge - Discharge Status/Location,Skilled Nursing Facility: Unplanned Discharge ...,0,0,0,0,0,0
690,15009,"BURNS NURSING HOME, INC.",RUSSELLVILLE,AL,35653,59,Franklin,"Q1, 2025",GG0170A3: Functional Abilities and Goals - Mob...,Setup or clean-up assistance,0,0,0,0,.,0
579,15009,"BURNS NURSING HOME, INC.",RUSSELLVILLE,AL,35653,59,Franklin,"Q1, 2025",GG0130F5: Functional Abilities and Goals - Sel...,Supervision or touching assistance,_,_,_,_,_,_
