In [None]:
import pandas as pd
import os

# Frailty data cleaning: load the raw Excel sheet, encode the 'Frailty'
# column as 0/1, drop fully empty rows, and write the result to CSV.
RAW_XLSX_PATH = "/content/RAW_DATA.xlsx"
CLEANED_CSV_PATH = "/content/frailty_cleaned.csv"
SHEET_TO_READ = 0  # Adjust if the data lives on a different sheet
FRAILTY_CODES = {'N': 0, 'Y': 1}

# Step 1 + 2: Open the workbook ONCE, list its sheets and parse the wanted
# sheet. The context manager guarantees the file handle is closed, and a
# single parse avoids reading the same workbook repeatedly.
with pd.ExcelFile(RAW_XLSX_PATH) as xls:
    sheet_names = xls.sheet_names
    df = xls.parse(sheet_name=SHEET_TO_READ)

# Debugging: Check that data is loaded, and inspect columns / sheets
print("Dataset loaded successfully. First 5 rows:")
print(df.head())
print("Column names:", df.columns)
print("Available sheets:", sheet_names)

# Step 3: Convert categorical values to binary if 'Frailty' column exists
if 'Frailty' in df.columns:
    raw_frailty = df['Frailty']
    encoded_frailty = raw_frailty.map(FRAILTY_CODES)

    # Series.map yields NaN for any label missing from the mapping; report
    # those instead of letting them vanish silently. Cells that were already
    # blank in the source are not counted as mapping failures.
    unmapped = raw_frailty[raw_frailty.notna() & encoded_frailty.isna()]
    if not unmapped.empty:
        print("Warning: unexpected 'Frailty' values converted to NaN:",
              unmapped.unique())

    df['Frailty'] = encoded_frailty

    # Debugging: Check if data conversion worked
    print("Unique values in 'Frailty' after conversion:", df['Frailty'].unique())
else:
    print("Error: 'Frailty' column not found!")

# Step 4: Check for hidden blank rows
print("Checking first 20 rows for hidden blanks:")
print(df.head(20))

# Step 5: Drop only FULLY empty rows (rows with at least one value are kept)
print("Rows before dropna:", len(df))
df = df.dropna(how='all')
print("Rows after dropna:", len(df))

# Step 6: Save cleaned data (index omitted so it does not become a column)
df.to_csv(CLEANED_CSV_PATH, index=False)
print(f"Cleaned dataset saved at {CLEANED_CSV_PATH}")

# Step 7: Verify the saved file by reading it back
df_cleaned = pd.read_csv(CLEANED_CSV_PATH)
print("Cleaned Data Preview:")
print(df_cleaned.head(10))

Dataset loaded successfully. First 5 rows:
   Height   Weight  Age  Gripstrength  Unnamed: 4 Frailty
0     65.8     112   30            30         NaN       N
1     71.5     136   19            31         NaN       N
2     69.4     153   45            29         NaN       N
3     68.2     142   22            28         NaN       Y
4     67.8     144   29            24         NaN       Y
Column names: Index(['Height ', 'Weight', 'Age', 'Gripstrength', 'Unnamed: 4', 'Frailty'], dtype='object')
Available sheets: ['Sheet1']
Unique values in 'Frailty' after conversion: [0 1]
Checking first 20 rows for hidden blanks:
   Height   Weight  Age  Gripstrength  Unnamed: 4  Frailty
0     65.8     112   30            30         NaN        0
1     71.5     136   19            31         NaN        0
2     69.4     153   45            29         NaN        0
3     68.2     142   22            28         NaN        1
4     67.8     144   29            24         NaN        1
5     68.7     123   50   