In [1]:
import pandas as pd

# Raw observations, keyed by column name; every list holds one value per
# subject, in the same subject order across all columns.
data = {
    "Height": [65.8, 71.5, 69.4, 68.2, 67.8, 68.7, 69.8, 70.1, 67.9, 66.8],
    "Weight": [112, 136, 153, 142, 144, 123, 141, 136, 112, 120],
    "Age": [30, 19, 45, 22, 29, 50, 51, 23, 17, 39],
    "Grip strength": [30, 31, 29, 28, 24, 26, 22, 20, 19, 31],
    "Frailty": ["N", "N", "N", "Y", "Y", "N", "Y", "Y", "N", "N"],
}

# Tabulate the observations: one row per subject, one column per dictionary key.
df = pd.DataFrame(data=data)


# Count the null entries in each column and report them before any cleaning runs.
missing_per_column = df.isna().sum()
print("Missing values before cleaning:\n", missing_per_column)




# Drop any row that exactly repeats an earlier row (df is modified in place).
df.drop_duplicates(inplace=True)

# Target dtype for each column: Height and Weight as floats, Age and
# Grip strength as integers, Frailty as a string label.
column_dtypes = {
    'Height': float,
    'Weight': float,
    'Age': int,
    'Grip strength': int,
    'Frailty': str,
}

# Cast one column at a time, writing the converted values back into df.
for column_name, target_dtype in column_dtypes.items():
    df[column_name] = df[column_name].astype(target_dtype)

# Normalize the Frailty labels to the single-letter codes 'Y' / 'N'.
# Upper-casing runs first, so the replacement keys must themselves be
# upper-case: mixed-case keys such as 'Yes' / 'No' can never match a value
# that has already been upper-cased.
df['Frailty'] = (
    df['Frailty']
    .str.upper()  # e.g. 'y' -> 'Y', 'Yes' -> 'YES'
    .replace({'YES': 'Y', 'NO': 'N'})  # spelled-out variants -> single-letter codes
)

# Print the cleaned frame under a header; the leading newline separates it
# from the output printed earlier in the cell.
cleaned_header = "\nCleaned DataFrame:\n"
print(cleaned_header, df)




Missing values before cleaning:
 Height           0
Weight           0
Age              0
Grip strength    0
Frailty          0
dtype: int64

Cleaned DataFrame:
    Height  Weight  Age  Grip strength Frailty
0    65.8   112.0   30             30       N
1    71.5   136.0   19             31       N
2    69.4   153.0   45             29       N
3    68.2   142.0   22             28       Y
4    67.8   144.0   29             24       Y
5    68.7   123.0   50             26       N
6    69.8   141.0   51             22       Y
7    70.1   136.0   23             20       Y
8    67.9   112.0   17             19       N
9    66.8   120.0   39             31       N
