In [1]:
import pandas as pd
import matplotlib.patches as mpatches

In [2]:
df = pd.read_csv("frailty_data.csv")
df.head()

Unnamed: 0,Height,Weight,Age,Grip strength,Frailty
0,65.8,112,30,30,N
1,71.5,136,19,31,N
2,69.4,153,45,29,N
3,68.2,142,22,28,Y
4,67.8,144,29,24,Y


In [3]:
df["Height_m"] = df["Height"] * 0.0254
df["Weight_kg"] = df["Weight"] * 0.45359237
df.head()

Unnamed: 0,Height,Weight,Age,Grip strength,Frailty,Height_m,Weight_kg
0,65.8,112,30,30,N,1.67132,50.802345
1,71.5,136,19,31,N,1.8161,61.688562
2,69.4,153,45,29,N,1.76276,69.399633
3,68.2,142,22,28,Y,1.73228,64.410117
4,67.8,144,29,24,Y,1.72212,65.317301


In [4]:
df["BMI"] = (df["Weight_kg"] / (df["Height_m"] ** 2)).round(2)
df.head()

Unnamed: 0,Height,Weight,Age,Grip strength,Frailty,Height_m,Weight_kg,BMI
0,65.8,112,30,30,N,1.67132,50.802345,18.19
1,71.5,136,19,31,N,1.8161,61.688562,18.7
2,69.4,153,45,29,N,1.76276,69.399633,22.33
3,68.2,142,22,28,Y,1.73228,64.410117,21.46
4,67.8,144,29,24,Y,1.72212,65.317301,22.02


In [5]:
def categorize_age(age):
    if age < 30:
        return "<30"
    elif 30 <= age <= 45:
        return "30-45"
    elif 46 <= age <= 60:
        return "46-60"
    else:
        return ">60"

df["AgeGroup"] = df["Age"].apply(categorize_age)
df.head()

Unnamed: 0,Height,Weight,Age,Grip strength,Frailty,Height_m,Weight_kg,BMI,AgeGroup
0,65.8,112,30,30,N,1.67132,50.802345,18.19,30-45
1,71.5,136,19,31,N,1.8161,61.688562,18.7,<30
2,69.4,153,45,29,N,1.76276,69.399633,22.33,30-45
3,68.2,142,22,28,Y,1.73228,64.410117,21.46,<30
4,67.8,144,29,24,Y,1.72212,65.317301,22.02,<30


In [6]:
df["Frailty_binary"] = df["Frailty"].map({"Y": 1, "N": 0}).astype("int8")
df.head()

Unnamed: 0,Height,Weight,Age,Grip strength,Frailty,Height_m,Weight_kg,BMI,AgeGroup,Frailty_binary
0,65.8,112,30,30,N,1.67132,50.802345,18.19,30-45,0
1,71.5,136,19,31,N,1.8161,61.688562,18.7,<30,0
2,69.4,153,45,29,N,1.76276,69.399633,22.33,30-45,0
3,68.2,142,22,28,Y,1.73228,64.410117,21.46,<30,1
4,67.8,144,29,24,Y,1.72212,65.317301,22.02,<30,1


In [12]:
age_dummies = pd.get_dummies(df["AgeGroup"], prefix="AgeGroup")
df = pd.concat([df, age_dummies], axis=1)
df.to_csv('cleaned_fratility.csv', index=False, encoding='utf-8')
df.head()

Unnamed: 0,Height,Weight,Age,Grip strength,Frailty,Height_m,Weight_kg,BMI,AgeGroup,Frailty_binary,AgeGroup_30-45,AgeGroup_46-60,AgeGroup_<30,AgeGroup_30-45.1,AgeGroup_46-60.1,AgeGroup_<30.1
0,65.8,112,30,30,N,1.67132,50.802345,18.19,30-45,0,True,False,False,True,False,False
1,71.5,136,19,31,N,1.8161,61.688562,18.7,<30,0,False,False,True,False,False,True
2,69.4,153,45,29,N,1.76276,69.399633,22.33,30-45,0,True,False,False,True,False,False
3,68.2,142,22,28,Y,1.73228,64.410117,21.46,<30,1,False,False,True,False,False,True
4,67.8,144,29,24,Y,1.72212,65.317301,22.02,<30,1,False,False,True,False,False,True


In [8]:
import numpy as np

numeric_cols = df.select_dtypes(include=[np.number]).columns

summary_table = df[numeric_cols].agg(["mean", "median", "std"]).T

print(summary_table)

                      mean      median        std
Height           68.600000   68.450000   1.670662
Weight          131.900000  136.000000  14.231811
Age              32.500000   29.500000  12.860361
Grip strength    26.000000   27.000000   4.521553
Height_m          1.742440    1.738630   0.042435
Weight_kg        59.828834   61.688562   6.455441
BMI              19.682000   19.185000   1.780972
Frailty_binary    0.400000    0.000000   0.516398


In [9]:
correlation = df["Grip strength"].corr(df["Frailty_binary"])

print("Correlation between Grip_strength and Frailty_binary:", correlation)

Correlation between Grip_strength and Frailty_binary: -0.4758668672668007


In [11]:
report_content = f"""
# Frailty Data Analysis Report

## Summary Statistics

{summary_table.to_markdown()}

## Correlation Analysis

Correlation between Grip_strength and Frailty_binary:

**{correlation:.4f}**
"""

with open("findings.md", "w") as f:
    f.write(report_content)