## Importing the required/ needed Libraries

In [33]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from scipy.stats import pointbiserialr

In [18]:
data = {
    "Height": [65.8, 71.5, 69.4, 68.2, 67.8, 68.7, 69.8, 70.1, 67.9, 66.8],
    "Weight": [112, 136, 153, 142, 144, 123, 141, 136, 112, 120],
    "Age": [30, 19, 45, 22, 29, 50, 51, 23, 17, 39],
    "Grip strength": [30, 31, 29, 28, 24, 26, 22, 20, 19, 31],
    "Frailty": ["N", "N", "N", "Y", "Y", "N", "Y", "Y", "N", "N"]
}

In [19]:
df = pd.DataFrame(data)

In [20]:
csv_file = "raw_data.csv"
df.to_csv(csv_file, index=False)

csv_file

'raw_data.csv'

In [21]:
df1 = pd.read_csv("raw_data.csv")
df1

Unnamed: 0,Height,Weight,Age,Grip strength,Frailty
0,65.8,112,30,30,N
1,71.5,136,19,31,N
2,69.4,153,45,29,N
3,68.2,142,22,28,Y
4,67.8,144,29,24,Y
5,68.7,123,50,26,N
6,69.8,141,51,22,Y
7,70.1,136,23,20,Y
8,67.9,112,17,19,N
9,66.8,120,39,31,N


In [22]:
df1.rename(columns={"Height": "Height(m)"}, inplace=True)
df1['Height(m)'] = df1['Height(m)']*0.0254
#df1["Height"].rename("Height(m)", inplace=True)
df1

Unnamed: 0,Height(m),Weight,Age,Grip strength,Frailty
0,1.67132,112,30,30,N
1,1.8161,136,19,31,N
2,1.76276,153,45,29,N
3,1.73228,142,22,28,Y
4,1.72212,144,29,24,Y
5,1.74498,123,50,26,N
6,1.77292,141,51,22,Y
7,1.78054,136,23,20,Y
8,1.72466,112,17,19,N
9,1.69672,120,39,31,N


In [23]:
df1.rename(columns={"Weight": "Weight(kg)"}, inplace=True)
df1['Weight(kg)'] = df1['Weight(kg)']*0.453592
df1

Unnamed: 0,Height(m),Weight(kg),Age,Grip strength,Frailty
0,1.67132,50.802304,30,30,N
1,1.8161,61.688512,19,31,N
2,1.76276,69.399576,45,29,N
3,1.73228,64.410064,22,28,Y
4,1.72212,65.317248,29,24,Y
5,1.74498,55.791816,50,26,N
6,1.77292,63.956472,51,22,Y
7,1.78054,61.688512,23,20,Y
8,1.72466,50.802304,17,19,N
9,1.69672,54.43104,39,31,N


In [24]:
df1.rename(columns={"Grip strength": "Grip_strength(kg)"}, inplace=True)
df1

Unnamed: 0,Height(m),Weight(kg),Age,Grip_strength(kg),Frailty
0,1.67132,50.802304,30,30,N
1,1.8161,61.688512,19,31,N
2,1.76276,69.399576,45,29,N
3,1.73228,64.410064,22,28,Y
4,1.72212,65.317248,29,24,Y
5,1.74498,55.791816,50,26,N
6,1.77292,63.956472,51,22,Y
7,1.78054,61.688512,23,20,Y
8,1.72466,50.802304,17,19,N
9,1.69672,54.43104,39,31,N


In [25]:
df1["BMI"] = df1["Weight(kg)"]/(df1["Height(m)"]*df1["Height(m)"])
df1

Unnamed: 0,Height(m),Weight(kg),Age,Grip_strength(kg),Frailty,BMI
0,1.67132,50.802304,30,30,N,18.187131
1,1.8161,61.688512,19,31,N,18.703582
2,1.76276,69.399576,45,29,N,22.334202
3,1.73228,64.410064,22,28,Y,21.46434
4,1.72212,65.317248,29,24,Y,22.024246
5,1.74498,55.791816,50,26,N,18.322705
6,1.77292,63.956472,51,22,Y,20.347273
7,1.78054,61.688512,23,20,Y,19.458118
8,1.72466,50.802304,17,19,N,17.07955
9,1.69672,54.43104,39,31,N,18.907159


In [26]:
df1['BMI'] = df1['BMI'].round(2)
df1

Unnamed: 0,Height(m),Weight(kg),Age,Grip_strength(kg),Frailty,BMI
0,1.67132,50.802304,30,30,N,18.19
1,1.8161,61.688512,19,31,N,18.7
2,1.76276,69.399576,45,29,N,22.33
3,1.73228,64.410064,22,28,Y,21.46
4,1.72212,65.317248,29,24,Y,22.02
5,1.74498,55.791816,50,26,N,18.32
6,1.77292,63.956472,51,22,Y,20.35
7,1.78054,61.688512,23,20,Y,19.46
8,1.72466,50.802304,17,19,N,17.08
9,1.69672,54.43104,39,31,N,18.91


In [27]:
def divide_age_into_categories(age):
    if age < 30:
      return "<30"
    elif age >= 30 and age <= 45:
      return "30-40"
    elif age > 45 and age <= 60:
      return "45-60"
    else:
      return ">60"

In [28]:
df1.insert(3, "Age_Category", df1["Age"].apply(divide_age_into_categories))
df1

Unnamed: 0,Height(m),Weight(kg),Age,Grip_strength(kg),Age_Category,Frailty,BMI
0,1.67132,50.802304,30,30,30-40,N,18.19
1,1.8161,61.688512,19,31,<30,N,18.7
2,1.76276,69.399576,45,29,30-40,N,22.33
3,1.73228,64.410064,22,28,<30,Y,21.46
4,1.72212,65.317248,29,24,<30,Y,22.02
5,1.74498,55.791816,50,26,45-60,N,18.32
6,1.77292,63.956472,51,22,45-60,Y,20.35
7,1.78054,61.688512,23,20,<30,Y,19.46
8,1.72466,50.802304,17,19,<30,N,17.08
9,1.69672,54.43104,39,31,30-40,N,18.91


In [29]:
df1['Frailty'] = df1['Frailty'].map({'Y': 1, 'N': 0})
df1

Unnamed: 0,Height(m),Weight(kg),Age,Grip_strength(kg),Age_Category,Frailty,BMI
0,1.67132,50.802304,30,30,30-40,0,18.19
1,1.8161,61.688512,19,31,<30,0,18.7
2,1.76276,69.399576,45,29,30-40,0,22.33
3,1.73228,64.410064,22,28,<30,1,21.46
4,1.72212,65.317248,29,24,<30,1,22.02
5,1.74498,55.791816,50,26,45-60,0,18.32
6,1.77292,63.956472,51,22,45-60,1,20.35
7,1.78054,61.688512,23,20,<30,1,19.46
8,1.72466,50.802304,17,19,<30,0,17.08
9,1.69672,54.43104,39,31,30-40,0,18.91


In [30]:
df1 = pd.concat([df1.drop(columns=["Age_Category"]),
                pd.DataFrame((enc := OneHotEncoder(sparse_output=False).fit(df1[["Age_Category"]])).transform(df1[["Age_Category"]]),
                             columns=enc.get_feature_names_out(["Age_Category"]),
                             index=df1.index)], axis=1)

In [31]:
df1

Unnamed: 0,Height(m),Weight(kg),Age,Grip_strength(kg),Frailty,BMI,Age_Category_30-40,Age_Category_45-60,Age_Category_<30
0,1.67132,50.802304,30,30,0,18.19,1.0,0.0,0.0
1,1.8161,61.688512,19,31,0,18.7,0.0,0.0,1.0
2,1.76276,69.399576,45,29,0,22.33,1.0,0.0,0.0
3,1.73228,64.410064,22,28,1,21.46,0.0,0.0,1.0
4,1.72212,65.317248,29,24,1,22.02,0.0,0.0,1.0
5,1.74498,55.791816,50,26,0,18.32,0.0,1.0,0.0
6,1.77292,63.956472,51,22,1,20.35,0.0,1.0,0.0
7,1.78054,61.688512,23,20,1,19.46,0.0,0.0,1.0
8,1.72466,50.802304,17,19,0,17.08,0.0,0.0,1.0
9,1.69672,54.43104,39,31,0,18.91,1.0,0.0,0.0


In [32]:
summary = (
    df1.select_dtypes(include="number")
      .agg(['mean', 'median', 'std'])
      .T  # rows = columns, columns = stats
)
print(summary)

                         mean     median        std
Height(m)            1.742440   1.738630   0.042435
Weight(kg)          59.828785  61.688512   6.455436
Age                 32.500000  29.500000  12.860361
Grip_strength(kg)   26.000000  27.000000   4.521553
Frailty              0.400000   0.000000   0.516398
BMI                 19.682000  19.185000   1.780972
Age_Category_30-40   0.300000   0.000000   0.483046
Age_Category_45-60   0.200000   0.000000   0.421637
Age_Category_<30     0.500000   0.500000   0.527046


In [35]:
corr, p_value = pointbiserialr(df1["Frailty"], df1["Grip_strength(kg)"])

print("Correlation:", corr)
print("P-value:", p_value)

Correlation: -0.47586686726680066
P-value: 0.16446464610511277
