**Principles Of Data Science**

JoshaLynn Worth

02.24.26


In [33]:
import pandas as pd

#Read into a pandas dataframe
df = pd.read_csv('FrailtyData.csv')

print(df)

   Height  Weight  Age  GripStrength Frailty
0    65.8     112   30            30       N
1    71.5     136   19            31       N
2    69.4     153   45            29       N
3    68.2     142   22            28       Y
4    67.8     144   29            24       Y
5    68.7     123   50            26       N
6    69.8     141   51            22       Y
7    70.1     136   23            20       Y
8    67.9     112   17            19       N
9    66.8     120   39            31       N


#Part A (Unit Standardization)

In [34]:
#replace height with conversion to meters
df["Height"] = df["Height"] * 0.0254
print(df)

    Height  Weight  Age  GripStrength Frailty
0  1.67132     112   30            30       N
1  1.81610     136   19            31       N
2  1.76276     153   45            29       N
3  1.73228     142   22            28       Y
4  1.72212     144   29            24       Y
5  1.74498     123   50            26       N
6  1.77292     141   51            22       Y
7  1.78054     136   23            20       Y
8  1.72466     112   17            19       N
9  1.69672     120   39            31       N


In [35]:
#replace weight with conversion lb -> kg
df["Weight"] = df["Weight"] * .45359237
print(df)

    Height     Weight  Age  GripStrength Frailty
0  1.67132  50.802345   30            30       N
1  1.81610  61.688562   19            31       N
2  1.76276  69.399633   45            29       N
3  1.73228  64.410117   22            28       Y
4  1.72212  65.317301   29            24       Y
5  1.74498  55.791862   50            26       N
6  1.77292  63.956524   51            22       Y
7  1.78054  61.688562   23            20       Y
8  1.72466  50.802345   17            19       N
9  1.69672  54.431084   39            31       N


#Part B (Feature Engineering)

In [36]:
#add a new column
#Calc the BMI height
df['BMI'] = df['Weight'] / (df['Height'] ** 2)

print(df)

    Height     Weight  Age  GripStrength Frailty        BMI
0  1.67132  50.802345   30            30       N  18.187146
1  1.81610  61.688562   19            31       N  18.703597
2  1.76276  69.399633   45            29       N  22.334220
3  1.73228  64.410117   22            28       Y  21.464358
4  1.72212  65.317301   29            24       Y  22.024264
5  1.74498  55.791862   50            26       N  18.322720
6  1.77292  63.956524   51            22       Y  20.347290
7  1.78054  61.688562   23            20       Y  19.458134
8  1.72466  50.802345   17            19       N  17.079564
9  1.69672  54.431084   39            31       N  18.907174


In [37]:
#Add a new column AgeGroup
#seprate ito gorups <30 , 30-45, 45-60,>60

df['AgeGroup'] = pd.cut(df['Age'], bins=[0, 30, 45, 60, float('inf')], labels=["<30","30-45","45-60",">60" ])

print(df)

    Height     Weight  Age  GripStrength Frailty        BMI AgeGroup
0  1.67132  50.802345   30            30       N  18.187146      <30
1  1.81610  61.688562   19            31       N  18.703597      <30
2  1.76276  69.399633   45            29       N  22.334220    30-45
3  1.73228  64.410117   22            28       Y  21.464358      <30
4  1.72212  65.317301   29            24       Y  22.024264      <30
5  1.74498  55.791862   50            26       N  18.322720    45-60
6  1.77292  63.956524   51            22       Y  20.347290    45-60
7  1.78054  61.688562   23            20       Y  19.458134      <30
8  1.72466  50.802345   17            19       N  17.079564      <30
9  1.69672  54.431084   39            31       N  18.907174    30-45


#Part C (Categorical --> Numeric Encoding)

In [38]:
#Binary encoding: Frailty_binary (Y→1, N→0, store as int8)
#wanna use mapping

map = { 'Y': 1, 'N': 0}

df['Frailty'] = df['Frailty'].map(map)

#Almost missed this part, good when the vlaues dont need so much memory will
#make program more efficent!
df['Frailty'] = df['Frailty'].astype('int8')


print(df)



    Height     Weight  Age  GripStrength  Frailty        BMI AgeGroup
0  1.67132  50.802345   30            30        0  18.187146      <30
1  1.81610  61.688562   19            31        0  18.703597      <30
2  1.76276  69.399633   45            29        0  22.334220    30-45
3  1.73228  64.410117   22            28        1  21.464358      <30
4  1.72212  65.317301   29            24        1  22.024264      <30
5  1.74498  55.791862   50            26        0  18.322720    45-60
6  1.77292  63.956524   51            22        1  20.347290    45-60
7  1.78054  61.688562   23            20        1  19.458134      <30
8  1.72466  50.802345   17            19        0  17.079564      <30
9  1.69672  54.431084   39            31        0  18.907174    30-45


In [39]:
#we could use get dummies but wanna use one hot encoder
from sklearn.preprocessing import OneHotEncoder
encoder = OneHotEncoder(sparse_output=False)

encoded = encoder.fit_transform(df[['AgeGroup']])

encoded_df = pd.DataFrame(
    encoded,
    columns=encoder.get_feature_names_out(["AgeGroup"])
)

df = pd.concat([df, encoded_df], axis=1)

print(df)

    Height     Weight  Age  GripStrength  Frailty        BMI AgeGroup  \
0  1.67132  50.802345   30            30        0  18.187146      <30   
1  1.81610  61.688562   19            31        0  18.703597      <30   
2  1.76276  69.399633   45            29        0  22.334220    30-45   
3  1.73228  64.410117   22            28        1  21.464358      <30   
4  1.72212  65.317301   29            24        1  22.024264      <30   
5  1.74498  55.791862   50            26        0  18.322720    45-60   
6  1.77292  63.956524   51            22        1  20.347290    45-60   
7  1.78054  61.688562   23            20        1  19.458134      <30   
8  1.72466  50.802345   17            19        0  17.079564      <30   
9  1.69672  54.431084   39            31        0  18.907174    30-45   

   AgeGroup_30-45  AgeGroup_45-60  AgeGroup_<30  
0             0.0             0.0           1.0  
1             0.0             0.0           1.0  
2             1.0             0.0           0.

#Part D EDA & Reporting

In [42]:
#Creating the Summary table with mean,median,std and sace to report findings.md
#This method is better because you .agg built in function
sum_table = df.select_dtypes(include='number').agg(['mean','median','std'])
sum_table = sum_table.round(2)

print(sum_table)
#now report findings!

        Height  Weight    Age  GripStrength  Frailty    BMI  AgeGroup_30-45  \
mean      1.74   59.83  32.50         26.00     0.40  19.68            0.20   
median    1.74   61.69  29.50         27.00     0.00  19.18            0.00   
std       0.04    6.46  12.86          4.52     0.52   1.78            0.42   

        AgeGroup_45-60  AgeGroup_<30  
mean              0.20          0.60  
median            0.00          1.00  
std               0.42          0.52  


In [48]:
#Quantify relation of strength ↔ frailty: compute correlation between Grip_kg
#and Frailty_binary, and report it.

correlation = df['GripStrength'].corr(df['Frailty'])

print(correlation)

print('As grip strength goes up this means that Fraility goes down. \nThe .47 indicates a moderate realationship')


-0.4758668672668007
As grip strength goes up this means that Fraility goes down. 
The .47 indicates a moderate realationship
