In [4]:
import os
os.makedirs("Assignment1/frailty/data", exist_ok=True)
os.makedirs("Assignment1/frailty/outputs", exist_ok=True)
os.makedirs("Assignment1/frailty/reports", exist_ok=True)
print("Folders created:",
      os.listdir("Assignment1"),
      os.listdir("Assignment1/frailty"))


Folders created: ['student_performance', 'frailty'] ['outputs', 'reports', 'data']


In [5]:
raw = """Height_in,Weight_lb,Age_yr,Grip_kg,Frailty
65.8,112,30,30,N
71.5,136,19,31,N
69.4,153,45,29,N
68.2,142,22,28,Y
67.8,144,29,24,Y
68.7,123,50,26,N
69.8,141,51,22,Y
70.1,136,23,20,Y
67.9,112,17,19,N
66.8,120,39,31,N
"""
open("Assignment1/frailty/data/frailty_raw.csv","w").write(raw)
print("Raw CSV saved to Assignment1/frailty/data/frailty_raw.csv")

Raw CSV saved to Assignment1/frailty/data/frailty_raw.csv


In [11]:
import pandas as pd

df = pd.read_csv("Assignment1/frailty/data/frailty_raw.csv")

df

Unnamed: 0,Height_in,Weight_lb,Age_yr,Grip_kg,Frailty
0,65.8,112,30,30,N
1,71.5,136,19,31,N
2,69.4,153,45,29,N
3,68.2,142,22,28,Y
4,67.8,144,29,24,Y
5,68.7,123,50,26,N
6,69.8,141,51,22,Y
7,70.1,136,23,20,Y
8,67.9,112,17,19,N
9,66.8,120,39,31,N


In [6]:
import pandas as pd
import numpy as np

df = pd.read_csv("Assignment1/frailty/data/frailty_raw.csv")
# Unit standardization
df["Height_m"] = df["Height_in"] * 0.0254
df["Weight_kg"] = df["Weight_lb"] * 0.45359237

# Feature engineering
df["BMI"] = (df["Weight_kg"] / (df["Height_m"]**2)).round(2)

def age_group(age):
    if age < 30:
        return "<30"
    elif 30 <= age <= 45:
        return "30–45"   # en-dash as requested
    elif 46 <= age <= 60:
        return "46–60"
    else:
        return ">60"

df["AgeGroup"] = df["Age_yr"].apply(age_group)

# Categorical -> numeric
df["Frailty_binary"] = df["Frailty"].map({"Y":1,"N":0}).astype("int8")

# One-hot AgeGroup ensuring the exact columns exist
age_dummies = pd.get_dummies(df["AgeGroup"], prefix="AgeGroup")
for col in ["AgeGroup_<30","AgeGroup_30–45","AgeGroup_46–60","AgeGroup_>60"]:
    if col not in age_dummies:
        age_dummies[col] = 0
age_dummies = age_dummies[["AgeGroup_<30","AgeGroup_30–45","AgeGroup_46–60","AgeGroup_>60"]]
df = pd.concat([df, age_dummies], axis=1)

# Save processed
df.to_csv("Assignment1/frailty/outputs/frailty_processed.csv", index=False)
print("Processed CSV saved to Assignment1/frailty/outputs/frailty_processed.csv")
df.head()

Processed CSV saved to Assignment1/frailty/outputs/frailty_processed.csv


Unnamed: 0,Height_in,Weight_lb,Age_yr,Grip_kg,Frailty,Height_m,Weight_kg,BMI,AgeGroup,Frailty_binary,AgeGroup_<30,AgeGroup_30–45,AgeGroup_46–60,AgeGroup_>60
0,65.8,112,30,30,N,1.67132,50.802345,18.19,30–45,0,False,True,False,0
1,71.5,136,19,31,N,1.8161,61.688562,18.7,<30,0,True,False,False,0
2,69.4,153,45,29,N,1.76276,69.399633,22.33,30–45,0,False,True,False,0
3,68.2,142,22,28,Y,1.73228,64.410117,21.46,<30,1,True,False,False,0
4,67.8,144,29,24,Y,1.72212,65.317301,22.02,<30,1,True,False,False,0


In [9]:
# Summary stats (mean/median/std) for numeric columns requested
numeric_cols = ["Height_in","Weight_lb","Age_yr","Grip_kg","Height_m","Weight_kg","BMI"]
summary = df[numeric_cols].agg(["mean","median","std"]).round(2).transpose()

# Correlation (Grip_kg vs Frailty_binary)
corr = df["Grip_kg"].corr(df["Frailty_binary"])

# write findings.md
os.makedirs("Assignment1/frailty/reports", exist_ok=True)
with open("Assignment1/frailty/reports/findings.md","w") as f:
    f.write("# Frailty Findings\n\n")
    f.write("## Summary statistics (mean / median / std)\n\n")
    f.write(summary.to_markdown() + "\n\n")
    f.write("## Grip strength ↔ Frailty\n\n")
    f.write(f"- Correlation (Grip_kg vs Frailty_binary): **{corr:.4f}**\n\n")
    if corr < 0:
        f.write("- Interpretation: negative correlation → higher grip strength is associated with lower frailty (Frailty=1 indicates frailty). \n")
    else:
        f.write("- Interpretation: positive or no correlation.\n")

print("findings.md written at Assignment1/frailty/reports/findings.md")
print("Correlation value:", corr)

findings.md written at Assignment1/frailty/reports/findings.md
Correlation value: -0.4758668672668007


I created three main files: the raw data table, a processed table with converted units, BMI, age groups, and one-hot encoded columns, and a report with summary statistics and correlations.
I loaded the raw CSV, processed the data by converting height and weight, calculating BMI, assigning age groups, and encoding frailty. Then, I analyzed the data by calculating averages, standard deviations, and the correlation between grip strength and frailty.

In [12]:
# zip only the frailty folder
!zip -r Assignment1_frailty.zip Assignment1/frailty
print("Zipped to Assignment1_frailty.zip. Use the Files pane (left) to download it, or use the Colab menu: Files -> Download.")

  adding: Assignment1/frailty/ (stored 0%)
  adding: Assignment1/frailty/outputs/ (stored 0%)
  adding: Assignment1/frailty/outputs/frailty_processed.csv (deflated 58%)
  adding: Assignment1/frailty/reports/ (stored 0%)
  adding: Assignment1/frailty/reports/findings.md (deflated 45%)
  adding: Assignment1/frailty/what_I_have_done.txt (deflated 46%)
  adding: Assignment1/frailty/data/ (stored 0%)
  adding: Assignment1/frailty/data/frailty_raw.csv (deflated 33%)
  adding: Assignment1/frailty/data/raw_frailty.csv (deflated 33%)
Zipped to Assignment1_frailty.zip. Use the Files pane (left) to download it, or use the Colab menu: Files -> Download.
