In [3]:
!pip install fpdf

Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fpdf
  Building wheel for fpdf (setup.py) ... [?25l[?25hdone
  Created wheel for fpdf: filename=fpdf-1.7.2-py2.py3-none-any.whl size=40704 sha256=cbd6fe41899df476191351a31fcd35da0f6a64501395f245f5f9955d8ed06f76
  Stored in directory: /root/.cache/pip/wheels/65/4f/66/bbda9866da446a72e206d6484cd97381cbc7859a7068541c36
Successfully built fpdf
Installing collected packages: fpdf
Successfully installed fpdf-1.7.2


In [6]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from fpdf import FPDF
import os

# Output locations. Both values keep their trailing slash because the rest of
# this cell builds file paths by plain string concatenation (data_dir + 'x.csv').
# NOTE(review): the paths look like a Colab Google Drive mount; no mount call is
# visible here, so confirm the drive is mounted before this cell runs.
data_dir = '/content/drive/MyDrive/PDS_Assignment/Question_1/data/'
results_dir = '/content/drive/MyDrive/PDS_Assignment/Question_1/results/'

# exist_ok=True makes re-running the cell safe when the folders already exist.
os.makedirs(data_dir, exist_ok=True)
# os.path.join avoids the doubled separator that results_dir + '/visualizations'
# produced (results_dir already ends with '/').
os.makedirs(os.path.join(results_dir, 'visualizations'), exist_ok=True)

# Measurements for ten participants plus a Y/N frailty label each.
raw_data = {
    'Height': [65.8, 71.5, 69.4, 68.2, 67.8, 68.7, 69.8, 70.1, 67.9, 66.8],
    'Weight': [112, 136, 153, 142, 144, 123, 141, 136, 112, 120],
    'Age': [30, 19, 45, 22, 29, 50, 51, 23, 17, 39],
    'Grip strength': [30, 31, 29, 28, 24, 26, 22, 20, 19, 31],
    'Frailty': ['N', 'N', 'N', 'Y', 'Y', 'N', 'Y', 'Y', 'N', 'N']
}

# Unit conversions needed for BMI (kg / m^2).
INCH_TO_M = 0.0254
# NOTE(review): Weight is assumed to be in pounds. Heights are clearly inches,
# and treating the weights as kilograms gives a BMI of roughly 36-49 for every
# participant. Confirm against the data dictionary.
LB_TO_KG = 0.45359237

# Stage 1: the data exactly as entered (Frailty still 'N'/'Y').
df_input = pd.DataFrame(raw_data)
df_input.to_csv(data_dir + 'raw_data.csv', index=False)

# Stage 2: encode the frailty label numerically so it can be correlated/plotted.
df_clean = df_input.assign(Frailty=df_input['Frailty'].map({'N': 0, 'Y': 1}))
# .map() silently yields NaN for any label other than 'N'/'Y'; fail loudly instead.
assert df_clean['Frailty'].notna().all(), "Frailty contains a label other than 'N' or 'Y'"
df_clean.to_csv(data_dir + 'cleaned_data.csv', index=False)

# Stage 3: add BMI, converting BOTH weight and height to metric first.
df_processed = df_clean.assign(
    BMI=(df_clean['Weight'] * LB_TO_KG) / (df_clean['Height'] * INCH_TO_M) ** 2
)
df_processed.to_csv(data_dir + 'processed_data.csv', index=False)

# The remaining statements in this cell refer to the processed frame as `df_raw`.
df_raw = df_processed

# Pearson correlation (pandas default) between grip strength and the 0/1 frailty code.
correlation = df_raw['Grip strength'].corr(df_raw['Frailty'])

def _save_figure(draw, title, xlabel, ylabel, relative_path):
    """Draw one 8x6-inch figure and save it under ``results_dir``.

    Parameters
    ----------
    draw : callable
        Receives the Axes and draws the seaborn plot onto it.
    title, xlabel, ylabel : str
        Text applied to the Axes after drawing.
    relative_path : str
        File path appended to ``results_dir``.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    draw(ax)
    # Applied after drawing so these override any labels the plotting call sets.
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    fig.savefig(results_dir + relative_path)
    # Close the figure so it is neither shown inline nor kept in memory.
    plt.close(fig)


# One entry per output image: (drawing callable, title, x label, y label, file).
figure_specs = [
    (
        lambda ax: sns.scatterplot(x='Grip strength', y='Frailty', data=df_raw, ax=ax),
        'Grip Strength vs. Frailty',
        'Grip Strength (kg)',
        'Frailty (0 = No, 1 = Yes)',
        'visualizations/scatter_plot.png',
    ),
    (
        lambda ax: sns.boxplot(x='Frailty', y='Grip strength', data=df_raw, ax=ax),
        'Grip Strength by Frailty Status',
        'Frailty (0 = No, 1 = Yes)',
        'Grip Strength (kg)',
        'visualizations/box_plot.png',
    ),
    (
        lambda ax: sns.histplot(df_raw['Grip strength'], kde=True, ax=ax),
        'Distribution of Grip Strength',
        'Grip Strength (kg)',
        'Frequency',
        'visualizations/grip_strength_histogram.png',
    ),
    (
        lambda ax: sns.countplot(x='Frailty', data=df_raw, ax=ax),
        'Frailty Count',
        'Frailty (0 = No, 1 = Yes)',
        'Count',
        'visualizations/frailty_count_plot.png',
    ),
    (
        lambda ax: sns.regplot(x='Age', y='Grip strength', data=df_raw, ax=ax),
        'Grip Strength vs. Age',
        'Age (years)',
        'Grip Strength (kg)',
        'visualizations/age_grip_strength_regression.png',
    ),
]

for figure_spec in figure_specs:
    _save_figure(*figure_spec)

# Plain-text summary: the unrounded correlation on the first line, then a note
# listing the saved figures (no trailing newline after the note).
summary_lines = [
    f"Correlation between grip strength and frailty: {correlation}\n",
    "Box plot, scatter plot, grip strength histogram, frailty count plot, and regression plot saved in the visualizations folder.",
]

with open(results_dir + 'analysis_results.txt', 'w') as summary_file:
    summary_file.writelines(summary_lines)

# Build the PDF report: title, the correlation sentence, then every saved figure.
# Lengths below are in FPDF's default unit (millimetres on an A4 portrait page).
IMAGE_X = 10        # left edge of every figure
IMAGE_WIDTH = 180   # rendered width of every figure
IMAGE_GAP = 5       # vertical space left below each figure

pdf = FPDF()
pdf.add_page()
pdf.set_font("Arial", 'B', 16)
# Width 0 stretches the cell to the right margin, so align="C" centres the
# title between the margins (a fixed width of 200 overshoots the printable area).
pdf.cell(0, 10, txt="Frailty Analysis Report", ln=True, align="C")

pdf.set_font("Arial", 'B', 12)
pdf.ln(10)
pdf.cell(0, 10, txt="1. Correlation Between Grip Strength and Frailty", ln=True)

pdf.set_font("Arial", size=12)
pdf.multi_cell(0, 10, txt=f"The correlation between grip strength and frailty is {correlation:.2f}.")

pdf.ln(10)
# Same bold style as heading 1; the regular body font was still active here before.
pdf.set_font("Arial", 'B', 12)
pdf.cell(0, 10, txt="2. Visualizations", ln=True)

figure_files = (
    "scatter_plot.png",
    "box_plot.png",
    "grip_strength_histogram.png",
    "frailty_count_plot.png",
    "age_grip_strength_regression.png",
)

for figure_file in figure_files:
    figure_path = results_dir + "visualizations/" + figure_file

    # Derive the rendered height from the file's own pixel dimensions so the
    # layout does not depend on how the figure was sized when it was saved.
    pixel_rows, pixel_cols = plt.imread(figure_path).shape[:2]
    image_height = IMAGE_WIDTH * pixel_rows / pixel_cols

    # image() with an explicit y never breaks the page, and neither does ln().
    # Without this check the figures overlap and run off the bottom of the page.
    if pdf.get_y() + image_height > pdf.page_break_trigger:
        pdf.add_page()

    top = pdf.get_y()
    pdf.image(figure_path, x=IMAGE_X, y=top, w=IMAGE_WIDTH, h=image_height)
    # Move the cursor below the picture; image() leaves it untouched when y is given.
    pdf.set_y(top + image_height + IMAGE_GAP)

pdf.output(results_dir + "frailty_analysis_report.pdf")


''