In [15]:
import os

def create_project_structure(base_dir):
    """Create the project's working folders underneath ``base_dir``.

    The folders ``data_raw``, ``data_clean``, ``results`` and ``src`` are
    created inside ``base_dir``; ``base_dir`` itself is created when missing.
    Folders that already exist are left alone, so repeated calls are safe.

    Args:
        base_dir: Path of the project root directory.

    Returns:
        None.
    """
    for subfolder in ('data_raw', 'data_clean', 'results', 'src'):
        target = os.path.join(base_dir, subfolder)
        # exist_ok=True keeps a re-run of the cell from raising FileExistsError
        os.makedirs(target, exist_ok=True)

# Project root folder; the cells below build every file path from this name.
# It is a relative path, so it resolves against the current working directory.
base_dir = 'frailty_project'
# Create the data_raw / data_clean / results / src sub-folders on disk.
create_project_structure(base_dir)



In [16]:
import pandas as pd

# Raw frailty measurements: one list per column, ten values each
data = {
    'Height': [65.8, 71.5, 69.4, 68.2, 67.8, 68.7, 69.8, 70.1, 67.9, 66.8],
    'Weight': [112, 136, 153, 142, 144, 123, 141, 136, 112, 120],
    'Age': [30, 19, 45, 22, 29, 50, 51, 23, 17, 39],
    'Grip_strength': [30, 31, 29, 28, 24, 26, 22, 20, 19, 31],
    'Frailty': ['N', 'N', 'N', 'Y', 'Y', 'N', 'Y', 'Y', 'N', 'N'],
}

df = pd.DataFrame(data)

# Store the untouched measurements under data_raw/ without the index column
df.to_csv(
    path_or_buf=os.path.join(base_dir, 'data_raw/frailty_data.csv'),
    index=False,
)


In [17]:
# Plain-text description of the raw dataset, stored next to the raw CSV
readme_content = """
Frailty Data Description:
- This dataset contains measurements from 10 female participants.
- Columns include:
    - Height: Height in inches.
    - Weight: Weight in pounds.
    - Age: Age in years.
    - Grip_strength: Hand grip strength in kilograms.
    - Frailty: A binary categorical variable (Y = frail, N = not frail).
"""

# Mode 'w' replaces any README left over from an earlier run
with open(os.path.join(base_dir, 'data_raw/README.txt'), mode='w') as readme_file:
    readme_file.write(readme_content)


In [18]:
# Step 3: Write the cleaning script
def write_cleaning_script(base_dir):
    """Write the standalone cleaning script to ``<base_dir>/src/clean_data.py``.

    The generated script reads ``data_raw/frailty_data.csv``, drops rows with
    missing values, recodes ``Frailty`` from ``'N'``/``'Y'`` to ``0``/``1`` and
    writes ``data_clean/clean_frailty_data.csv``.

    The script locates the project root from its own file location (the parent
    of ``src/``) instead of the current working directory, so it works whether
    it is launched from the project root, from ``src/`` or from anywhere else.

    Args:
        base_dir: Path of the project root directory. ``<base_dir>/src`` is
            created if it does not exist yet.

    Returns:
        None.
    """
    clean_code = '''\
import os

import pandas as pd

# Locate the project root from the position of this script (<root>/src/), so
# the paths below do not depend on the directory the script is started from.
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
RAW_PATH = os.path.join(PROJECT_ROOT, 'data_raw', 'frailty_data.csv')
CLEAN_DIR = os.path.join(PROJECT_ROOT, 'data_clean')
CLEAN_PATH = os.path.join(CLEAN_DIR, 'clean_frailty_data.csv')

# Load the raw data
df = pd.read_csv(RAW_PATH)

# Remove rows with missing values (not applicable here, but good practice)
df.dropna(inplace=True)

# Convert categorical variables to numeric
df['Frailty'] = df['Frailty'].map({'N': 0, 'Y': 1})

# Save the cleaned data
os.makedirs(CLEAN_DIR, exist_ok=True)
df.to_csv(CLEAN_PATH, index=False)
print("Cleaned data saved as 'clean_frailty_data.csv'")
'''

    # Make sure the target folder exists so this function does not depend on
    # create_project_structure() having been run first.
    src_dir = os.path.join(base_dir, 'src')
    os.makedirs(src_dir, exist_ok=True)

    with open(os.path.join(src_dir, 'clean_data.py'), 'w', encoding='utf-8') as f:
        f.write(clean_code)

# Step 4: Write the analysis script
def write_analysis_script(base_dir):
    """Write the standalone analysis script to ``<base_dir>/src/analysis.py``.

    The generated script reads ``data_clean/clean_frailty_data.csv``, computes
    the mean of ``Grip_strength`` and the value counts of ``Frailty``, and
    writes both to ``results/analysis_results.txt``.

    The script locates the project root from its own file location (the parent
    of ``src/``) instead of the current working directory, so it works whether
    it is launched from the project root, from ``src/`` or from anywhere else.

    Args:
        base_dir: Path of the project root directory. ``<base_dir>/src`` is
            created if it does not exist yet.

    Returns:
        None.
    """
    analysis_code = '''\
import os

import pandas as pd

# Locate the project root from the position of this script (<root>/src/), so
# the paths below do not depend on the directory the script is started from.
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
CLEAN_PATH = os.path.join(PROJECT_ROOT, 'data_clean', 'clean_frailty_data.csv')
RESULTS_DIR = os.path.join(PROJECT_ROOT, 'results')
RESULTS_PATH = os.path.join(RESULTS_DIR, 'analysis_results.txt')

# Load the cleaned data
df = pd.read_csv(CLEAN_PATH)

# Perform basic analysis
mean_grip_strength = df['Grip_strength'].mean()
frailty_count = df['Frailty'].value_counts()

# Save results to a text file
os.makedirs(RESULTS_DIR, exist_ok=True)
with open(RESULTS_PATH, 'w') as f:
    f.write(f"Mean Grip Strength: {mean_grip_strength:.2f} kg\\n")
    f.write("Frailty Counts:\\n")
    f.write(f"{frailty_count}\\n")
print("Analysis results saved to 'analysis_results.txt'")
'''

    # Make sure the target folder exists so this function does not depend on
    # create_project_structure() having been run first.
    src_dir = os.path.join(base_dir, 'src')
    os.makedirs(src_dir, exist_ok=True)

    with open(os.path.join(src_dir, 'analysis.py'), 'w', encoding='utf-8') as f:
        f.write(analysis_code)



In [19]:
import pandas as pd

# Input and output locations for the cleaning step
raw_data_path = os.path.join(base_dir, 'data_raw/frailty_data.csv')
clean_data_path = os.path.join(base_dir, 'data_clean/clean_frailty_data.csv')

# Read the raw measurements back from disk
df = pd.read_csv(raw_data_path)

# Keep only rows without missing values; `df` itself is left unchanged
df_clean = df.dropna(axis=0, how='any')

# Store the cleaned table under data_clean/ without the index column
df_clean.to_csv(clean_data_path, index=False)

print("Data cleaning complete. Clean data saved to:", clean_data_path)




Data cleaning complete. Clean data saved to: frailty_project/data_clean/clean_frailty_data.csv


In [20]:
import pandas as pd

# Read the cleaned table written by the cleaning step
clean_data_path = os.path.join(base_dir, 'data_clean/clean_frailty_data.csv')
df_clean = pd.read_csv(clean_data_path)

# Descriptive statistics as produced by DataFrame.describe()
summary_stats = df_clean.describe()

# Mean grip strength within each Frailty group
frailty_summary = df_clean['Grip_strength'].groupby(df_clean['Frailty']).mean()

# Plain-text report; both tables are embedded using their default text form
analysis_results = f"""
Summary Statistics:
{summary_stats}

Average Grip Strength by Frailty Status:
{frailty_summary}
"""

# Mode 'w' replaces any report left over from an earlier run
with open(os.path.join(base_dir, 'results/analysis_results.txt'), mode='w') as report_file:
    report_file.write(analysis_results)

print("Analysis complete. Results saved to results/analysis_results.txt")


Analysis complete. Results saved to results/analysis_results.txt


In [21]:
# Display the report written by the previous cell. This line is not plain
# Python: it has no `!` or `%` prefix, so it presumably relies on IPython
# resolving `cat` as a shell alias (verify on non-POSIX systems).
cat frailty_project/results/analysis_results.txt



Summary Statistics:
          Height      Weight        Age  Grip_strength
count  10.000000   10.000000  10.000000      10.000000
mean   68.600000  131.900000  32.500000      26.000000
std     1.670662   14.231811  12.860361       4.521553
min    65.800000  112.000000  17.000000      19.000000
25%    67.825000  120.750000  22.250000      22.500000
50%    68.450000  136.000000  29.500000      27.000000
75%    69.700000  141.750000  43.500000      29.750000
max    71.500000  153.000000  51.000000      31.000000

Average Grip Strength by Frailty Status:
Frailty
N    27.666667
Y    23.500000
Name: Grip_strength, dtype: float64
