<a href="https://colab.research.google.com/github/Grandediw/Threpsy/blob/main/PatientData.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

# Function to generate random patient data
def generate_patient_data(num_patients, disease_prob=0.0, obesity_prob=0.0, health_prob=0.0):
    np.random.seed(0)

    heights = np.random.normal(loc=170, scale=10, size=num_patients)  # Average height 170 cm
    weights = np.random.normal(loc=70, scale=15, size=num_patients)  # Average weight 70 kg

    activity_levels = ['Active', 'Moderate', 'Sedentary', 'IT Worker']
    activity_level_choices = np.random.choice(activity_levels, num_patients)

    diseases = ['None', 'Diabetes', 'Hypertension', 'Obesity', 'Nutrition Deficiency']
    disease_prob = disease_prob / 4  # Correcting the probability to ensure the total sums to 1
    health_prob = 1 - (disease_prob * 3 + obesity_prob)

    disease_choices = np.random.choice(diseases, num_patients, p=[
        health_prob,
        disease_prob,
        disease_prob,
        obesity_prob,
        disease_prob
    ])

    patient_data = pd.DataFrame({
        'Patient_ID': np.arange(1, num_patients + 1),
        'Height_cm': heights,
        'Weight_kg': weights,
        'Activity_Level': activity_level_choices,
        'Disease': disease_choices
    })

    return patient_data

# Function to generate patient data for athletes
def generate_athlete_data(num_athletes):
    np.random.seed(0)

    heights = np.random.normal(loc=180, scale=10, size=num_athletes)  # Average height 180 cm for athletes
    weights = np.random.normal(loc=75, scale=10, size=num_athletes)  # Average weight 75 kg for athletes

    activity_levels = ['Athlete'] * num_athletes
    diseases = ['None'] * num_athletes

    athlete_data = pd.DataFrame({
        'Patient_ID': np.arange(1, num_athletes + 1),
        'Height_cm': heights,
        'Weight_kg': weights,
        'Activity_Level': activity_levels,
        'Disease': diseases
    })

    return athlete_data

# Total number of patients
num_patients = 10000

# Randomly generate the number of patients in each category
np.random.seed(0)
category_proportions = np.random.dirichlet(np.ones(4), size=1)[0]
num_diseased_patients = int(num_patients * category_proportions[0])
num_athletes = int(num_patients * category_proportions[1])
num_obesity_patients = int(num_patients * category_proportions[2])
num_health_patients = num_patients - (num_diseased_patients + num_athletes + num_obesity_patients)

# Generate data for each category
diseased_patients = generate_patient_data(num_diseased_patients, disease_prob=1.0)
athletes = generate_athlete_data(num_athletes)
obesity_patients = generate_patient_data(num_obesity_patients, obesity_prob=1.0)
health_patients = generate_patient_data(num_health_patients, health_prob=1.0)

# Adjust Patient_ID for concatenation
athletes['Patient_ID'] += num_diseased_patients
obesity_patients['Patient_ID'] += num_diseased_patients + num_athletes
health_patients['Patient_ID'] += num_diseased_patients + num_athletes + num_obesity_patients

# Combine all datasets
all_patients = pd.concat([diseased_patients, athletes, obesity_patients, health_patients], ignore_index=True)

# Function to generate sample meal data
def generate_meal_data(num_meals):
    np.random.seed(0)

    meal_names = ['Meal A', 'Meal B', 'Meal C', 'Meal D', 'Meal E', 'Meal F']

    calorie_intake = np.random.normal(loc=500, scale=100, size=num_meals)
    protein = np.random.normal(loc=30, scale=5, size=num_meals)
    fat = np.random.normal(loc=20, scale=5, size=num_meals)
    carbs = np.random.normal(loc=50, scale=10, size=num_meals)

    meal_data = pd.DataFrame({
        'Meal_ID': np.arange(1, num_meals + 1),
        'Meal_Name': np.random.choice(meal_names, num_meals),
        'Calories_kcal': calorie_intake,
        'Protein_g': protein,
        'Fat_g': fat,
        'Carbs_g': carbs
    })

    return meal_data

# Generate meal data for the patients
num_meals = 10000
meal_data = generate_meal_data(num_meals)

# Display the first few rows of each dataframe to verify the data
print("All Patients Data Sample:")
print(all_patients.head())

print("\nMeal Data Sample:")
print(meal_data.head())

# Save the datasets to CSV files for further analysis or use
all_patients.to_csv('all_patients_data.csv', index=False)
meal_data.to_csv('meal_data.csv', index=False)

print("Datasets generated and saved to 'all_patients_data.csv' and 'meal_data.csv'")

All Patients Data Sample:
   Patient_ID   Height_cm  Weight_kg Activity_Level       Disease
0           1  187.640523  53.748600      Sedentary          None
1           2  174.001572  68.040694         Active          None
2           3  179.787380  91.010336      IT Worker          None
3           4  192.408932  60.225157      Sedentary  Hypertension
4           5  188.675580  77.572232       Moderate  Hypertension

Meal Data Sample:
   Meal_ID Meal_Name  Calories_kcal  Protein_g      Fat_g    Carbs_g
0        1    Meal E     676.405235  28.989415  21.650229  53.712321
1        2    Meal D     540.015721  25.833845  19.997600  53.047839
2        3    Meal A     597.873798  38.668001  24.090579  55.041246
3        4    Meal E     724.089320  30.953245  22.141069  51.352996
4        5    Meal F     686.755799  29.110948   7.480263  56.537588
Datasets generated and saved to 'all_patients_data.csv' and 'meal_data.csv'
