In [6]:
import numpy as np
import pandas as pd

In [7]:

def _baseline_step_params(age):
    """Return the (mean, standard deviation) of the baseline daily steps for an age."""
    if age < 18:
        return 12000, 2000
    if age < 40:
        return 10000, 1500
    if age < 60:
        return 8000, 1200
    return 6000, 1000


def _bmi_factor_range(bmi):
    """Return the (low, high) range of the BMI reduction factor, or None below BMI 25."""
    if bmi >= 35:
        return 0.5, 0.7
    if bmi >= 30:
        return 0.7, 0.85
    if bmi >= 25:
        return 0.85, 0.95
    return None


def generate_step_count(df, num_days=30, seed=42, end_date=None):
    """Simulate one step count per patient per day for the ``num_days`` days before ``end_date``.

    For every (patient, day) pair a baseline is drawn from a normal distribution
    chosen by age, then multiplied by random reduction factors for BMI >= 25,
    hypertension == 1 and stroke == 1, then by a +/-10% daily variability factor.
    The result is clamped to [500, 14000] and rounded.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns "patient_id", "age", "bmi", "hypertension"
        and "stroke" (one row per patient).
    num_days : int, default 30
        Number of days to simulate, counted backwards from ``end_date``
        (offsets 1..num_days, so ``end_date`` itself is not included).
    seed : int or None, default 42
        Seed of the private random generator. The default reproduces the
        values previously obtained through ``np.random.seed(42)``.
    end_date : timestamp-like or None, default None
        Reference date the offsets are subtracted from. ``None`` keeps the
        historical behaviour of using ``pd.Timestamp.today()`` (which carries
        the current time of day); pass a fixed date for reproducible output.

    Returns
    -------
    pandas.DataFrame
        Columns "patient_id", "date", "step_count"; ``len(df) * num_days`` rows.
        An empty frame with the same columns is returned when there is
        nothing to simulate.

    Raises
    ------
    KeyError
        If one of the required columns is missing from a non-empty ``df``.
    """
    columns = ["patient_id", "date", "step_count"]
    if len(df) == 0 or num_days < 1:
        # Keep the schema stable even when no rows are produced
        return pd.DataFrame(columns=columns)

    # A private RandomState seeded like np.random.seed(seed) yields the same
    # stream of draws but leaves the caller's global NumPy RNG untouched.
    rng = np.random.RandomState(seed)

    reference = pd.Timestamp.today() if end_date is None else pd.Timestamp(end_date)
    # The dates depend only on the day offset, so compute them once
    dates = [reference - pd.DateOffset(days=day) for day in range(1, num_days + 1)]

    records = []
    # Iterate column-wise: iterrows() would upcast a mixed int/float row to
    # float and turn integer patient ids into floats.
    patients = zip(df["patient_id"], df["age"], df["bmi"], df["hypertension"], df["stroke"])
    for patient_id, age, bmi, hypertension, stroke in patients:
        mean_steps, sd_steps = _baseline_step_params(age)
        bmi_range = _bmi_factor_range(bmi)

        for date in dates:
            # The order of the draws below is fixed; changing it changes the output
            steps = rng.normal(mean_steps, sd_steps)

            if bmi_range is not None:
                steps *= rng.uniform(*bmi_range)
            if hypertension == 1:
                steps *= rng.uniform(0.75, 0.9)
            if stroke == 1:
                steps *= rng.uniform(0.3, 0.5)

            # Daily variability
            steps *= rng.uniform(0.9, 1.1)

            records.append({
                "patient_id": patient_id,
                "date": date,
                # Clamp to the [500, 14000] bounds before rounding
                "step_count": round(max(500, min(steps, 14000))),
            })

    return pd.DataFrame(records, columns=columns)

# Example usage: three patients with differing age, BMI, hypertension and stroke values
health_data = pd.DataFrame(
    [
        {"patient_id": 1, "age": 25, "bmi": 22, "hypertension": 0, "stroke": 0},
        {"patient_id": 2, "age": 45, "bmi": 32, "hypertension": 1, "stroke": 0},
        {"patient_id": 3, "age": 65, "bmi": 37, "hypertension": 1, "stroke": 1},
    ]
)

# Simulate 30 days of step counts for every patient
time_series_df = generate_step_count(health_data, num_days=30)

# Write the simulated series to its own CSV file (row index omitted)
time_series_df.to_csv("time_series_wearable_data.csv", index=False)

# Confirm the write and show the first rows
print("Time-series data saved successfully!")
print(time_series_df.head())


Time-series data saved successfully!
   patient_id                       date  step_count
0           1 2025-01-10 06:35:14.141115       11244
1           1 2025-01-09 06:35:14.141115        9986
2           1 2025-01-08 06:35:14.141115        8796
3           1 2025-01-07 06:35:14.141115       10355
4           1 2025-01-06 06:35:14.141115       11183


In [8]:
# Re-run the 30-day step count simulation on health_data
time_series_df = generate_step_count(df=health_data, num_days=30)

# Write the result to the CSV file (replaces any existing file of that name)
time_series_df.to_csv(path_or_buf="time_series_wearable_data.csv", index=False)

# Confirmation message followed by the first rows
print("Time-series data saved successfully!", time_series_df.head(), sep="\n")

Time-series data saved successfully!
   patient_id                       date  step_count
0           1 2025-01-10 06:35:14.174427       11244
1           1 2025-01-09 06:35:14.174427        9986
2           1 2025-01-08 06:35:14.174427        8796
3           1 2025-01-07 06:35:14.174427       10355
4           1 2025-01-06 06:35:14.174427       11183


In [None]:
# Histogram of the bmi column of df
sns.histplot(data=df, x="bmi")

In [None]:
# Histograms (with KDE overlay) of six numeric / binary columns of df on a 2x3 grid
fig, axs = plt.subplots(2, 3, figsize=(15, 9))

# Figure-level title; the panels are histograms, not count plots
fig.suptitle('Histograms for the healthcare dataset numeric variables', fontsize=16)

# (column, colour) per panel, filled row by row
hist_panels = [
    ("bmi", "skyblue"),
    ("avg_glucose_level", "olive"),
    ("hypertension", "gold"),
    ("heart_disease", "teal"),
    ("stroke", "brown"),
    ("age", "orange"),
]
for ax, (column, color) in zip(axs.flat, hist_panels):
    sns.histplot(data=df, kde=True, x=column, color=color, ax=ax)

# Adjust layout so the panels do not overlap, then render
plt.tight_layout()
plt.show()

In [None]:
# Count plots of the categorical columns of df, plus a bmi histogram, on a 2x3 grid
fig, axs = plt.subplots(2, 3, figsize=(15, 9))

# Figure-level title
fig.suptitle('Countplot for the healthcare dataset categorical variable', fontsize=16)

# Categorical column -> bar colour, in panel order (row by row)
count_colors = {
    "gender": "skyblue",
    "ever_married": "olive",
    "work_type": "gold",
    "Residence_type": "teal",
    "smoking_status": "brown",
}
for ax, (column, color) in zip(axs.flat, count_colors.items()):
    sns.countplot(data=df, x=column, color=color, ax=ax)

# The last panel shows the bmi distribution instead of a count plot
sns.histplot(data=df, x="bmi", color="orange", ax=axs[1, 2])

# Adjust layout so the panels do not overlap, then render
plt.tight_layout()
plt.show()

In [None]:
# Count plots of the categorical columns of df split by stroke, plus a bmi histogram
fig, axs = plt.subplots(2, 3, figsize=(15, 9))

# Figure-level title
fig.suptitle('Countplot for the healthcare dataset categorical variable', fontsize=16)

# First five panels (row by row): one count plot per column, bars split by stroke
stroke_columns = ["gender", "ever_married", "work_type", "Residence_type", "smoking_status"]
for ax, column in zip(axs.flat, stroke_columns):
    sns.countplot(data=df, x=column, hue="stroke", ax=ax)

# The last panel shows the bmi distribution instead of a count plot
sns.histplot(data=df, x="bmi", color="orange", ax=axs[1, 2])

# Adjust layout so the panels do not overlap, then render
plt.tight_layout()
plt.show()

In [None]:
### Dealing with outliers
- Boxplot

In [None]:
# Box plot of age for each value of stroke
sns.boxplot(x="stroke", y="age", data=df)

In [None]:
### Relationship between age and stroke

In [None]:
# Show every column of the row labelled 19
df.loc[19, :]

In [None]:
# Histograms (with KDE overlay) of six numeric / binary columns of null_data on a 2x3 grid
# NOTE(review): presumably null_data holds the rows whose bmi is missing - confirm;
# if so, the bmi panel will have nothing to draw.
fig, axs = plt.subplots(2, 3, figsize=(15, 9))

# Figure-level title; the panels are histograms, not count plots
fig.suptitle('Histograms for the null_data subset of the healthcare dataset', fontsize=16)

# (column, colour) per panel, filled row by row
null_hist_panels = [
    ("bmi", "skyblue"),
    ("avg_glucose_level", "olive"),
    ("hypertension", "gold"),
    ("heart_disease", "teal"),
    ("stroke", "brown"),
    ("age", "orange"),
]
for ax, (column, color) in zip(axs.flat, null_hist_panels):
    sns.histplot(data=null_data, kde=True, x=column, color=color, ax=ax)

# Adjust layout so the panels do not overlap, then render
plt.tight_layout()
plt.show()