In [9]:
## Task 1: Synthetic Dataset Generation
##This task involves generating a synthetic dataset to simulate the study of the effects of different exercise regimens on blood pressure. We will use NumPy for random number generation and create a dataset of 100 participants. Each participant is assigned to one of three exercise groups (jogging, weightlifting, or yoga), and their blood pressure is measured before and after a 6-week exercise program.


In [20]:
import numpy as np
import pandas as pd

# Set random seed for reproducibility
np.random.seed(42)

# Number of participants
num_participants = 100

# Generate participant IDs
participant_ids = np.arange(1, num_participants + 1)

# Define exercise groups
exercise_groups_list = ['jogging', 'weightlifting', 'yoga']

# Randomly assign participants to exercise groups
exercise_groups = np.random.choice(exercise_groups_list, size=num_participants)

# Generate pre-exercise systolic blood pressure values
pre_exercise_bp = np.random.normal(120, 15, num_participants).astype(int)

# Initialize a list to store post-exercise systolic blood pressure values
post_exercise_bp = []

# Calculate post-exercise systolic blood pressure for each participant
for i, group in enumerate(exercise_groups):
    if group == 'jogging':
        reduction = np.random.randint(5, 16)
    elif group == 'weightlifting':
        reduction = np.random.randint(0, 11)
    elif group == 'yoga':
        reduction = np.random.randint(10, 21)
    
    # Calculate post-exercise blood pressure and add it to the list
    post_exercise_bp.append(pre_exercise_bp[i] - reduction)

# Create a DataFrame
data = {
    'Participant_ID': participant_ids,
    'Exercise_Group': exercise_groups,
    'Pre_Exercise_BP': pre_exercise_bp,
    'Post_Exercise_BP': post_exercise_bp
}
df = pd.DataFrame(data)

# Set Participant_ID as the index
df.set_index('Participant_ID', inplace=True)

# Print the DataFrame
print(df)

# Save to CSV
df.to_csv('exercise_data.csv')

print("Synthetic dataset 'exercise_data.csv' created successfully.")


               Exercise_Group  Pre_Exercise_BP  Post_Exercise_BP
Participant_ID                                                  
1                        yoga              128               117
2                     jogging              133               128
3                        yoga              133               119
4                        yoga              131               117
5                     jogging              116               101
...                       ...              ...               ...
96                    jogging              128               122
97                    jogging              135               120
98                       yoga              127               108
99                    jogging              140               134
100                   jogging              158               144

[100 rows x 3 columns]
Synthetic dataset 'exercise_data.csv' created successfully.


In [None]:
## Task 2: Highest Pre-Exercise Blood Pressure by Group
##This task involves identifying the participant with the highest pre-exercise systolic blood pressure in each exercise group. We will use pandas to read the CSV file and perform the necessary grouping and aggregation.


In [10]:
# Read the dataset
df = pd.read_csv('exercise_data.csv')

# Find the participant with the highest pre-exercise blood pressure in each exercise group
highest_pre_bp = df.loc[df.groupby('Exercise_Group')['Pre_Exercise_BP'].idxmax()]

print("Participants with the highest pre-exercise systolic blood pressure in each exercise group:")
print(highest_pre_bp)


Participants with the highest pre-exercise systolic blood pressure in each exercise group:
    Participant_ID Exercise_Group  Pre_Exercise_BP  Post_Exercise_BP
20              21        jogging              170               158
82              83  weightlifting              145               145
55              56           yoga              141               126


In [None]:
## Task 3: Extract the 5 Participants with Highest Blood Pressure
##This task involves sorting the participants based on their pre-exercise blood pressure and displaying the full records of the top 5 participants.


In [11]:
# Sort the participants based on pre-exercise blood pressure
sorted_df = df.sort_values(by='Pre_Exercise_BP', ascending=False)

# Extract the top 5 participants with the highest pre-exercise blood pressure
top_5_pre_bp = sorted_df.head(5)

print("Top 5 participants with the highest pre-exercise systolic blood pressure:")
print(top_5_pre_bp)


Top 5 participants with the highest pre-exercise systolic blood pressure:
    Participant_ID Exercise_Group  Pre_Exercise_BP  Post_Exercise_BP
20              21        jogging              170               158
99             100        jogging              158               144
82              83  weightlifting              145               145
18              19  weightlifting              142               132
55              56           yoga              141               126


In [None]:
## Task 4: Monthly Blood Pressure Changes
##Assuming blood pressure measurements were taken monthly, this task computes the average change in blood pressure for each exercise group. Note that the original study duration is 6 weeks, which we approximate as 1.5 months.


In [13]:
# Calculate the change in blood pressure
df['BP_Change'] = df['Pre_Exercise_BP'] - df['Post_Exercise_BP']

# Group by exercise group and calculate the average change
monthly_change = df.groupby('Exercise_Group')['BP_Change'].mean() / 1.5  # Assuming 1.5 months

print("Average monthly change in blood pressure for each exercise group:")
print(monthly_change)


Average monthly change in blood pressure for each exercise group:
Exercise_Group
jogging          6.343434
weightlifting    2.555556
yoga             9.440860
Name: BP_Change, dtype: float64


In [19]:
## Task 5: Compare Pre- and Post-Exercise Blood Pressure for Top 5
##This task involves comparing the pre- and post-exercise blood pressure for the top 5 participants identified in Task 3.


In [14]:
# Extract the post-exercise blood pressure for the top 5 participants
top_5_comparison = df.loc[top_5_pre_bp.index, ['Pre_Exercise_BP', 'Post_Exercise_BP']]
top_5_comparison['BP_Difference'] = top_5_comparison['Pre_Exercise_BP'] - top_5_comparison['Post_Exercise_BP']

print("Comparison of pre- and post-exercise blood pressure for the top 5 participants:")
print(top_5_comparison)


Comparison of pre- and post-exercise blood pressure for the top 5 participants:
    Pre_Exercise_BP  Post_Exercise_BP  BP_Difference
20              170               158             12
99              158               144             14
82              145               145              0
18              142               132             10
55              141               126             15


In [18]:
## Task 6: Total Blood Pressure Reduction for Each Exercise Group
##This task computes the measures of central tendency (mean, mode, standard deviation) for blood pressure reduction in each exercise group.


In [15]:
# Calculate the mean and standard deviation of blood pressure reduction for each exercise group
total_reduction = df.groupby('Exercise_Group')['BP_Change'].agg(['mean', 'std'])

print("Mean and standard deviation of blood pressure reduction for each exercise group:")
print(total_reduction)

# Calculate the mode separately due to its handling of multiple modes
modes = df.groupby('Exercise_Group')['BP_Change'].apply(lambda x: x.mode().values)

print("Mode of blood pressure reduction for each exercise group:")
print(modes)



Mean and standard deviation of blood pressure reduction for each exercise group:
                     mean       std
Exercise_Group                     
jogging          9.515152  3.308334
weightlifting    3.833333  2.932576
yoga            14.161290  3.001075
Mode of blood pressure reduction for each exercise group:
Exercise_Group
jogging          [5, 6, 7, 11, 12, 15]
weightlifting                      [0]
yoga                              [12]
Name: BP_Change, dtype: object


In [17]:
# Explanation of Computational and Statistical Aspects

# 1. Synthetic Dataset Generation:
#    Computational Aspect: NumPy is used for random number generation and to create synthetic data.
#    Systolic blood pressure values are normally distributed around 120 with a standard deviation of 15.
#    Statistical Aspect: The reduction in blood pressure after exercise varies based on the exercise group, simulating realistic variations.

# 2. Highest Pre-Exercise Blood Pressure by Group:
#    Computational Aspect: Grouping and aggregation functions in pandas are used to identify the participants with the highest pre-exercise blood pressure in each group.
#    Statistical Aspect: This helps in identifying outliers and comparing maximum values across different exercise groups.

# 3. Extract the 5 Participants with Highest Blood Pressure:
#    Computational Aspect: Sorting and slicing operations in pandas are used to extract the top 5 participants based on pre-exercise blood pressure.
#    Statistical Aspect: This focuses on the participants with the highest initial blood pressure to analyze their data in more detail.

# 4. Monthly Blood Pressure Changes:
#    Computational Aspect: Assumes measurements were taken monthly and calculates the average change in blood pressure by dividing the total change by 1.5 (since 6 weeks is approximately 1.5 months).
#    Statistical Aspect: Standardizes the blood pressure change over time for better comparison across groups.

# 5. Compare Pre- and Post-Exercise Blood Pressure for Top 5:
#    Computational Aspect: Compares the pre- and post-exercise blood pressure for the top 5 participants identified in Task 3.
#    Statistical Aspect: Evaluates the effectiveness of the exercise regimen on those with the highest pre-exercise blood pressure.

# 6. Total Blood Pressure Reduction for Each Exercise Group:
#    Computational Aspect: Uses pandas' aggregation functions to compute measures of central tendency (mean, standard deviation) for blood pressure reduction in each exercise group.
#    Statistical Aspect: The mode is calculated separately to handle cases where multiple modes may exist, providing a comprehensive view of blood pressure reduction.