In [1]:
pip install pandas openpyxl scipy numpy

Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.



In [5]:
import pandas as pd
import numpy as np
from scipy import stats

# Load data from Excel
file_path = 'Test 1 Data.xlsx' 

# Read data from the Excel
# Use specific ranges to load the data
sem_data = pd.read_excel(file_path, sheet_name='Sheet1', header=None, usecols='B', skiprows=4).dropna().values.flatten()
diet_exercise_data = pd.read_excel(file_path, sheet_name='Sheet1', header=None, usecols='I', skiprows=3).dropna().values.flatten()

# Check for empty arrays after conversion
if len(sem_data) == 0 or len(diet_exercise_data) == 0:
    raise ValueError("One of the data arrays is empty after cleaning. Check the input data.")

# 1. Mean value for each treatment group
mean_sem = np.mean(sem_data)
mean_diet_exercise = np.mean(diet_exercise_data)

# 2. Variance for each treatment group
variance_sem = np.var(sem_data, ddof=1)
variance_diet_exercise = np.var(diet_exercise_data, ddof=1)

# 3. Standard deviation for each treatment group
std_dev_sem = np.sqrt(variance_sem)
std_dev_diet_exercise = np.sqrt(variance_diet_exercise)

# Results
print(f"SEM 0.5 mg (OZEMPIC) - Mean: {mean_sem}, Variance: {variance_sem}, Standard Deviation: {std_dev_sem}")
print(f"Diet and Exercise only - Mean: {mean_diet_exercise}, Variance: {variance_diet_exercise}, Standard Deviation: {std_dev_diet_exercise}")

# 4. Define the null and alternative hypotheses
# H0: The means of the two groups are equal (no difference in pounds lost)
# H1: The means of the two groups are not equal (difference in pounds lost)

# 5. Perform two-sample t-test to compare the means
t_statistic, p_value = stats.ttest_ind(sem_data, diet_exercise_data, equal_var=False)

# Print test statistic and p-value
print(f"T-Statistic: {t_statistic}, P-Value: {p_value}")

# 6. State results
alpha = 0.05
print(f"Alpha level: {alpha}")
if p_value < alpha:
    print("Reject null hypothesis: There is a significant difference in pounds lost between the two groups.")
else:
    print("Fail to reject null hypothesis: There is not enough evidence to suggest a significant difference in pounds lost between the two groups.")

# 7. Additional Analysis: Calculate 95% Confidence
confidence_interval_sem = stats.norm.interval(0.95, loc=mean_sem, scale=std_dev_sem/np.sqrt(len(sem_data)))
confidence_interval_diet_exercise = stats.norm.interval(0.95, loc=mean_diet_exercise, scale=std_dev_diet_exercise/np.sqrt(len(diet_exercise_data)))

print(f"95% Confidence Interval for SEM 0.5 mg (OZEMPIC): {confidence_interval_sem}")
print(f"95% Confidence Interval for Diet and Exercise only: {confidence_interval_diet_exercise}")


SEM 0.5 mg (OZEMPIC) - Mean: 4.688888888888888, Variance: 4.504575163398692, Standard Deviation: 2.1223984459565295
Diet and Exercise only - Mean: 6.85, Variance: 10.593000000000002, Standard Deviation: 3.254688925227725
T-Statistic: -2.664837221010822, P-Value: 0.010890261350631628
Alpha level: 0.05
Reject null hypothesis: There is a significant difference in pounds lost between the two groups.
95% Confidence Interval for SEM 0.5 mg (OZEMPIC): (np.float64(3.708408847873899), np.float64(5.669368929903877))
95% Confidence Interval for Diet and Exercise only: (np.float64(5.598960842898794), np.float64(8.101039157101205))
