### import library


In [7]:
import pandas as pd
import numpy as np
from scipy.stats import f_oneway
import matplotlib.pyplot as plt
import seaborn as sns

### Load the dataset

In [8]:
data = pd.read_csv('Sleep_health_and_lifestyle_dataset.csv')

In [9]:
### check the dataset
data.head()


Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,2,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,3,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea


In [10]:
# checking the dataset for missing values
data.isnull().sum()

Person ID                  0
Gender                     0
Age                        0
Occupation                 0
Sleep Duration             0
Quality of Sleep           0
Physical Activity Level    0
Stress Level               0
BMI Category               0
Blood Pressure             0
Heart Rate                 0
Daily Steps                0
Sleep Disorder             0
dtype: int64

## Define the research question:
### Does the level of physical activity influence the quality of sleep among different occupations?

## Hypotheses:
### Null Hypothesis (H0): There is no difference in the quality of sleep based on the level of physical activity across different occupations.
### Alternative Hypothesis (H1): The quality of sleep varies with the level of physical activity among different occupations.


In [15]:
# Segment data by occupation and categorize physical activity levels
occupations = data['Occupation'].unique()
activity_levels = pd.cut(data['Physical Activity Level'], bins=[0, 33, 66, 100], labels=['Low', 'Medium', 'High'])

# Prepare data for ANOVA
anova_results = {}
for occupation in occupations:
    occupation_data = data[data['Occupation'] == occupation]
    grouped = occupation_data.groupby(activity_levels)['Quality of Sleep']
    
    # Collect groups for ANOVA
    sleep_data = [group for name, group in grouped if not group.empty]
    if len(sleep_data) > 1:
        f_stat, p_value = f_oneway(*sleep_data)
        anova_results[occupation] = (f_stat, p_value)

# Output results
print("ANOVA Results:")
print("{:<20} {:<15} {:<15}".format("Occupation", "F-statistic", "P-value"))
for occupation, results in anova_results.items():
    print("{:<20} {:<15.2f} {:<15.5f}".format(occupation, results[0], results[1]))


ANOVA Results:
Occupation           F-statistic     P-value        
Software Engineer    6.25            0.12961        
Doctor               55.78           0.00000        
Teacher              0.00            0.95700        
Nurse                8.10            0.00578        
Engineer             68.64           0.00000        
Accountant           18.28           0.00014        
Scientist            inf             0.00000        
Lawyer               3.26            0.07789        




## Conclusion
### The ANOVA analysis conducted on the dataset reveals significant insights into the relationship between physical activity levels and sleep quality across various occupations. The results are as follows:
### Software Engineers, Teachers, and Lawyers showed no significant differences in sleep quality based on physical activity levels. This suggests that for these professions, factors other than physical activity might be more influential on sleep quality.
### Doctors, Nurses, Engineers, Accountants, and Scientists demonstrated significant differences in sleep quality based on physical activity levels. For these groups, increasing physical activity could potentially enhance sleep quality.
### Notably, the infinite F-statistic for Scientists indicates a potential anomaly or lack of variance within some activity level groups, which could skew the results. This warrants further investigation.
### The findings underscore the importance of considering occupation-specific wellness programs that include physical activity components, particularly for those professions where a significant relationship was observed. However, it's crucial to consider other confounding factors and ensure data integrity and normal distribution for more definitive conclusions.