# Data Visualization: Sleep Quality

In [1]:
import pandas as pd
import numpy as np
import re

import requests as req
from bs4 import BeautifulSoup as bs

import json

In [2]:
# Lift pandas' display truncation: a value of None removes the cap, so every
# column and every row of a frame is rendered in cell output.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

In [3]:
# Load the raw sleep-quality dataset from a local CSV file into a DataFrame.
# NOTE(review): the path is absolute and machine-specific, so this cell only runs
# where that exact file exists.

sleep_quality = pd.read_csv(
    filepath_or_buffer="/Users/david/Desktop/IronHack/Projects/Data_visualization/data/raw/Sleep_health_and_lifestyle_dataset 2.csv",
)

## Initial data exploration

In [4]:
# Preview the first rows of the freshly loaded frame to check how the file was parsed.
sleep_quality.head()

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,2,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,3,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea


In [5]:
# (number of rows, number of columns) of the loaded frame.
sleep_quality.shape

(374, 13)

## Data cleaning and transformation

In [6]:
# Split the "Blood Pressure" strings (formatted like "126/83") on "/" into two new
# columns: "High Pressure" (the part before the slash) and "Low Pressure" (the part
# after it). The combined text column is dropped afterwards.
#
# The guard keeps the cell re-runnable: once "Blood Pressure" has been removed,
# executing the cell again is a no-op instead of raising KeyError.

if 'Blood Pressure' in sleep_quality.columns:
    # expand=True returns one column per piece; the values are still strings here.
    pressure_parts = sleep_quality['Blood Pressure'].str.split('/', expand=True)
    sleep_quality[['High Pressure', 'Low Pressure']] = pressure_parts

    # Rebind to the result of drop() rather than mutating the frame in place.
    sleep_quality = sleep_quality.drop(columns='Blood Pressure')


In [7]:
# Preview the first rows again to confirm the frame's columns after the blood-pressure split.
sleep_quality.head()

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Heart Rate,Daily Steps,Sleep Disorder,High Pressure,Low Pressure
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,77,4200,,126,83
1,2,Male,28,Doctor,6.2,6,60,8,Normal,75,10000,,125,80
2,3,Male,28,Doctor,6.2,6,60,8,Normal,75,10000,,125,80
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,85,3000,Sleep Apnea,140,90
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,85,3000,Sleep Apnea,140,90


In [8]:
# The split left both pressure columns as strings; cast each one to integer so
# that Tableau (and pandas) treat them as numeric measures.

for pressure_column in ("High Pressure", "Low Pressure"):
    sleep_quality[pressure_column] = sleep_quality[pressure_column].astype(int)


In [9]:
# Summarise every column's dtype and non-null count to verify the integer conversion.
sleep_quality.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 374 entries, 0 to 373
Data columns (total 14 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Person ID                374 non-null    int64  
 1   Gender                   374 non-null    object 
 2   Age                      374 non-null    int64  
 3   Occupation               374 non-null    object 
 4   Sleep Duration           374 non-null    float64
 5   Quality of Sleep         374 non-null    int64  
 6   Physical Activity Level  374 non-null    int64  
 7   Stress Level             374 non-null    int64  
 8   BMI Category             374 non-null    object 
 9   Heart Rate               374 non-null    int64  
 10  Daily Steps              374 non-null    int64  
 11  Sleep Disorder           374 non-null    object 
 12  High Pressure            374 non-null    int64  
 13  Low Pressure             374 non-null    int64  
dtypes: float64(1), int64(9), object(4)

## Perform Data Analysis

In [10]:
# Pairwise correlation matrix of the numeric columns (pandas' default method, Pearson).
# The frame also contains text columns (Gender, Occupation, BMI Category,
# Sleep Disorder). Selecting the numeric dtypes explicitly keeps them out of the
# calculation, so the cell runs cleanly whatever the installed pandas version's
# default for `numeric_only` is.
# NOTE(review): "Person ID" appears to be a row identifier, so its correlations
# are probably not meaningful. Consider excluding it before interpreting.

sleep_quality.select_dtypes(include='number').corr()

  sleep_quality.corr()


Unnamed: 0,Person ID,Age,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,Heart Rate,Daily Steps,High Pressure,Low Pressure
Person ID,1.0,0.990516,0.296305,0.431612,0.149882,-0.394287,-0.225467,0.043844,0.611551,0.59067
Age,0.990516,1.0,0.344709,0.473734,0.178993,-0.422344,-0.225606,0.057973,0.605878,0.593839
Sleep Duration,0.296305,0.344709,1.0,0.883213,0.21236,-0.811023,-0.516455,-0.039533,-0.180406,-0.16657
Quality of Sleep,0.431612,0.473734,0.883213,1.0,0.192896,-0.898752,-0.659865,0.016791,-0.121632,-0.110151
Physical Activity Level,0.149882,0.178993,0.21236,0.192896,1.0,-0.034134,0.136971,0.772723,0.265416,0.382651
Stress Level,-0.394287,-0.422344,-0.811023,-0.898752,-0.034134,1.0,0.670026,0.186829,0.102818,0.091811
Heart Rate,-0.225467,-0.225606,-0.516455,-0.659865,0.136971,0.670026,1.0,-0.030309,0.294143,0.271092
Daily Steps,0.043844,0.057973,-0.039533,0.016791,0.772723,0.186829,-0.030309,1.0,0.103342,0.241986
High Pressure,0.611551,0.605878,-0.180406,-0.121632,0.265416,0.102818,0.294143,0.103342,1.0,0.972885
Low Pressure,0.59067,0.593839,-0.16657,-0.110151,0.382651,0.091811,0.271092,0.241986,0.972885,1.0


## Export the tables as .csv documents

In [11]:
# Export of the cleaned frame to CSV. The call is currently disabled (commented out),
# so running this cell writes nothing.
# NOTE(review): as written, to_csv would also write the DataFrame index as an extra
# unnamed first column. Pass index=False if that column is not wanted downstream.
# sleep_quality.to_csv('/Users/david/Desktop/IronHack/Projects/Data_visualization/data/cleaned/sleep_quality.csv')
