https://www.kaggle.com/datasets/uom190346a/sleep-health-and-lifestyle-dataset/data


In [3]:
import kagglehub

# Fetch the latest version of the dataset; the value returned by kagglehub is kept in `path`.
DATASET_HANDLE = "uom190346a/sleep-health-and-lifestyle-dataset"
path = kagglehub.dataset_download(DATASET_HANDLE)



Downloading from https://www.kaggle.com/api/v1/datasets/download/uom190346a/sleep-health-and-lifestyle-dataset?dataset_version_number=2...


100%|██████████| 2.54k/2.54k [00:00<00:00, 3.53MB/s]

Extracting files...





In [4]:
import os

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, LabelEncoder


# Build the CSV location from the directory returned by kagglehub.dataset_download
# (`path`, set in the download cell) instead of hard-coding the cache location and
# dataset version, so the notebook keeps working on other machines / newer versions.
file_path = os.path.join(path, 'Sleep_health_and_lifestyle_dataset.csv')
df = pd.read_csv(file_path)

In [5]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,2,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,3,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea


In [6]:
# Remove the columns that are not kept as model inputs, in a single call.
# NOTE: re-running this cell on an already-trimmed frame raises KeyError.
unused_columns = ['Blood Pressure', 'Sleep Disorder', 'Heart Rate', 'Person ID']
df = df.drop(columns=unused_columns)

In [7]:
# Distinct labels of each categorical column (used to fit the label encoders).
unique_genders, unique_occupations, unique_bmi_categories = (
    df[column].unique() for column in ('Gender', 'Occupation', 'BMI Category')
)

In [8]:
# One LabelEncoder per categorical column, each fitted on that column's distinct labels.
encoder_gender, encoder_occupation, encoder_bmi = (
    LabelEncoder().fit(labels)
    for labels in (unique_genders, unique_occupations, unique_bmi_categories)
)

In [9]:
# Replace every categorical column with the integer codes of its matching encoder.
# NOTE: not idempotent - re-running on already-encoded columns fails in transform().
for column, encoder in (
    ('Gender', encoder_gender),
    ('Occupation', encoder_occupation),
    ('BMI Category', encoder_bmi),
):
    df[column] = encoder.transform(df[column])

In [10]:
# The target is the sleep-quality column; every remaining column is a feature.
target_column = "Quality of Sleep"
y = df[target_column]
X = df.drop(columns=[target_column])

In [11]:
# Hold out 20% of the rows for evaluation; the seed is fixed for reproducibility.
# NOTE(review): the df.head() preview shows rows that differ only by 'Person ID'.
# Once that column is dropped, such duplicates may end up in both splits and
# inflate the test metrics - consider de-duplicating before splitting (to confirm).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [12]:
# Fit the scaler on the training split only, then apply it to both splits.
# NOTE: X_train / X_test are overwritten with their scaled versions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [13]:
# Random-forest regressor with a fixed seed, trained on the scaled training split.
model = RandomForestRegressor(random_state=42)
model.fit(X=X_train, y=y_train)

In [14]:
# Predictions for the held-out split.
y_pred = model.predict(X=X_test)

In [15]:
# Evaluate the held-out predictions: mean squared error and R^2.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

In [16]:
# Report both metrics, one per line.
print(
    f"Mean Squared Error: {mse}",
    f"R-squared: {r2}",
    sep="\n",
)

Mean Squared Error: 0.015258666666666665
R-squared: 0.9898856940843742


In [17]:
# A single hand-written sample, described with raw (un-encoded) values.
sample_person = {
    "Gender": "Male",                # "Male" or "Female"
    "Age": 21,                       # example age
    "Occupation": "Doctor",          # any occupation (must be encoded the same way as in the training data)
    "Sleep Duration": 5,             # sleep time in hours
    "Physical Activity Level": 20,   # any physical-activity value
    "Stress Level": 2,               # stress level (scale 1-10)
    "BMI Category": "Normal",        # BMI category (e.g. "Normal", "Overweight")
    "Daily Steps": 2000,             # number of steps per day
}

# Wrap every value in a one-element list so the dict describes a one-row table.
random_data = {feature: [value] for feature, value in sample_person.items()}

In [18]:
# One-row frame built from the sample dict (keys become the column names).
random_df = pd.DataFrame(data=random_data)

In [19]:
# Encode the sample's categorical columns with the same encoders used for the training data.
for column, encoder in (
    ('Gender', encoder_gender),
    ('Occupation', encoder_occupation),
    ('BMI Category', encoder_bmi),
):
    random_df[column] = encoder.transform(random_df[column])


In [20]:
# Scale the sample with the already-fitted scaler.
random_scaled = scaler.transform(X=random_df)

# Predict the sleep quality for the sample
predicted_quality = model.predict(X=random_scaled)

# Show the single prediction rounded to two decimals.
print(f"Przewidywana jakość snu (1-10): {predicted_quality[0]:.2f}")

Przewidywana jakość snu (1-10): 6.28


In [21]:
from google.colab import drive
# Mount Google Drive in the Colab runtime at the given mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [22]:
import joblib
import os

# Directory on Google Drive where the trained artifacts are stored
save_path = '/content/drive/My Drive/model_and_scaler'
os.makedirs(save_path, exist_ok=True)

# Save the model
model_path = os.path.join(save_path, 'sleep_quality_model.joblib')
joblib.dump(model, model_path)

# Save the scaler
scaler_path = os.path.join(save_path, 'scaler.joblib')
joblib.dump(scaler, scaler_path)

# Save the fitted label encoders as well: the model was trained on the integer
# codes they produce, so inference outside this notebook needs the exact same
# encoders to turn 'Gender' / 'Occupation' / 'BMI Category' strings into codes.
encoders_path = os.path.join(save_path, 'label_encoders.joblib')
joblib.dump(
    {
        'Gender': encoder_gender,
        'Occupation': encoder_occupation,
        'BMI Category': encoder_bmi,
    },
    encoders_path,
)

print(f"Model zapisany w: {model_path}")
print(f"Scaler zapisany w: {scaler_path}")
print(f"Enkodery zapisane w: {encoders_path}")


Model zapisany w: /content/drive/My Drive/model_and_scaler/sleep_quality_model.joblib
Scaler zapisany w: /content/drive/My Drive/model_and_scaler/scaler.joblib
