Source: https://www.kaggle.com/datasets/monicahjones/steps-tracker-dataset

# Data fetching and loading

In [None]:
import kagglehub
import pandas as pd

# Fetch the latest published version of the dataset via kagglehub and
# show the path that `dataset_download` returns.
dataset_handle = "monicahjones/steps-tracker-dataset"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

In [None]:
from kagglehub import KaggleDatasetAdapter
from pandas import DataFrame

# Load the CSV from the Kaggle dataset straight into a pandas DataFrame.
df: DataFrame = kagglehub.dataset_load(
    KaggleDatasetAdapter.PANDAS,
    "monicahjones/steps-tracker-dataset",
    "steps_tracker_dataset.csv",
)

# Parse the day-first date strings (dd/mm/YYYY) into datetimes. Bracket
# indexing is used instead of attribute access (`df.date = ...`) because
# attribute assignment silently sets a plain Python attribute when the column
# does not exist and can collide with DataFrame method names.
df["date"] = pd.to_datetime(df["date"], format="%d/%m/%Y")

# Order rows chronologically and renumber the index 0..n-1 in a single step,
# avoiding a separate in-place `reset_index`.
df = df.sort_values(by="date", ignore_index=True)

df.head(10)

In [None]:
# Default `describe()` summary of the loaded frame; the bare last expression
# is what the notebook renders.
raw_summary = df.describe()
raw_summary

In [None]:
# Distinct values that occur in the `mood` column, in order of appearance.
mood_values = df["mood"].unique()
mood_values

# Data preprocessing

In [None]:
# Collapse the frame to one row per date by taking the median of every numeric
# column. `numeric_only=True` drops non-numeric columns (presumably `mood`)
# from the result. `df` is rebound here because the cells below read the
# aggregated frame under that name.
rows_by_date = df.groupby(by="date")
daily_median = rows_by_date.median(numeric_only=True)
df = daily_median.reset_index()  # turn the `date` group key back into a column

df.head()

In [None]:
# Default `describe()` summary of `df` as it stands at this point (in notebook
# order, the per-date median frame built in the preprocessing cell).
current_summary = df.describe()
current_summary

# Data overview

## Single-variable time series

In [None]:
import matplotlib.pyplot as plt

# Line chart of the `steps` column over `date`.
fig, ax = plt.subplots()

ax.set_title('Steps every day')
# Label both axes so the figure is readable on its own.
ax.set_xlabel('Date')
ax.set_ylabel('Steps')
ax.plot(df['date'], df['steps'])
plt.show()

In [None]:
# Line chart of the `distance_km` column over `date`.
_, ax = plt.subplots()

ax.set_title('Distance every day')
# Label both axes so the figure is readable on its own; the unit comes from
# the column name.
ax.set_xlabel('Date')
ax.set_ylabel('Distance (km)')
ax.plot(df['date'], df['distance_km'])
plt.show()

In [None]:
# Line chart of the `calories_burned` column over `date`.
_, ax = plt.subplots()

ax.set_title('Calories every day')
# Label both axes so the figure is readable on its own.
ax.set_xlabel('Date')
ax.set_ylabel('Calories burned')
ax.plot(df['date'], df['calories_burned'])
plt.show()

In [None]:
# Line chart of the `water_intake_liters` column over `date`.
_, ax = plt.subplots()

ax.set_title('Drink every day')
# Label both axes so the figure is readable on its own; the unit comes from
# the column name.
ax.set_xlabel('Date')
ax.set_ylabel('Water intake (liters)')
ax.plot(df['date'], df['water_intake_liters'])
plt.show()

## Correlations

In [None]:
# Steps vs. calories burned: print the correlation coefficient
# (`Series.corr` defaults to Pearson) and show the scatter plot.
_, ax = plt.subplots()

print(df['steps'].corr(df['calories_burned']))

# Title and axis labels so the figure is readable on its own.
ax.set_title('Steps vs. calories burned')
ax.set_xlabel('Steps')
ax.set_ylabel('Calories burned')
ax.scatter(df['steps'], df['calories_burned'])
plt.show()

In [None]:
# Steps vs. distance: print the correlation coefficient (`Series.corr`
# defaults to Pearson) and show the scatter plot.
_, ax = plt.subplots()

# The other correlation cells in this section print the coefficient, so this
# one does too.
print(df['steps'].corr(df['distance_km']))

# Title and axis labels so the figure is readable on its own; the unit comes
# from the column name.
ax.set_title('Steps vs. distance')
ax.set_xlabel('Steps')
ax.set_ylabel('Distance (km)')
ax.scatter(df['steps'], df['distance_km'])
plt.show()

In [None]:
# Steps vs. water intake: print the correlation coefficient (`Series.corr`
# defaults to Pearson) and show the scatter plot.
_, ax = plt.subplots()

print(df['steps'].corr(df['water_intake_liters']))

# Title and axis labels so the figure is readable on its own; the unit comes
# from the column name.
ax.set_title('Steps vs. water intake')
ax.set_xlabel('Steps')
ax.set_ylabel('Water intake (liters)')
ax.scatter(df['steps'], df['water_intake_liters'])
plt.show()

In [None]:
# Active minutes vs. water intake: print the correlation coefficient
# (`Series.corr` defaults to Pearson) and show the scatter plot.
_, ax = plt.subplots()

print(df['active_minutes'].corr(df['water_intake_liters']))

# Title and axis labels so the figure is readable on its own; the unit comes
# from the column name.
ax.set_title('Active minutes vs. water intake')
ax.set_xlabel('Active minutes')
ax.set_ylabel('Water intake (liters)')
ax.scatter(df['active_minutes'], df['water_intake_liters'])
plt.show()

In [None]:
# Preview the first five rows of `df` (five is `head`'s default, spelled out).
df.head(5)