In [1]:
#import libraries
import numpy as np
import pandas as pd

In [2]:
# create a sample daily steps count for 30 days
np.random.seed(42)
days = np.arange(1, 31)
step = np.random.randint(3456, 12034, size=30)

data = pd.DataFrame({
    "day": days,
    "step": step
})

data

Unnamed: 0,day,step
0,1,10726
1,2,4316
2,3,8846
3,4,8647
4,5,9190
5,6,9721
6,7,3922
7,8,7882
8,9,9034
9,10,11778


In [3]:
#find the basic stats
mean_step = np.mean(step)
median_step = np.median(step)
min_step = np.min(step)
max_step = np.max(step)
std_step = np.std(step)

print("Average daily step:\n", mean_step)
print("Median dialy step:\n", median_step)
print("Lowest value step:\n", min_step)
print("Highest value step:\n", max_step)
print("Standard deviation step:\n", std_step)

Average daily step:
 7500.2
Median dialy step:
 7946.5
Lowest value step:
 3645
Highest value step:
 11778
Standard deviation step:
 2309.770715893679


In [4]:
#find goal analysis (days exceeding 10,000 steps)
goal_days = np.where(step >= 10000)[0] + 1

print("Goal days:", goal_days)
print("Count:", len(goal_days))

Goal days: [ 1 10 13 23]
Count: 4


In [5]:
#find weekly analysis (split into four weeks)
weeks = np.array_split(step, 4)
print(list(weeks))
week_total = [int(week.sum()) for week in weeks]
week_avg = [float(week.mean()) for week in weeks]

print("Total Step per week: ", week_total)
print("The average step per week: ", week_avg)

[array([10726,  4316,  8846,  8647,  9190,  9721,  3922,  7882],
      dtype=int32), array([ 9034, 11778,  5141,  4225, 10405,  5889,  8767,  8507],
      dtype=int32), array([ 9876,  4640,  8011,  6841,  9852,  6014, 11305], dtype=int32), array([5503, 6203, 3645, 6190, 6461, 8114, 5355], dtype=int32)]
Total Step per week:  [63250, 63746, 56539, 41471]
The average step per week:  [7906.25, 7968.25, 8077.0, 5924.428571428572]


In [6]:
#find 7 rolling days moving average
window = 7
moving_avg = np.convolve(step, np.ones(window)/window, mode="valid")

print("Rolling 7 days moving average: ", moving_avg)

Rolling 7 days moving average:  [7909.71428571 7503.42857143 8177.42857143 8596.28571429 8095.42857143
 7386.14285714 7483.85714286 7764.85714286 7891.28571429 7816.
 7544.28571429 7472.71428571 8013.57142857 7504.42857143 8070.57142857
 7677.28571429 8077.         7452.28571429 7675.57142857 7051.85714286
 6958.85714286 6474.42857143 6774.42857143 5924.42857143]


In [7]:
#find the anomaly detection (outliers)
lower_bond =  mean_step - 2 * std_step
upper_bond = mean_step + 2 * std_step
anomalies = np.where((step < lower_bond) | (step > upper_bond))[0] + 1
print("Anomaly detection:", anomalies)

Anomaly detection: []
