In [None]:
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
pio.templates.default = "plotly_white"
import plotly.express as px



In [None]:
# Read the fitness export into a DataFrame and preview its first rows
DATA_FILE = "Apple-Fitness-Data.csv"
data = pd.read_csv(DATA_FILE)
data.head()

Unnamed: 0,Date,Time,Step Count,Distance,Energy Burned,Flights Climbed,Walking Double Support Percentage,Walking Speed
0,2023-03-21,16:01:23,46,0.02543,14.62,3,0.304,3.06
1,2023-03-21,16:18:37,645,0.40041,14.722,3,0.309,3.852
2,2023-03-21,16:31:38,14,0.00996,14.603,4,0.278,3.996
3,2023-03-21,16:45:37,13,0.00901,14.811,3,0.278,5.04
4,2023-03-21,17:10:30,17,0.00904,15.153,3,0.281,5.184


In [None]:
# (number of rows, number of columns) of the loaded frame
data.shape

(149, 8)

In [None]:
# Column names, non-null counts and dtypes; Date and Time load as object (string) columns
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149 entries, 0 to 148
Data columns (total 8 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   Date                               149 non-null    object 
 1   Time                               149 non-null    object 
 2   Step Count                         149 non-null    int64  
 3   Distance                           149 non-null    float64
 4   Energy Burned                      149 non-null    float64
 5   Flights Climbed                    149 non-null    int64  
 6   Walking Double Support Percentage  149 non-null    float64
 7   Walking Speed                      149 non-null    float64
dtypes: float64(4), int64(2), object(2)
memory usage: 9.4+ KB


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
data.describe()

Unnamed: 0,Step Count,Distance,Energy Burned,Flights Climbed,Walking Double Support Percentage,Walking Speed
count,149.0,149.0,149.0,149.0,149.0,149.0
mean,224.959732,0.147963,14.497933,2.463087,0.29847,3.973772
std,277.91939,0.181177,9.607919,1.57052,0.022408,1.049368
min,1.0,0.00046,0.171,1.0,0.261,1.548
25%,22.0,0.01402,12.621,1.0,0.279,3.312
50%,89.0,0.0629,14.422,3.0,0.295,4.14
75%,365.0,0.219,15.441,3.0,0.317,4.896
max,1082.0,0.70118,60.514,13.0,0.354,5.58


In [None]:
# Count missing values in each column
data.isna().sum()

Date                                 0
Time                                 0
Step Count                           0
Distance                             0
Energy Burned                        0
Flights Climbed                      0
Walking Double Support Percentage    0
Walking Speed                        0
dtype: int64

In [None]:
# Number of distinct values in the Date column (NaN, if any, counted once)
data["Date"].nunique(dropna=False)

12

In [None]:
# Number of distinct values in the Time column (NaN, if any, counted once)
data["Time"].nunique(dropna=False)

148

In [None]:
# Step Count Over Time
# "Time" holds only the time of day while the readings span several dates,
# so the x-axis is a full timestamp built from "Date" + "Time". The column is
# added to a temporary copy so `data` itself is unchanged for later cells.
# `.astype(str).str[:10]` keeps this working whether "Date" is still a string
# or has already been converted to datetime.
step_count_timeline = data.assign(
    Timestamp=pd.to_datetime(
        data["Date"].astype(str).str[:10] + " " + data["Time"],
        format="%Y-%m-%d %H:%M:%S",
    )
)
fig1 = px.line(step_count_timeline, x="Timestamp",
               y="Step Count",
               title="Step Count Over Time")
fig1.show()

In [None]:
# Distance Covered Over Time
# "Time" holds only the time of day while the readings span several dates,
# so the x-axis is a full timestamp built from "Date" + "Time". The column is
# added to a temporary copy so `data` itself is unchanged for later cells.
# `.astype(str).str[:10]` keeps this working whether "Date" is still a string
# or has already been converted to datetime.
distance_timeline = data.assign(
    Timestamp=pd.to_datetime(
        data["Date"].astype(str).str[:10] + " " + data["Time"],
        format="%Y-%m-%d %H:%M:%S",
    )
)
fig2 = px.line(distance_timeline, x="Timestamp",
               y="Distance",
               title="Distance Covered Over Time")
fig2.show()

In [None]:
# Energy Burned Over Time
# "Time" holds only the time of day while the readings span several dates,
# so the x-axis is a full timestamp built from "Date" + "Time". The column is
# added to a temporary copy so `data` itself is unchanged for later cells.
# `.astype(str).str[:10]` keeps this working whether "Date" is still a string
# or has already been converted to datetime.
energy_timeline = data.assign(
    Timestamp=pd.to_datetime(
        data["Date"].astype(str).str[:10] + " " + data["Time"],
        format="%Y-%m-%d %H:%M:%S",
    )
)
fig3 = px.line(energy_timeline, x="Timestamp",
               y="Energy Burned",
               title="Energy Burned Over Time")
fig3.show()

In [None]:
# Walking Speed Over Time
# "Time" holds only the time of day while the readings span several dates,
# so the x-axis is a full timestamp built from "Date" + "Time". The column is
# added to a temporary copy so `data` itself is unchanged for later cells.
# `.astype(str).str[:10]` keeps this working whether "Date" is still a string
# or has already been converted to datetime.
walking_speed_timeline = data.assign(
    Timestamp=pd.to_datetime(
        data["Date"].astype(str).str[:10] + " " + data["Time"],
        format="%Y-%m-%d %H:%M:%S",
    )
)
fig4 = px.line(walking_speed_timeline, x="Timestamp",
               y="Walking Speed",
               title="Walking Speed Over Time")
fig4.show()

In [None]:
# Mean step count for each date, drawn as one bar per date
average_step_count_per_day = data.groupby("Date", as_index=False)["Step Count"].mean()

fig5 = px.bar(
    average_step_count_per_day,
    x="Date",
    y="Step Count",
    title="Average Step Count per Day",
)
# Show the dates as discrete category labels instead of a continuous axis
fig5.update_xaxes(type="category")
fig5.show()

In [None]:
# Calculate Walking Efficiency
# Distance divided by step count for each reading, stored as a new column on `data`.
data["Walking Efficiency"] = data["Distance"] / data["Step Count"]

# "Time" holds only the time of day while the readings span several dates,
# so the x-axis is a full timestamp built from "Date" + "Time". The timestamp
# lives on a temporary copy so `data` gains only the efficiency column.
# `.astype(str).str[:10]` keeps this working whether "Date" is still a string
# or has already been converted to datetime.
efficiency_timeline = data.assign(
    Timestamp=pd.to_datetime(
        data["Date"].astype(str).str[:10] + " " + data["Time"],
        format="%Y-%m-%d %H:%M:%S",
    )
)
fig6 = px.line(efficiency_timeline, x="Timestamp",
               y="Walking Efficiency",
               title="Walking Efficiency Over Time")
fig6.show()

In [None]:
# Create Time Intervals
# Parse "Time" with an explicit format so pandas does not fall back to
# element-by-element dateutil parsing (the "Could not infer format" warning).
hour_of_day = pd.to_datetime(data["Time"], format="%H:%M:%S").dt.hour

# Left-closed bins on the hour: [0, 12) Morning, [12, 18) Afternoon, [18, 24) Evening.
time_intervals = pd.cut(hour_of_day,
                        bins=[0, 12, 18, 24],
                        labels=["Morning", "Afternoon", "Evening"],
                        right=False)

data["Time Interval"] = time_intervals

# Variations in Step Count and Walking Speed by Time Interval
# One colour (and one OLS trendline) per time interval.
fig7 = px.scatter(data, x="Step Count",
                  y="Walking Speed",
                  color="Time Interval",
                  title="Step Count and Walking Speed Variations by Time Interval",
                  trendline='ols')
fig7.show()


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.



In [None]:
# Replace the string 'Date' column with its parsed datetime equivalent
parsed_dates = pd.to_datetime(data["Date"])
data["Date"] = parsed_dates

In [None]:
# Working copy of the data without the time-of-day column
data1 = data.drop(columns="Time")

In [None]:
# Force the metric columns of data1 to numeric dtypes (the 'Date' column is left alone);
# values that cannot be parsed become NaN.
numeric_columns = [
    "Step Count",
    "Distance",
    "Energy Burned",
    "Flights Climbed",
    "Walking Double Support Percentage",
    "Walking Speed",
]
data1[numeric_columns] = data1[numeric_columns].apply(pd.to_numeric, errors="coerce")


In [None]:
# Drop the non-metric helper columns from data1 itself rather than rebuilding it
# from `data`, so the numeric coercion already applied to data1 is not thrown away.
# errors="ignore" skips columns that are already gone ("Time" was dropped when
# data1 was created), which also makes this cell safe to re-run.
data1 = data1.drop(columns=["Time Interval", "Time"], errors="ignore")

In [None]:
# Display data1 (pandas abbreviates long frames in the rendered output)
data1

Unnamed: 0,Date,Step Count,Distance,Energy Burned,Flights Climbed,Walking Double Support Percentage,Walking Speed,Walking Efficiency
0,2023-03-21,46,0.02543,14.620,3,0.304,3.060,0.000553
1,2023-03-21,645,0.40041,14.722,3,0.309,3.852,0.000621
2,2023-03-21,14,0.00996,14.603,4,0.278,3.996,0.000711
3,2023-03-21,13,0.00901,14.811,3,0.278,5.040,0.000693
4,2023-03-21,17,0.00904,15.153,3,0.281,5.184,0.000532
...,...,...,...,...,...,...,...,...
144,2023-04-01,87,0.05872,13.162,3,0.298,5.364,0.000675
145,2023-04-01,37,0.02039,4.135,1,0.295,5.400,0.000551
146,2023-04-01,969,0.65379,31.416,3,0.274,5.292,0.000675
147,2023-04-01,640,0.42356,13.250,6,0.282,4.860,0.000662


In [None]:
# Average every column of data1 per date, then reshape to long format
# (one row per date/metric pair) for the treemap.
daily_avg_metrics = data1.groupby("Date", as_index=False).mean()

treemap_metrics = [
    "Step Count",
    "Distance",
    "Energy Burned",
    "Flights Climbed",
    "Walking Double Support Percentage",
    "Walking Speed",
]
daily_avg_metrics_melted = pd.melt(
    daily_avg_metrics,
    id_vars=["Date"],
    value_vars=treemap_metrics,
)

# Treemap with one tile per metric, sized from the daily-average values
fig = px.treemap(
    daily_avg_metrics_melted,
    path=["variable"],
    values="value",
    color="variable",
    hover_data=["value"],
    title="Daily Averages for Different Metrics",
)
fig.show()

In [None]:
# Every metric except Step Count
metrics_to_visualize = [
    "Distance",
    "Energy Burned",
    "Flights Climbed",
    "Walking Double Support Percentage",
    "Walking Speed",
]

# Long format (one row per date/metric pair) restricted to the selected metrics
daily_avg_metrics_melted = pd.melt(
    daily_avg_metrics,
    id_vars=["Date"],
    value_vars=metrics_to_visualize,
)

# Treemap with one tile per selected metric
fig = px.treemap(
    daily_avg_metrics_melted,
    path=["variable"],
    values="value",
    color="variable",
    hover_data=["value"],
    title="Daily Averages for Different Metrics (Excluding Step Count)",
)
fig.show()