In [34]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

# Read the smartwatch activity export into a DataFrame and preview its first rows.
csv_path = "/content/realtime_data_12 - Sheet1.csv"
data = pd.read_csv(csv_path)
print(data.head())

        ID ActivityDate  TotalSteps  TotalDistance  TrackerDistance  \
0  Pradeep    9/26/2022       17800          13.56            13.56   
1  Pradeep    9/27/2022       16200          12.34            12.34   
2  Pradeep    9/28/2022       16345          12.45            12.45   
3  Pradeep    9/29/2022       16500          12.57            12.57   
4  Pradeep    9/30/2022       17003          12.96            12.96   

   LoggedActivitiesDistance  VeryActiveDistance  ModeratelyActiveDistance  \
0                         0                3.51                      2.20   
1                         0                2.34                      1.34   
2                         0                2.45                      1.88   
3                         0                2.57                      1.84   
4                         0                2.96                      1.34   

   LightActiveDistance  SedentaryActiveDistance  VeryActiveMinutes  \
0                 7.85                  

In [35]:
# Report the dataset dimensions: number of rows first, then number of columns.
rows, columns = data.shape
print(rows, columns, sep="\n")

63
15


In [36]:
# Count the missing entries in every column of the dataset.
missing_per_column = data.isna().sum()
print(missing_per_column)

ID                          0
ActivityDate                0
TotalSteps                  0
TotalDistance               0
TrackerDistance             0
LoggedActivitiesDistance    0
VeryActiveDistance          0
ModeratelyActiveDistance    0
LightActiveDistance         0
SedentaryActiveDistance     0
VeryActiveMinutes           0
FairlyActiveMinutes         0
LightlyActiveMinutes        0
SedantaryMinutes            0
Calories                    0
dtype: int64


In [37]:
# Show each column's dtype and non-null count.
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63 entries, 0 to 62
Data columns (total 15 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   ID                        63 non-null     object 
 1   ActivityDate              63 non-null     object 
 2   TotalSteps                63 non-null     int64  
 3   TotalDistance             63 non-null     float64
 4   TrackerDistance           63 non-null     float64
 5   LoggedActivitiesDistance  63 non-null     int64  
 6   VeryActiveDistance        63 non-null     float64
 7   ModeratelyActiveDistance  63 non-null     float64
 8   LightActiveDistance       63 non-null     float64
 9   SedentaryActiveDistance   63 non-null     int64  
 10  VeryActiveMinutes         63 non-null     int64  
 11  FairlyActiveMinutes       63 non-null     int64  
 12  LightlyActiveMinutes      63 non-null     int64  
 13  SedantaryMinutes          63 non-null     int64  
 14  Calories    

In [38]:
# Convert ActivityDate from month/day/year strings to datetime64 values.
data["ActivityDate"] = pd.to_datetime(data["ActivityDate"], format="%m/%d/%Y")
# Confirm the new dtype. info() prints its report directly and returns None, so it
# is called bare rather than inside print(), which would add a trailing "None".
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63 entries, 0 to 62
Data columns (total 15 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   ID                        63 non-null     object        
 1   ActivityDate              63 non-null     datetime64[ns]
 2   TotalSteps                63 non-null     int64         
 3   TotalDistance             63 non-null     float64       
 4   TrackerDistance           63 non-null     float64       
 5   LoggedActivitiesDistance  63 non-null     int64         
 6   VeryActiveDistance        63 non-null     float64       
 7   ModeratelyActiveDistance  63 non-null     float64       
 8   LightActiveDistance       63 non-null     float64       
 9   SedentaryActiveDistance   63 non-null     int64         
 10  VeryActiveMinutes         63 non-null     int64         
 11  FairlyActiveMinutes       63 non-null     int64         
 12  LightlyActiveMinutes    

In [39]:
# Add a TotalMinutes column: the row-wise sum of the four activity-level minute columns.
minute_columns = [
    "VeryActiveMinutes",
    "FairlyActiveMinutes",
    "LightlyActiveMinutes",
    "SedantaryMinutes",
]
# skipna=False keeps the same missing-value propagation as chained "+" would.
data["TotalMinutes"] = data[minute_columns].sum(axis=1, skipna=False)
print(data["TotalMinutes"].sample(5))

2     1000
32    1000
39    1000
18    1000
42    1000
Name: TotalMinutes, dtype: int64


In [40]:
# Descriptive statistics for the dataset.
summary_stats = data.describe()
print(summary_stats)

         TotalSteps  TotalDistance  TrackerDistance  LoggedActivitiesDistance  \
count     63.000000      63.000000        63.000000                      63.0   
mean   16646.317460      12.684921        12.684921                       0.0   
std     2031.196095       1.548158         1.548158                       0.0   
min    10032.000000       7.640000         7.640000                       0.0   
25%    16345.000000      12.450000        12.450000                       0.0   
50%    16989.000000      12.950000        12.950000                       0.0   
75%    17653.500000      13.455000        13.455000                       0.0   
max    19600.000000      14.940000        14.940000                       0.0   

       VeryActiveDistance  ModeratelyActiveDistance  LightActiveDistance  \
count           63.000000                 63.000000            63.000000   
mean             2.984603                  1.713016             7.987302   
std              0.857097                 

**Let's analyze the smartwatch data**

In [41]:
# Scatter plot of calories burned against total steps per record.
# Marker size encodes VeryActiveMinutes, and an OLS trendline is overlaid.
figure = px.scatter(
    data,
    x="Calories",
    y="TotalSteps",
    size="VeryActiveMinutes",
    trendline="ols",
    title="Relationship between Calories & Total Steps",
)
figure.show()

**You can see that there is a linear relationship between the total number of steps and the number of calories burned in a day.**

In [42]:
# Pie chart of the average number of minutes spent at each activity level.
activity_columns = [
    "VeryActiveMinutes",
    "FairlyActiveMinutes",
    "LightlyActiveMinutes",
    "SedantaryMinutes",
]
label = [
    "Very Active Minutes",
    "Fairly Active Minutes",
    "Lightly Active Minutes",
    "Inactive Minutes",
]
counts = data[activity_columns].mean()
colors = ['gold', 'lightgreen', "pink", "blue"]

# Build the fully styled trace up front instead of patching it afterwards.
pie_trace = go.Pie(
    labels=label,
    values=counts,
    hoverinfo='label+percent',
    textinfo='value',
    textfont_size=30,
    marker=dict(colors=colors, line=dict(color='black', width=3)),
)
fig = go.Figure(data=[pie_trace])
fig.update_layout(title_text='Total Active Minutes')
fig.show()

Observations (daily averages):


*   796.63 minutes are inactive (sedentary)
*   153.28 minutes are lightly active
*   16.63 minutes are fairly active
*   33.49 minutes are very active





In [43]:
# Add a Day column holding the weekday name of each ActivityDate.
weekday_names = data["ActivityDate"].dt.day_name()
data["Day"] = weekday_names
print(data["Day"].head())

0       Monday
1      Tuesday
2    Wednesday
3     Thursday
4       Friday
Name: Day, dtype: object


In [44]:
# Grouped bar chart of very active, fairly active, and lightly active minutes
# for each day of the week.
bar_specs = [
    # (source column, legend name, bar colour)
    ("VeryActiveMinutes", 'Very Active', 'purple'),
    ("FairlyActiveMinutes", 'Fairly Active', 'green'),
    ("LightlyActiveMinutes", 'Lightly Active', 'pink'),
]

fig = go.Figure()
for column, trace_name, bar_color in bar_specs:
    fig.add_trace(
        go.Bar(
            x=data["Day"],
            y=data[column],
            name=trace_name,
            marker_color=bar_color,
        )
    )
fig.update_layout(barmode='group', xaxis_tickangle=-45)
fig.show()

In [45]:
# Total inactive (sedentary) minutes for each day of the week.
# The minutes are summed per weekday so that every pie label is paired with the
# value for that same day. Passing the un-aggregated per-row column next to the
# seven weekday labels would pair labels with unrelated rows.
weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday",
                 "Friday", "Saturday", "Sunday"]
day = data.groupby("Day")["SedantaryMinutes"].sum()
# Put the totals in calendar order (only days present in the data) so labels and
# colours are assigned in a stable, predictable order.
day = day.reindex([name for name in weekday_order if name in day.index])
label = day.index
counts = day.values
colors = ['gold','lightgreen', "pink", "blue", "skyblue", "cyan", "orange"]

fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Inactive Minutes Daily')
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=30,
                  marker=dict(colors=colors, line=dict(color='black', width=3)))
fig.show()

**So, across all individuals in the dataset, Wednesday appears to be the most inactive day of the week.**

In [46]:
# Total calories burned on each day of the week.
# Calories are summed per weekday so that every pie label is paired with the value
# for that same day. Passing the un-aggregated per-row column next to the seven
# weekday labels would pair labels with unrelated rows.
weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday",
                 "Friday", "Saturday", "Sunday"]
calories = data.groupby("Day")["Calories"].sum()
# Put the totals in calendar order (only days present in the data) so labels and
# colours are assigned in a stable, predictable order.
calories = calories.reindex([name for name in weekday_order if name in calories.index])
label = calories.index
counts = calories.values
colors = ['gold','lightgreen', "pink", "blue", "skyblue", "cyan", "orange"]

fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Calories Burned Daily')
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=30,
                  marker=dict(colors=colors, line=dict(color='black', width=3)))
fig.show()

**Monday is therefore one of the most active days for the individuals in the dataset, as the most calories were burned on Mondays.**