In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

# Location of the dailyActivity_merged.csv file (amankharwal/Website-data on GitHub).
# Kept in a named constant so the source is easy to spot and change.
DATA_URL = "https://raw.githubusercontent.com/amankharwal/Website-data/master/dailyActivity_merged.csv"

# Load the CSV into a DataFrame; every later cell works on `data`.
data = pd.read_csv(DATA_URL)

# Preview the first rows. Ending the cell with the expression (rather than
# print) lets Jupyter render the frame as a formatted table.
data.head()

In [None]:
# Size of the dataset: DataFrame.shape is a (number of rows, number of columns) tuple.
rows, columns = data.shape
# Row count on the first line, column count on the second.
print(rows, columns, sep="\n")

In [None]:
# Count the missing entries in each column (isna() is the same check as isnull()).
missing_per_column = data.isna().sum()
print(missing_per_column)

In [None]:
# Overview of the columns: names, non-null counts and dtypes.
# DataFrame.info() writes its report straight to stdout and returns None, so it
# is called on its own -- wrapping it in print() would append a stray "None".
data.info()

In [None]:
# Parse ActivityDate (month/day/year, per the format string) into datetime values
# so that the .dt accessor can be used on the column.
data["ActivityDate"] = pd.to_datetime(data["ActivityDate"], format="%m/%d/%Y")

# Confirm the new dtype. info() prints by itself and returns None, so it is not
# wrapped in print() (that would add a stray "None" line to the output).
data.info()

In [None]:
# Add a TotalMinutes column: the sum of the four per-intensity minute columns.
data["TotalMinutes"] = (
    data["VeryActiveMinutes"]
    + data["FairlyActiveMinutes"]
    + data["LightlyActiveMinutes"]
    + data["SedentaryMinutes"]
)

# Spot-check five random rows. random_state pins the draw so that
# Restart & Run All shows the same rows every time.
print(data["TotalMinutes"].sample(5, random_state=42))

In [None]:
# Descriptive statistics for the dataset's columns.
# Left as the cell's last expression so Jupyter renders it as a table
# instead of the plain-text dump that print() produces.
data.describe()

**Let's analyze the Smartwatch data.**

In [None]:
# Calories burned vs. total steps, one marker per row of `data`.
#   - marker size follows VeryActiveMinutes
#   - trendline="ols" overlays a least-squares fit (Plotly Express relies on
#     the statsmodels package for this option)
figure = px.scatter(
    data,
    x="Calories",
    y="TotalSteps",
    size="VeryActiveMinutes",
    trendline="ols",
    title="Relationship between Calories & Total Steps",
)
figure.show()

**You can see that there is a linear relationship between the total number of steps and the number of calories burned in a day.**

In [14]:
# Average number of minutes per day spent at each activity level, as a pie chart.
# `label` is written in the same order as the columns selected for `counts`,
# so each slice name lines up with its mean value.
label = ["Very Active Minutes", "Fairly Active Minutes",
         "Lightly Active Minutes", "Inactive Minutes"]
counts = data[["VeryActiveMinutes", "FairlyActiveMinutes",
               "LightlyActiveMinutes", "SedentaryMinutes"]].mean()
colors = ['gold', 'lightgreen', "pink", "blue"]

fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Total Active Minutes')
# Show the raw value on each slice; label and percentage appear on hover.
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=30,
                  marker=dict(colors=colors, line=dict(color='black', width=3)))
# fig.show() renders the chart and returns None, so it must not be wrapped in
# print() -- that only adds a stray "None" below the figure.
fig.show()

None



Observations:

*   Inactive (sedentary) minutes make up 81.3% of an average day.
*   Lightly active minutes make up 15.8% of an average day.
*   On average, only 21 minutes (1.74%) were very active.
*   Fairly active minutes account for just 1.11% (13 minutes) of an average day.





In [None]:
# Derive the weekday name of every ActivityDate and store it in a new "Day" column.
weekday_names = data["ActivityDate"].dt.day_name()
data["Day"] = weekday_names
# Quick look at the first five values of the new column.
print(data["Day"].head(5))

In [None]:
# Very, fairly and lightly active minutes for each day of the week, drawn as
# grouped bars: one trace per activity level.
# NOTE(review): x and y are passed row by row, so every weekday receives one
# bar per record in each trace rather than a single aggregated bar -- confirm
# this is the intended picture, or aggregate per day first.
fig = go.Figure()
for minutes_column, trace_name, bar_color in (
    ("VeryActiveMinutes", "Very Active", "purple"),
    ("FairlyActiveMinutes", "Fairly Active", "green"),
    ("LightlyActiveMinutes", "Lightly Active", "pink"),
):
    fig.add_trace(
        go.Bar(
            x=data["Day"],
            y=data[minutes_column],
            name=trace_name,
            marker_color=bar_color,
        )
    )
# Place the traces side by side and tilt the weekday tick labels.
fig.update_layout(barmode="group", xaxis_tickangle=-45)
fig.show()

In [None]:
# Inactive (sedentary) minutes on each day of the week, as a pie chart.
# Pie labels and values must line up one-to-one, so the per-row
# SedentaryMinutes column is first aggregated to a single total per weekday.
# NOTE: these are totals, so a weekday with more recorded rows gets a larger
# slice; switch .sum() to .mean() to compare a typical day instead.
inactive_by_day = data.groupby("Day")["SedentaryMinutes"].sum()
label = inactive_by_day.index
counts = inactive_by_day.values
colors = ['gold', 'lightgreen', "pink", "blue", "skyblue", "cyan", "orange"]

fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Inactive Minutes Daily')
# Show the raw value on each slice; label and percentage appear on hover.
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=30,
                  marker=dict(colors=colors, line=dict(color='black', width=3)))
# Render explicitly, like the other plotting cells.
fig.show()

**So Thursday is the most inactive day according to the lifestyle of all the individuals in the dataset.**

In [None]:
# Calories burned on each day of the week, as a pie chart.
# Pie labels and values must line up one-to-one, so the per-row Calories
# column is first aggregated to a single total per weekday.
# NOTE: these are totals, so a weekday with more recorded rows gets a larger
# slice; switch .sum() to .mean() to compare a typical day instead.
calories_by_day = data.groupby("Day")["Calories"].sum()
label = calories_by_day.index
counts = calories_by_day.values
colors = ['gold', 'lightgreen', "pink", "blue", "skyblue", "cyan", "orange"]

fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Calories Burned Daily')
# Show the raw value on each slice; label and percentage appear on hover.
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=30,
                  marker=dict(colors=colors, line=dict(color='black', width=3)))
fig.show()

**Tuesday is, therefore, one of the most active days for all individuals in the dataset, as the highest number of calories were burned on Tuesdays.**