In [6]:
import pandas as pd

# Location of the raw workout CSV in the workshop's GitHub repository.
url = "https://raw.githubusercontent.com/Basil1361/March-2026-Workshop-Resources/master/Resources/data.csv"

# Read the CSV into a DataFrame, then print its schema summary
# (columns, non-null counts, dtypes, memory usage).
table = pd.read_csv(filepath_or_buffer=url)
table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Duration  32 non-null     int64  
 1   Date      31 non-null     object 
 2   Pulse     32 non-null     int64  
 3   Maxpulse  32 non-null     int64  
 4   Calories  30 non-null     float64
dtypes: float64(1), int64(3), object(1)
memory usage: 1.4+ KB


In [8]:
# 1. count the number of missing values per column
# isna() flags every missing cell; summing down the rows gives one total per column.
table.isna().sum(axis="index")

Duration    0
Date        1
Pulse       0
Maxpulse    0
Calories    2
dtype: int64

In [19]:
# 2. count the number of records where pulse > maxpulse
# The element-wise comparison yields a boolean Series; summing it counts the True rows.
table["Pulse"].gt(table["Maxpulse"]).sum()

1

In [None]:
# 3. remove missing/invalid values
# Largest Duration accepted as a plausible workout; anything above it is
# treated as a data-entry error. (Units are not stated in the data —
# presumably minutes; confirm against the data source.)
MAX_DURATION = 240

table_clean = (
    table
    # Parse dates FIRST: unparseable strings are coerced to NaT, so the
    # dropna() below removes them together with the other missing values.
    # (Parsing after dropna() would leave NaT rows in the "clean" frame.)
    .assign(Date=lambda df: pd.to_datetime(df["Date"], errors="coerce", format="mixed"))
    # Keep only physically consistent rows: pulse cannot exceed max pulse,
    # and the duration must be within the plausible range.
    .loc[lambda df: (df["Pulse"] <= df["Maxpulse"]) & (df["Duration"] <= MAX_DURATION)]
    # Drop every row that still has a missing value in any column.
    .dropna()
    # Renumber LAST so the final index is a contiguous 0..n-1 range.
    .reset_index(drop=True)
)

table_clean.head(5)

Unnamed: 0,Duration,Date,Pulse,Maxpulse,Calories
0,60,2020-12-01,110,130,409.1
1,60,2020-12-02,117,145,479.0
2,60,2020-12-03,103,135,340.0
3,45,2020-12-04,109,175,282.4
4,45,2020-12-05,117,148,406.0


In [24]:
# 4. calculate the average pulse rate and average duration of all workouts
# Means are taken over the cleaned table, one column at a time.
avg_pulse = table_clean.loc[:, "Pulse"].mean()
avg_duration = table_clean.loc[:, "Duration"].mean()

# Report both averages rounded to two decimals, one per line.
print(
    f"Average pulse is: {avg_pulse:.2f}",
    f"Average duration is: {avg_duration:.2f}",
    sep="\n",
)

Average pulse is: 103.15
Average duration is: 56.67


In [None]:
# 5. create new feature (intensity)
# Start every row at "Medium", then overwrite the two tails:
# Pulse <= 100 becomes "Low" and Pulse >= 120 becomes "High".
pulse = table_clean["Pulse"]
table_clean["Intensity"] = (
    pd.Series("Medium", index=table_clean.index)
    .mask(pulse <= 100, "Low")
    .mask(pulse >= 120, "High")
)

# Show how many workouts fall into each intensity label.
table_clean["Intensity"].value_counts()

Intensity
Medium    16
Low       11
Name: count, dtype: int64