In [119]:
import pandas as pd
import plotly.express as px



In [120]:
# Location of the exported activity log, held in a named constant so it is easy to repoint.
ACTIVITIES_CSV = 'Activities.csv'

# Parse the export with pandas' default settings; the later cells read this frame as `df`.
df = pd.read_csv(ACTIVITIES_CSV)

In [121]:
# Preview the first five rows to sanity-check how the columns were parsed.
df.head(n=5)

Unnamed: 0,Activity Type,Date,Favorite,Title,Distance,Calories,Time,Avg HR,Max HR,Aerobic TE,...,Min Temp,Surface Interval,Decompression,Best Lap Time,Number of Laps,Max Temp,Moving Time,Elapsed Time,Min Elevation,Max Elevation
0,Running,2024-03-17 09:35:21,False,Carmarthenshire - Great Welsh Marathon (Half M...,21.23,2389,02:26:52,167,184,5.0,...,18.0,0:00,No,01:23.91,22,27.0,02:26:46,02:26:52,8,22
1,Strength Training,2024-03-13 17:19:57,False,Strength,0.0,663,01:45:42,100,142,1.3,...,0.0,0:00,No,01:45:42.42,1,0.0,01:45:42,01:45:42,--,--
2,Running,2024-03-08 17:34:30,False,Cardiff Running,10.01,1052,01:07:10,147,171,4.6,...,17.0,0:00,No,00:05.11,11,27.0,01:07:00,01:07:10,6,15
3,Strength Training,2024-03-04 17:17:51,False,Strength,0.0,794,02:06:42,102,144,1.5,...,0.0,0:00,No,02:06:42.49,1,0.0,02:06:42,02:06:42,--,--
4,Strength Training,2024-02-26 16:35:29,False,Strength,0.0,612,01:48:02,95,148,0.7,...,0.0,0:00,No,01:48:01.86,1,0.0,01:33:05,01:48:02,--,--


In [122]:
# Summarise column dtypes and non-null counts. The listing below reports 'Calories'
# as object rather than a numeric dtype, so it needs converting before numeric use.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 66 entries, 0 to 65
Data columns (total 42 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Activity Type             66 non-null     object 
 1   Date                      66 non-null     object 
 2   Favorite                  66 non-null     bool   
 3   Title                     66 non-null     object 
 4   Distance                  66 non-null     float64
 5   Calories                  66 non-null     object 
 6   Time                      66 non-null     object 
 7   Avg HR                    66 non-null     int64  
 8   Max HR                    66 non-null     int64  
 9   Aerobic TE                66 non-null     float64
 10  Avg Run Cadence           66 non-null     object 
 11  Max Run Cadence           66 non-null     object 
 12  Avg Pace                  66 non-null     object 
 13  Best Pace                 66 non-null     object 
 14  Total Ascent

In [128]:
def create_activity_pie_chart(df):
    """
    Render a pie chart of how many activities fall under each activity type.

    Parameters:
    - df (DataFrame): Activity data; must contain an 'Activity Type' column.

    Returns:
    - None. The figure is displayed with ``fig.show()``.
    """
    # Three-colour palette handed to Plotly for the slices.
    slice_colours = ['#FF5733', '#FFBD33', '#33FF57']

    # One entry per distinct activity type, holding its number of occurrences.
    type_tally = df['Activity Type'].value_counts()

    pie = px.pie(
        type_tally,
        values=type_tally.values,
        names=type_tally.index,
        title='Distribution of Activity Types',
        color_discrete_sequence=slice_colours,
    )
    pie.show()

# Draw the activity-type breakdown for the frame loaded earlier in the notebook (`df`).
create_activity_pie_chart(df=df)


In [124]:
def create_running_distance_histogram(df, bin_size=2):
    """
    Plot overlaid, percent-normalised distance histograms for running activities.

    Only rows whose 'Activity Type' is 'Running' or 'Trail Running' are used; each
    activity type is drawn as its own colour-coded trace.

    Parameters:
    - df (DataFrame): Activity data with 'Activity Type' and 'Distance' columns.
    - bin_size (float): Width of each histogram bin, in the units of 'Distance'
      (labelled as km on the axis). Defaults to 2, the previously hard-coded width.

    Returns:
    - None. The figure is displayed with ``fig.show()``.

    Raises:
    - ValueError: If ``bin_size`` is not strictly positive.
    """
    if not bin_size > 0:
        raise ValueError("bin_size must be a positive number")

    # Filter to include only Running and Trail Running activities
    df_running = df[df['Activity Type'].isin(['Running', 'Trail Running'])]

    # Create histogram with color distinction and improved aesthetics
    fig = px.histogram(df_running, x='Distance', color='Activity Type',
                       title='Distribution of Distances for Running and Trail Running Activities',
                       labels={'Distance': 'Distance (km)', 'count': 'Number of Activities'},
                       barmode='overlay',  # Overlay the bars of different activities
                       histnorm='percent',  # Represent bins as percentage of total to facilitate comparison
                       color_discrete_map={'Running': '#1f77b4', 'Trail Running': '#ff7f0e'})  # Customize colors

    # Customize the layout
    fig.update_layout(
        xaxis_title="Distance (km)",
        yaxis_title="Percentage of Activities",
        legend_title="Activity Type",
        template="plotly_white",  # A light theme that is generally appealing
        hovermode='x unified',  # Shows a single hover box for all bars in the same x position
    )

    trace_settings = {
        'hovertemplate': 'Distance: %{x} km<br>Percentage: %{y}%<extra></extra>',
    }

    # Use fixed-width bins from 0 up to the longest distance. When there are no
    # running rows (or no valid distances) the maximum is NaN, so leave Plotly's
    # automatic binning in place instead of passing NaN as the bin end.
    longest = df_running['Distance'].max()
    if pd.notna(longest):
        trace_settings['xbins'] = dict(start=0, end=longest, size=bin_size)

    fig.update_traces(**trace_settings)

    fig.show()


In [125]:
# Function to create a scatter plot of distance vs. calories for running and trail running activities
def create_running_distance_calories_scatter(df):
    """
    Scatter-plot calories burned against distance for running activities.

    Only rows whose 'Activity Type' is 'Running' or 'Trail Running' are used.
    'Calories' may be held as text (object dtype), so it is converted to numbers
    here: thousands separators are stripped and anything that still cannot be
    parsed is dropped, which keeps the y-axis numeric and correctly ordered.

    Parameters:
    - df (DataFrame): Activity data with 'Activity Type', 'Distance' and
      'Calories' columns. The input frame is not modified.

    Returns:
    - None. The figure is displayed with ``fig.show()``.
    """
    # Work on a copy so the caller's frame keeps its original 'Calories' values.
    df_running = df[df['Activity Type'].isin(['Running', 'Trail Running'])].copy()

    # Go through str so both text and already-numeric values take the same path.
    calories_text = df_running['Calories'].astype(str).str.replace(',', '', regex=False)
    df_running['Calories'] = pd.to_numeric(calories_text, errors='coerce')

    # Rows without a usable calorie value cannot be placed on the chart.
    df_running = df_running.dropna(subset=['Calories'])

    fig = px.scatter(df_running, x='Distance', y='Calories', title='Calories Burned vs. Distance for Running and Trail Running Activities',
                     labels={'Distance': 'Distance (km)', 'Calories': 'Calories Burned'})

    # With numeric data the y-axis is linear by default, so no axis-type override
    # is needed. Use fig.show() like the other plotting helpers rather than the
    # IPython-only display() global.
    fig.show()

In [126]:

def create_running_time_series_moving_time(df):
    """
    Plot moving time against date for running activities as a line chart.

    Only rows whose 'Activity Type' is 'Running' or 'Trail Running' are used.
    Rows whose 'Moving Time' cannot be parsed as a duration are dropped.

    The y-axis is labelled in hh:mm:ss. A '%H:%M:%S' tick format is a date
    format, so it only applies on a date axis; the duration is therefore plotted
    as a timestamp offset from a fixed origin rather than as raw seconds.
    Durations of 24 hours or more would wrap around on this clock-style axis.

    Parameters:
    - df (DataFrame): Activity data with 'Activity Type', 'Date' and
      'Moving Time' columns. The input frame is not modified.

    Returns:
    - None. The figure is displayed with ``fig.show()``.
    """
    # Filter for Running and Trail Running activities; copy so the caller's frame is untouched
    df_running = df[df['Activity Type'].isin(['Running', 'Trail Running'])].copy()

    # Convert 'Date' to datetime
    df_running['Date'] = pd.to_datetime(df_running['Date'])

    # Normalise to strings first (the column may hold mixed types), then parse as
    # timedeltas; unparseable values become NaT.
    df_running['Moving Time'] = pd.to_timedelta(
        df_running['Moving Time'].astype(str), errors='coerce'
    )

    # Drop rows without a valid duration and order chronologically for the line plot
    df_running = df_running.dropna(subset=['Moving Time']).sort_values(by='Date')

    # Keep the duration in seconds available as a plain numeric column
    df_running['Moving Time Seconds'] = df_running['Moving Time'].dt.total_seconds()

    # Anchor the duration to a fixed origin so Plotly treats the y-axis as a date
    # axis and the hh:mm:ss tick format is actually applied.
    df_running['Moving Time Clock'] = pd.Timestamp('1970-01-01') + df_running['Moving Time']

    fig = px.line(df_running, x='Date', y='Moving Time Clock', title='Moving Time Over Time for Running and Trail Running Activities')

    # Customize the y-axis format to display hours, minutes, and seconds
    fig.update_layout(
        yaxis_tickformat='%H:%M:%S',
        yaxis_title='Moving Time (hh:mm:ss)'
    )

    # Show only the time-of-day part on hover, hiding the artificial origin date
    fig.update_traces(hovertemplate='Date: %{x}<br>Moving Time: %{y|%H:%M:%S}<extra></extra>')

    # Display the figure
    fig.show()




In [127]:

# Example usage: render each running-focused chart in turn from the loaded frame.
for render_chart in (
    create_running_distance_histogram,
    create_running_distance_calories_scatter,
    create_running_time_series_moving_time,
):
    render_chart(df)