In [78]:
# Step 1: Import libraries
import pandas as pd

# Step 2: Load the CSV data
file_path = 'Users/tim/Documents/GitHub/The-Running-Machine/Script/RunningData.csv'  # Replace with your actual file path 
# Specify the filename
filename = 'RunningData.csv'  # replace with your actual CSV file name

# Read the CSV file into a DataFrame
try:
    df = pd.read_csv(filename,header=1)
    print(df.head())  # Print the first few rows to check if data is loaded correctly
except pd.errors.EmptyDataError:
    print("The file is empty or could not be read.")
except FileNotFoundError:
    print("The file was not found. Please check the file name and path.")
except Exception as e:
    print(f"An error occurred: {e}") 

# Strip any leading/trailing whitespace from column names
df.columns = df.columns.str.strip() 

# Define the cleaning function
def clean_time_format(time_str):
    # Check if time_str is NaN
    if pd.isna(time_str):
        return time_str  # Return NaN as is
    
    # Split the time string into components
    parts = time_str.split(':')
    
    # If there are only two components (mm:ss), add "00:" in front
    if len(parts) == 2:
        return f"00:{time_str}"
    
    # If there are three components (hh:mm:ss), return as is
    return time_str

# Apply the cleaning function to the 'Time' column
df['Cleaned Time'] = df['Time'].apply(clean_time_format)

# Create a new column for Total Seconds
def calculate_total_seconds(time_str):
    if pd.isna(time_str):
        return None  # Return None for NaN
    hours, minutes, seconds = map(int, time_str.split(':'))
    return hours * 3600 + minutes * 60 + seconds

# Apply the function to create the Total Seconds column
df['Total Seconds'] = df['Cleaned Time'].apply(calculate_total_seconds)

# Check the DataFrame with the new columns
print(df[['Time', 'Cleaned Time', 'Total Seconds']])

   Run  Sat, 10/19/2024                                  GINOBILI  2:55:56  \
0  Run  Thu, 10/17/2024                                   Slumped  1:02:49   
1  Run  Wed, 10/16/2024  Err, like hop, I hopped out like wassup?  1:19:11   
2  Run  Tue, 10/15/2024                                 Lunch Run    50:01   
3  Run  Tue, 10/15/2024                                   warm up    14:23   
4  Run  Mon, 10/14/2024                                  SMUCKERS  1:05:59   

   20.01 mi  460 ft  162  Unnamed: 7  
0   7.25 mi  134 ft  163         NaN  
1  10.27 mi  237 ft  174         NaN  
2   6.08 mi  154 ft  163         NaN  
3   1.64 mi   35 ft  148         NaN  
4   8.50 mi  205 ft  173         NaN  


KeyError: 'Time'

In [70]:
import pandas as pd

# Assuming df is your DataFrame and it has already been read from the file

# Step 1: Reset index
df.reset_index(drop=True, inplace=True)

# Step 2: Convert necessary columns to appropriate data types
df['Distance'] = df['Distance'].str.replace(' mi', '').astype(float)
df['Heart Rate'] = df['Heart Rate'].astype(int)

# Convert Time to timedelta
df['Time'] = pd.to_timedelta(df['Time'], errors='coerce')  # Convert Time to timedelta
print(df["Time"])
# Calculate total seconds from the time
df['Total Seconds'] = df['Time'].dt.total_seconds() 


# Convert cleaned Pace times to total seconds
df['Total Seconds'] = pd.to_timedelta(df['Time']).dt.total_seconds()

print(df['Total Seconds'])
# # Calculate pace (in seconds per mile)
# df['Pace (sec/mile)'] = df['Total Seconds'] / df['Distance']

# # Convert pace from seconds to a readable format (MM:SS)
# def convert_seconds_to_pace(seconds):
#     minutes = int(seconds // 60)
#     seconds = int(seconds % 60)
#     return f"{minutes}:{seconds:02d}"

# df['Pace'] = df['Pace (sec/mile)'].apply(convert_seconds_to_pace)

# # Drop the temporary Total Seconds and Pace (sec/mile) columns if not needed
# df.drop(columns=['Total Seconds', 'Pace (sec/mile)'], inplace=True)

# # Step 3: Calculate statistics
# # 1. Total Runs
# total_runs = df.shape[0]

# # 2. Total Distance
# total_distance = df['Distance'].sum()

# # 3. Average Heart Rate
# average_heart_rate = df['Heart Rate'].mean()

# # 4. Average Pace
# average_pace = df['Pace'].mean()

# # 5. Longest Run
# longest_run = df.loc[df['Distance'].idxmax()]

# # 6. Fastest Run (Minimum pace)
# fastest_run = df.loc[df['Pace'].idxmin()]

# # 7. Run Frequency
# df['Date'] = pd.to_datetime(df['Date'])  # Ensure Date is in datetime format
# runs_per_week = df.resample('W-Mon', on='Date').size()  # Weekly runs

# # Print calculated statistics
# print("Total Runs:", total_runs)
# print("Total Distance (mi):", total_distance)
# print("Average Heart Rate:", average_heart_rate)
# print("Average Pace:", average_pace)
# print("Longest Run:", longest_run)
# print("Fastest Run:", fastest_run)

# # Optional: Print run frequency
# print("Runs per Week:")
# print(runs_per_week)


0     0 days 02:55:56
1     0 days 01:02:49
2     0 days 01:19:11
3     2 days 02:01:00
4     0 days 00:14:23
            ...      
135   0 days 01:04:54
136   0 days 01:17:58
137   0 days 01:08:04
138   0 days 01:10:33
139   0 days 01:58:59
Name: Time, Length: 140, dtype: timedelta64[ns]
0       10556.0
1        3769.0
2        4751.0
3      180060.0
4         863.0
         ...   
135      3894.0
136      4678.0
137      4084.0
138      4233.0
139      7139.0
Name: Total Seconds, Length: 140, dtype: float64


In [47]:
# Ensure necessary columns are in the right format
df['Time'] = pd.to_timedelta(df['Time'], errors='coerce')  # Convert Time to timedelta

# Convert 'Distance' from string to float (removing ' mi')
df['Distance'] = df['Distance'].str.replace(' mi', '').astype(float)

# Calculate total seconds from the time
df['Total Seconds'] = df['Time'].dt.total_seconds()

# Calculate pace (in seconds per mile)
df['Pace (sec/mile)'] = df['Total Seconds'] / df['Distance']

# Convert pace from seconds to a readable format (MM:SS)
def convert_seconds_to_pace(seconds):
    minutes = int(seconds // 60)
    seconds = int(seconds % 60)
    return f"{minutes}:{seconds:02d}"

df['Pace'] = df['Pace (sec/mile)'].apply(convert_seconds_to_pace)

# Drop the temporary Total Seconds column if not needed
df.drop(columns=['Total Seconds'], inplace=True)

# Check the updated DataFrame
print(df[['Date', 'Distance', 'Time', 'Pace']])


ValueError: cannot convert float NaN to integer

In [43]:
# Check the columns in the DataFrame
print(df.columns)


Index(['Sport', 'Date', 'Title', 'Time', 'Distance ', 'Elevation',
       'Heart Rate', 'Pace '],
      dtype='object')
