In [None]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from moviepy.editor import *
import numpy as np
import pandas as pd
import subprocess
import cv2
import os
import time


In [None]:
# Defining variables
runnersite = "https://runalyze.com/athlete/Schrottie"  # The URL which will be parsed
# Path to the ffmpeg executable. Set the FFMPEG_PATH environment variable to
# override it (for example to 'ffmpeg' when ffmpeg is on PATH); otherwise the
# original machine-specific location is used as the fallback.
ffmpeg_path = os.environ.get("FFMPEG_PATH") or "C:\\Users\\24007120\\Documents\\FFMPEG\\bin\\ffmpeg.exe"
chart_file_pic = 'tmp/chart.png'  # Temporary chart picture
chart_file_vid = 'tmp/chart.mp4'  # Temporary video file
video_data_file = 'tmp/data.txt'  # Text file with the content of the loaded dataframe
final_video_clip = 'movies/activity_movie.mp4'  # Name of the final clip (default filename, meant to be replaced by a dated one later)
width, height = 1920, 1080  # Video resolution
duration_per_row = 3  # Duration for displaying each row of data in seconds
fade_duration = 1  # Fade duration in seconds
final_duration = 7  # Duration of the final display in seconds

In [None]:
def wait_for_file_creation(file_path, timeout=30, min_size=1000):
    """Block until ``file_path`` exists and holds at least ``min_size`` bytes.

    The file is polled once per second.

    Args:
        file_path: Path of the file to wait for.
        timeout: Maximum waiting time in seconds.
        min_size: Minimum file size in bytes for the file to count as created.

    Raises:
        TimeoutError: If the file is still missing or too small after more
            than ``timeout`` seconds.
    """
    began = time.time()
    while True:
        # Done as soon as the file is present and large enough
        if os.path.exists(file_path) and os.path.getsize(file_path) >= min_size:
            return
        waited = time.time() - began
        if waited > timeout:
            raise TimeoutError("Timeout while waiting for file creation")
        time.sleep(1)

In [None]:
# Function for retrieving the website and extracting the relevant information
def _cell_text(cols, index):
    """Return the stripped text of ``cols[index]``, or '' when the row is too short."""
    return cols[index].text.strip() if len(cols) > index else ''


def _normalize_duration(text):
    """Prefix a two-field duration with '0:' so pandas receives h:mm:ss."""
    # NOTE(review): a two-field value is assumed to be mm:ss (the pace column is
    # parsed as minutes:seconds elsewhere in this notebook) - confirm against the site.
    return f"0:{text}" if text.count(':') == 1 else text


def extract_activity_data():
    """Download the athlete page and return its running activities.

    The page at ``runnersite`` is fetched and every ``<tr class="r">`` row that
    contains an ``<i class="icons8-Running">`` icon becomes one record. Rows
    with an empty distance, duration or pace are skipped.

    Side effect: when activities were found, the module-level
    ``final_video_clip`` is replaced by
    ``movies/activity_movie_<first date>_<last date>.mp4`` (dates without dots).

    Returns:
        pandas.DataFrame: Columns ``date`` (str, ``dd.mm.YYYY``), ``a_type``,
        ``r_type``, ``distance`` (numeric), ``duration`` (timedelta), ``pace``
        (str without "/km") and ``duration_minutes`` (float). The frame is
        empty, but has these columns, when no activity was found.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # Without this declaration the dated file name would only be a discarded local
    global final_video_clip

    columns = ['date', 'a_type', 'r_type', 'distance', 'duration', 'pace']

    response = requests.get(runnersite, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    activities = []
    current_date = None
    # NOTE(review): the page apparently shows dates without a year, so the
    # current year is appended - activities from a previous year get the wrong year.
    current_year = datetime.today().year

    # Walk through all lines with activities
    for row in soup.find_all('tr', class_='r'):
        cols = row.find_all('td')
        if len(cols) < 2:
            continue

        date_cell = cols[1].text.strip()
        if date_cell == '':  # completely empty means that it is not the first activity of the day
            offset = 0
        else:
            offset = 1
            # Remember the date so that it can be used in lines without a date
            current_date = f"{date_cell.split()[0]}.{current_year}"

        if not row.find('i', class_='icons8-Running'):
            continue

        rt_str = _cell_text(cols, 3 + offset)
        distance_parts = _cell_text(cols, 4 + offset).split()
        distance_str = distance_parts[0].replace(',', '.') if distance_parts else ''
        duration_str = _cell_text(cols, 5 + offset)
        pace_str = _cell_text(cols, 6 + offset)

        # Only keep rows whose values are complete
        if distance_str and duration_str and pace_str:
            activities.append({'date': current_date, 'a_type': 'run', 'r_type': rt_str,
                               'distance': distance_str, 'duration': duration_str, 'pace': pace_str})

    if not activities:
        # Return a frame with the expected columns so that callers can test .empty
        return pd.DataFrame(columns=columns + ['duration_minutes'])

    df = pd.DataFrame(activities, columns=columns)
    df = df[df['a_type'] == 'run'].copy()  # Filter by activity type "run"; copy avoids chained-assignment warnings

    # Clean up dataframe so that data types fit and do not cause errors later
    df['distance'] = pd.to_numeric(df['distance'], errors='coerce')
    df['pace'] = df['pace'].str.replace("/km", "", regex=False)  # Remove "/km"
    df['duration'] = pd.to_timedelta(df['duration'].map(_normalize_duration))
    df['duration_minutes'] = df['duration'].dt.total_seconds() / 60

    # Set new name for the final videoclip. The dates are dd.mm.YYYY strings, so
    # they are parsed before comparing; comparing the raw strings would sort by day.
    parsed_dates = pd.to_datetime(df['date'], format='%d.%m.%Y', errors='coerce')
    if parsed_dates.notna().all():
        first_date = df['date'].iloc[parsed_dates.to_numpy().argmin()]
        last_date = df['date'].iloc[parsed_dates.to_numpy().argmax()]
    else:
        # Unparseable dates: fall back to plain string comparison
        first_date, last_date = df['date'].min(), df['date'].max()
    min_date_str = first_date.replace('.', '')  # Format date without dots
    max_date_str = last_date.replace('.', '')  # Format date without dots
    final_video_clip = f"movies/activity_movie_{min_date_str}_{max_date_str}.mp4"

    # Give me the dataframe!
    return df

In [None]:
# Function for creating the chart
def create_chart(activities):
    """Draw one horizontal bar per activity and save the chart to ``chart_file_pic``.

    Each bar's length is the activity's distance; the bar is labelled with the
    duration and pace. The y-axis label shows the covered date range. The
    figure is saved, shown and then closed.

    Args:
        activities: DataFrame with the columns ``date`` (``dd.mm.YYYY``
            strings), ``distance``, ``duration`` and ``pace``.
    """
    # Number of lines in the dataframe
    n_rows = activities.shape[0]

    # Create a figure object with square dimensions
    fig, ax = plt.subplots(figsize=(10, 10))

    # Determine the date range chronologically; comparing the dd.mm.YYYY
    # strings directly would order them by day of month.
    parsed_dates = pd.to_datetime(activities['date'], format='%d.%m.%Y', errors='coerce')
    if n_rows and parsed_dates.notna().all():
        first_date = activities['date'].iloc[parsed_dates.to_numpy().argmin()]
        last_date = activities['date'].iloc[parsed_dates.to_numpy().argmax()]
    else:
        # Unparseable dates: fall back to plain string comparison
        first_date, last_date = activities['date'].min(), activities['date'].max()

    # Set the labeling of the Y-axis to "from - to"
    ax.set_ylabel(f"{first_date} to {last_date}")

    # Create a horizontal bar plot for the distance
    for i, (dist, dur, pace) in enumerate(zip(activities["distance"], activities["duration"], activities["pace"])):
        # Use only one colormap for the bars; the shade varies with the row position
        color = mcolors.to_rgba_array(plt.cm.bone(0.3 + i * 0.3 / n_rows))

        # Paint the bars
        ax.barh(i, dist, color=color, edgecolor='black')

        # Format the duration so that it is not preceded by "0 days"
        dur_formatted = str(dur).split()[-1]

        # Label the bars with duration and pace, right-aligned at the end of the bar
        ax.text(dist, i, f"{dur_formatted} / {pace}  ", ha='right', va='center', rotation=0, color='white')

    # Set the labels of the X-axis for the distance
    ax.set_xlabel("Running distance (km)")

    # Remove the labels from the Y-axis
    ax.set_yticks([])

    # Set background color
    ax.set_facecolor('#fffff0')

    # Make sure the target folder exists, then save the chart as an image
    os.makedirs(os.path.dirname(chart_file_pic) or '.', exist_ok=True)
    fig.savefig(chart_file_pic, bbox_inches='tight', dpi=350)

    # Show the chart, then release the figure
    plt.show()
    plt.close(fig)

In [None]:
# Function for antialiasing the video
def resize_with_antialiasing(img, new_size):
    """Resize ``img`` to ``new_size`` and return the result as a uint8 array.

    Args:
        img: Image array as accepted by ``cv2.resize``.
        new_size: Target size passed to ``cv2.resize`` as its ``dsize`` argument.

    Returns:
        The resized image, clipped to 0..255 and converted to ``np.uint8``.
    """
    # INTER_AREA interpolation is used for the antialiasing
    resized = cv2.resize(img, new_size, interpolation=cv2.INTER_AREA)
    # Limit the values before converting to uint8
    bounded = np.clip(resized, 0, 255)
    return bounded.astype(np.uint8)

In [None]:
# Function for creating the video
def _format_hms(total_seconds):
    """Format a number of seconds as H:MM:SS."""
    hours, remainder = divmod(int(round(total_seconds)), 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours}:{minutes:02d}:{seconds:02d}"


def _mean_pace(paces):
    """Average 'M:SS' pace strings and return the mean as 'M:SS'.

    Values that do not have the form minutes:seconds are left out; 'n/a' is
    returned when no value could be parsed.
    """
    pace_seconds = []
    for pace in paces:
        try:
            minutes, seconds = str(pace).strip().split(':')
            pace_seconds.append(int(minutes) * 60 + int(seconds))
        except ValueError:
            continue
    if not pace_seconds:
        return 'n/a'
    mean_seconds = int(round(sum(pace_seconds) / len(pace_seconds)))
    return f"{mean_seconds // 60}:{mean_seconds % 60:02d}"


def create_video(activities):
    """Render the activity movie from the saved chart and the activity data.

    ffmpeg turns the chart picture (``chart_file_pic``) into a video with the
    activity rows drawn as text; a summary text clip is appended and the result
    is written to ``final_video_clip``. The temporary chart picture, text file
    and chart video are deleted afterwards.

    Args:
        activities: DataFrame with the columns ``date``, ``distance``,
            ``duration`` (timedelta) and ``pace`` ('M:SS' strings).

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with an error.
    """
    # Create text for the final display. The mean pace is formatted as M:SS;
    # averaging strptime results would render as a 1900-01-01 timestamp.
    total_distance = activities['distance'].sum()
    total_duration = activities['duration'].sum()
    sum_text = (f"Total: Distance {total_distance:.2f} km, "
                f"Duration {_format_hms(total_duration.total_seconds())}, "
                f"Pace {_mean_pace(activities['pace'])}")

    # Create one text line per data row
    data_lines = [
        f"Date: {row['date']} - Distance: {row['distance']} km - Duration: {row['duration']} - Pace: {row['pace']}"
        for _, row in activities.iterrows()
    ]

    # Make sure all output folders exist
    for target in (video_data_file, chart_file_vid, final_video_clip):
        os.makedirs(os.path.dirname(target) or '.', exist_ok=True)

    # Create temporary text file
    with open(video_data_file, 'w', encoding='utf-8') as file:
        file.write('\n'.join(data_lines))

    # Generate the final command for ffmpeg -- Change ffmpeg_path to 'ffmpeg' if ffmpeg is in PATH
    # The scale filter rounds the picture down to even dimensions, which yuv420p requires.
    # NOTE(review): the text file is written once, so drawtext appears to show all
    # rows for the whole clip rather than one row per duration_per_row - confirm intent.
    ffmpeg_command = [
        ffmpeg_path, '-y', '-loop', '1', '-i', chart_file_pic, '-vf',
        "scale=trunc(iw/2)*2:trunc(ih/2)*2,"
        f"drawtext=fontsize=40:fontcolor=white:fontfile=Arial.ttf:textfile={video_data_file}:y=h-line_h-10:x=w/2-tw/2:reload=1",
        '-t', str(duration_per_row * len(activities)), '-pix_fmt', 'yuv420p', chart_file_vid
    ]

    # Execute ffmpeg command and stop here if it fails
    subprocess.run(ffmpeg_command, check=True)

    chart_video = VideoFileClip(chart_file_vid)
    summary_clip = None
    final_clip = None
    try:
        summary_clip = (TextClip(sum_text, fontsize=70, color='white', bg_color='black')
                        .set_duration(final_duration))

        # Concatenate chart video with final display; "compose" is used because
        # the two clips do not have the same frame size
        final_clip = concatenate_videoclips([chart_video, summary_clip], method="compose")

        # Resize final clip
        final_clip = final_clip.resize(width=width)

        # Save the final video
        final_clip.write_videofile(final_video_clip, fps=24, codec='libx264')
    finally:
        # Release the readers so that the temporary video can be deleted
        for clip in (final_clip, summary_clip, chart_video):
            if clip is not None:
                clip.close()

    # Clean up temporary files
    os.remove(chart_file_pic)
    os.remove(video_data_file)
    os.remove(chart_file_vid)

In [None]:
# Main function
def main():
    """Fetch the activities and, if there are any, create the chart and the video."""
    activities = extract_activity_data()

    # Nothing to draw without activities
    if activities.empty:
        print("No activities found.")
        return

    create_chart(activities)
    create_video(activities)

if __name__ == "__main__":
    main()