In [2]:
from bs4 import BeautifulSoup
import requests
import os
import shutil
import pandas as pd

# Primary-muscle slugs. Each slug is used both as the last path segment of the
# scraped listing URL and as the name of the local output directory.
MUSCLE_LIST = ["neck", "trapezius", "shoulders", "chest", "back", "erector-spinae", "biceps", "triceps", "forearm", "abs", "leg", "calf", "hip", "cardio", "full-body"]

# One output directory per muscle group. exist_ok=True keeps the cell safe to
# re-run and avoids the race between a separate exists() check and the creation.
for directory in MUSCLE_LIST:
    os.makedirs(directory, exist_ok=True)

In [27]:
#SCRAPING FUNCTIONS
def add_page(link):
    """Return the URL of the listing page that follows ``link``.

    A link whose last path segment is a positive page number (for example
    ``.../page/3``) gets that number incremented. Any other link is treated
    as the first page and gets ``/page/2`` appended.

    Args:
        link: URL of the current listing page, as a string.

    Returns:
        The URL of the next page, as a string.
    """
    link_parts = link.split('/')
    last_part = link_parts[-1]
    # Only a canonical positive integer ("1", "42", "100") counts as a page
    # number; "0", "007" or any non-numeric segment is handled as a first page.
    is_page_number = (
        last_part.isdecimal()
        and last_part == str(int(last_part))
        and last_part != "0"
    )
    if is_page_number:
        # Integer arithmetic instead of a fixed lookup table: there is no
        # upper bound on the page number and no IndexError at page 99.
        link_parts[-1] = str(int(last_part) + 1)
    else:
        link_parts.extend(['page', '2'])
    return "/".join(link_parts)

def download_images(images_link, muscle, timeout=30):
    """Download every image URL in ``images_link`` into the ``muscle`` directory.

    Each file is saved under the last path segment of its URL. URLs that do
    not answer with HTTP 200 are reported and skipped, so that an error page
    is never stored on disk as if it were an image.

    Args:
        images_link: iterable of image URLs (strings).
        muscle: directory the files are written into; it must already exist.
        timeout: seconds to wait for the server on each request.
    """
    for image_url in images_link:
        filename = image_url.split("/")[-1]
        # The context manager releases the streamed connection in every case,
        # including when the response is skipped or the copy fails.
        with requests.get(image_url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Skipped {image_url} (HTTP {response.status_code}).")
                continue
            # response.raw is the undecoded byte stream; ask urllib3 to undo
            # any gzip/deflate content encoding while copying.
            response.raw.decode_content = True
            with open(os.path.join(muscle, filename), "wb") as img:
                shutil.copyfileobj(response.raw, img)

def list_all_exercises_gif(path):
    """List every ``.gif`` file found under ``path`` with its muscle group.

    The exercise name is the file name without its ``.gif`` suffix,
    capitalized, with dashes replaced by spaces. The muscle is the
    capitalized directory of the file relative to ``path`` (an empty string
    for files located directly in ``path``).

    Args:
        path: root directory to walk.

    Returns:
        A list of ``[exercise_name, muscle]`` two-item lists.
    """
    gif_files = []
    for dirpath, _, files in os.walk(path):
        # Relative to the walked root, so the result does not depend on the
        # root being spelled "." (slicing off two characters only worked then).
        relative_dir = os.path.relpath(dirpath, path)
        muscle = "" if relative_dir == os.curdir else relative_dir.capitalize()
        for file in files:
            if file.endswith(".gif"):
                exercise_name = file[:-4].capitalize().replace("-", " ")
                gif_files.append([exercise_name, muscle])
    return gif_files


def fuse_csv(path):
    """Concatenate every header-less ``.csv`` file found under ``path``.

    Files are read in sorted path order so the row order of the result does
    not depend on the file system. Files without any data (empty or made of
    blank lines only) are skipped.

    Args:
        path: root directory to walk.

    Returns:
        A DataFrame with integer column labels and a fresh 0..n-1 index;
        an empty DataFrame when no CSV data is found.
    """
    csv_list = sorted(
        os.path.join(dirpath, file)
        for dirpath, _, files in os.walk(path)
        for file in files
        if file.endswith(".csv")
    )
    dfs = []
    for csv_path in csv_list:
        try:
            dfs.append(pd.read_csv(csv_path, header=None))
        except pd.errors.EmptyDataError:
            # Nothing to parse in this file; it contributes no rows.
            continue
    if not dfs:
        # pd.concat raises on an empty list, so answer with an empty frame.
        return pd.DataFrame()
    return pd.concat(dfs, ignore_index=True)

In [4]:
# Collect the GIF link of every exercise and store them in one CSV per muscle
# group (rows: exercise name, muscle, GIF URL).
BASE_URL = "https://fitnessprogramer.com/exercise-primary-muscle/"
MAX_PAGES = 99        # ceiling on listing pages fetched per muscle (pages 1..99)
REQUEST_TIMEOUT = 30  # seconds to wait for each listing page

for muscle in MUSCLE_LIST:
    link = BASE_URL + muscle
    exercises_list = []
    for page_number in range(1, MAX_PAGES + 1):
        if page_number > 1:
            link = add_page(link)
        try:
            response = requests.get(link, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as error:
            # Network problems are reported instead of being silently
            # mistaken for the end of the pagination.
            print(f"Request failed for {link}: {error}")
            break
        if response.status_code != 200:
            # A non-200 answer is treated as the end of the pagination.
            break
        soup = BeautifulSoup(response.content, 'lxml')
        # .get() tolerates <img> tags that carry no src attribute.
        images_links = [
            src
            for src in (image.get("src", "") for image in soup.find_all("img"))
            if src.endswith("gif")
        ]
        exercises_list.extend(
            f"{os.path.basename(image_link)[:-4].capitalize().replace('-', ' ')},{muscle.title()},{image_link}"
            for image_link in images_links
        )
        # Downloading the GIF files themselves is disabled; only links are saved.
        # download_images(images_links, muscle)

    if exercises_list:
        filepath = os.path.join(muscle, muscle + "_linkslist.csv")
        # "w" rather than append: re-running the cell rewrites the file
        # instead of duplicating every row.
        with open(filepath, "w", encoding="utf-8") as file:
            file.write("\n".join(exercises_list) + "\n")
    print(f"Saved {len(exercises_list)} GIF files for {muscle}.")

Saved 23 GIF files for neck.
Saved 53 GIF files for trapezius.
Saved 192 GIF files for shoulders.
Saved 153 GIF files for chest.
Saved 125 GIF files for back.
Saved 56 GIF files for erector-spinae.
Saved 69 GIF files for biceps.
Saved 104 GIF files for triceps.
Saved 30 GIF files for forearm.
Saved 154 GIF files for abs.
Saved 233 GIF files for leg.
Saved 40 GIF files for calf.
Saved 219 GIF files for hip.
Saved 91 GIF files for cardio.
Saved 74 GIF files for full-body.


In [29]:
# Merge the per-muscle CSV files into a single labelled table.
OUTPUT_CSV = "exercises_list.csv"

# The merged file lives in the very tree that fuse_csv walks. Drop any copy
# left by a previous run, otherwise it would be read back as input (header
# line included) and every row would be duplicated.
if os.path.exists(OUTPUT_CSV):
    os.remove(OUTPUT_CSV)

df = fuse_csv('.').rename(columns={0: "exercise_name", 1 : "muscle", 2 : "GIF_link"})
df.to_csv(OUTPUT_CSV, index=False)

In [None]:
# Build the [exercise name, muscle] pairs from the GIF files found under the
# current directory.
all_exercises = list_all_exercises_gif(".")

print(all_exercises)
# Disabled text export. NOTE(review): as written it would fail if re-enabled,
# because every entry of all_exercises is a two-item list and str.join()
# only accepts strings.
# with open("exercises_list.txt", "w+") as f :
#     f.write("\n".join(all_exercises))
#     f.close()