In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
# URL of the index page that links to every muscle-group category page
base_url = "https://www.docteur-fitness.com/exercice-musculation"

# Step 1: Get all the initial links from the main page.
# requests has no default timeout, so an explicit one keeps the script from
# hanging forever on an unresponsive server.
response = requests.get(base_url, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were the index.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Select target divs containing the initial links
target_divs = soup.select('.wp-container-core-columns-is-layout-1, .wp-container-core-columns-is-layout-2')

# Extract all links. dict.fromkeys() removes duplicate hrefs while preserving
# first-seen order, so a category linked twice is only fetched once later on.
links = list(dict.fromkeys(
    link['href'] for div in target_divs for link in div.find_all('a', href=True)
))

# Step 2: Visit each link and extract the articles listed in its target div.
# `results` maps each link to either a list of (title, excerpt) tuples or a
# string explaining why nothing was collected for that link.
results = {}

for link in links:
    print(f"Processing: {link}")
    try:
        # The timeout keeps one unresponsive page from stalling the whole run;
        # raise_for_status() keeps HTTP error pages from being parsed as content.
        page_response = requests.get(link, timeout=10)
        page_response.raise_for_status()
    except requests.RequestException as e:
        # Best effort: record the failure and carry on with the remaining links.
        print(f"Error processing {link}: {e}")
        results[link] = f"Error: {e}"
        continue

    page_soup = BeautifulSoup(page_response.text, 'html.parser')

    # Find the target div on the current page. A multi-class string passed to
    # class_ is matched against the class attribute exactly as written.
    target_div = page_soup.find('div', class_='archive-main archive-grid columns-3')
    if target_div is None:
        results[link] = "No target div found"
        continue

    # Collect every article that has both a title and an excerpt.
    entries = []
    for article in target_div.find_all('article'):
        title = article.find('h2', class_='entry-title')
        excerpt = article.find('div', class_='post-excerpt')
        if title and excerpt:
            entries.append((title.get_text(), excerpt.get_text()))

    # Always leave an entry for the link so a page without usable articles is
    # reported instead of silently disappearing from the results.
    results[link] = entries if entries else "No articles found in target div"
# Flatten `results` into one row per article (for the CSV export) and echo
# everything to stdout along the way.
exercise_data = []
for link, articles in results.items():
    # Use the last path segment of the link as the category name. Trailing
    # slashes are stripped first; otherwise a URL ending in '/' would yield an
    # empty name.
    name = link.rstrip('/').split('/')[-1]

    print(f"\nName: {name}")

    # Non-list values are status/error messages stored in place of articles.
    if not isinstance(articles, list):
        print(f"Content: {articles}\n")
        continue

    for title, excerpt in articles:
        print(f"Title: {title}")
        print(f"Excerpt: {excerpt}\n")
        exercise_data.append({
            'Name': name,
            'title': title,
            'Excerpt': excerpt,
        })



# Step 5: Write the collected rows to 'exercises.csv' (one row per article),
# or report that there is nothing to write.
if not exercise_data:
    print("No exercise data was found.")
else:
    # Column order follows the key order of the row dictionaries.
    df = pd.DataFrame(exercise_data)
    df.to_csv('exercises.csv', index=False)
    print("Data successfully saved to 'exercises.csv'")


Processing: https://www.docteur-fitness.com/exercices-epaules
Processing: https://www.docteur-fitness.com/exercices-biceps
Processing: https://www.docteur-fitness.com/exercices-triceps
Processing: https://www.docteur-fitness.com/exercices-pectoraux
Processing: https://www.docteur-fitness.com/exercices-dos
Processing: https://www.docteur-fitness.com/exercices-abdominaux
Processing: https://www.docteur-fitness.com/exercices-fessiers
Processing: https://www.docteur-fitness.com/exercices-quadriceps
Processing: https://www.docteur-fitness.com/exercices-ischio-jambiers
Processing: https://www.docteur-fitness.com/exercices-mollets

Name: exercices-epaules
Title: Russian twist avec développé épaules
Excerpt: L’exercice avec développé épaules (en anglais Russian Twist with Overhead Press) est une combinaison dynamique entre le Russian Twist et le développé épaules, offrant une approche complète de renforcement musculaire.…

Title: Développé militaire
Excerpt: Le développé militaire (military pr