In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd


def get_country_links(base_url):
    response = requests.get(base_url)
    links = {}
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        footer = soup.find("footer", id="footer")
        if footer:
            accordians = footer.find_all("div", class_="accordian")
            for bloc in accordians:
                title = bloc.find("div", class_="accordian_title")
                if title and "Pays" in title.text.strip():
                    for a in bloc.find_all("a"):
                        country_name = a.text.strip()
                        country_url = a.get("href")
                        if not country_url.startswith("http"):
                            country_url = "https://www.dabadoc.com" + country_url
                        links[country_name] = country_url
    return links


def get_frequent_searches(url):
    response = requests.get(url)
    results = []
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        footer = soup.find("footer", id="footer")
        if footer:
            accordians = footer.find_all("div", class_="accordian")
            for bloc in accordians:
                title = bloc.find("div", class_="accordian_title")
                if title and "Recherches fréquentes" in title.text.strip():
                    for a in bloc.find_all("a"):
                        search = a.text.strip()
                        search_url = a.get("href")
                        if not search_url.startswith("http"):
                            search_url = "https://www.dabadoc.com" + search_url
                        results.append((search, search_url))
    return results


def extract_doctors(search_url):
    response = requests.get(search_url)
    data = []

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        doctors = soup.find_all("div", class_="result-box")

        for doc in doctors:
            name_tag = doc.find("h2")
            speciality_tag = doc.find("p")
            link_tag = name_tag.find("a") if name_tag else None

            if name_tag and speciality_tag and link_tag:
                name = name_tag.get_text(strip=True)
                speciality = speciality_tag.get_text(strip=True)
                link = link_tag["href"]
                full_link = link if link.startswith("http") else "https://www.dabadoc.com" + link

                
                detail_resp = requests.get(full_link)
                if detail_resp.status_code == 200:
                    detail_soup = BeautifulSoup(detail_resp.text, 'html.parser')
                    
                    cards = detail_soup.find_all("div", class_="card")
                    access = ""
                    for c in cards:
                        title = c.find("h3")
                        if title and "Indications" in title.get_text():
                            card_body = c.find("div", class_="card-text")
                            if card_body:
                                access = card_body.get_text(strip=True)
                            break

                    data.append({
                        "Nom": name,
                        "Spécialité": speciality,
                        "Lien": full_link,
                        "Accès": access
                    })
    return data


pays_liens = get_country_links("https://www.dabadoc.com/tn")
lien_recherches = []
for nom_pays, url_pays in pays_liens.items():
    recherches = get_frequent_searches(url_pays)
    for label, lien in recherches:
        lien_recherches.append((label, lien))



df_total = pd.DataFrame()
for label, lien in lien_recherches[:1]:
    try:
        doctors_data = extract_doctors(lien)
        df = pd.DataFrame(doctors_data)
        df_total = pd.concat([df_total, df], ignore_index=True)
    except Exception as e:
        print(f"Erreur avec {label} : {e}")


In [None]:
df_total.head(10)

In [None]:
def extract_doctors(url):
    reponse = requests.get(url)
    data = []

    if reponse.status_code == 200:
        bs = BeautifulSoup(reponse.text, 'html.parser')
        docteurs = bs.find_all("div", class_="result-box")

        for doc in docteurs:
            bal_nom = doc.find("h2")
            bal_specialite = doc.find("p")
            bal_lien = bal_nom.find("a") if bal_nom else None

            if bal_nom and bal_specialite and bal_lien:
                nom = bal_nom.get_text(strip=True)
                specialite = bal_specialite.get_text(strip=True)
                lien = bal_lien["href"]
                lien_complet = lien if lien.startswith("http") else "https://www.dabadoc.com" + lien

                detail_rep = requests.get(lien_complet)
                if detail_rep.status_code == 200:
                    detail_bs = BeautifulSoup(detail_rep.text, 'html.parser')
                    bal_numero = detail_bs.find(id='phone-number-btn')
                    numero = ""
                    if bal_numero:
                        numero = bal_numero.get('href')
    
                    data.append({
                        "Nom": nom,
                        "Spécialité": specialite,
                        "Lien": lien_complet,
                        "Numero": numero[4:-1]
                    })
    return data

import pandas as pd
import requests
from bs4 import BeautifulSoup
ville=['Casablanca']#,'marrakech','tanger','agadir','rabat']
specialite=['Dentiste']#,'dermatologue','gynecologue','ophtalmologue','pediatre']
df_total = pd.DataFrame()
for v in ville:
    for s in specialite:
        url = f"https://www.dabadoc.com/ma/{s}/{v}"
        print(f"Extraction des médecins pour {v} et {s}")
        try:
            docteurs_data = extract_doctors(url)
            df = pd.DataFrame(docteurs_data)
            df_total = pd.concat([df_total, df], ignore_index=True)
        except Exception as e:
            print(f"Erreur avec {v} et {s} : {e}")
        


Extraction des médecins pour Casablanca et Dentiste


In [3]:
df_total

Unnamed: 0,Nom,Spécialité,Lien,Numero
0,Dr Faissel Bennouna,"Dentiste, Endodontiste, Esthétique dentaire, I...",https://www.dabadoc.com/ma/dentiste/casablanca...,21252229528.0
1,Dr Mouhssine Alj,Dentiste à Casablanca,https://www.dabadoc.com/ma/dentiste/casablanca...,21252223678.0
2,Dr Akesbi Jihane,"Dentiste, Endodontiste, Orthodontiste, Parodon...",https://www.dabadoc.com/ma/dentiste/casablanca...,21252286533.0
3,Dr Sara Barkaoui,"Chirurgie buccale, Dentiste, Endodontiste, Est...",https://www.dabadoc.com/ma/chirurgie-buccale/c...,21252248496.0
4,Dr Zineb El Menjra,"Dentiste, Esthétique dentaire, Endodontiste, O...",https://www.dabadoc.com/ma/dentiste/casablanca...,21252222366.0
5,Dr Ghita Aboumazi,"Dentiste, Pédodontiste, Orthodontiste, Endodon...",https://www.dabadoc.com/ma/dentiste/casablanca...,21252294800.0
6,Dr Meryem Akallal,Dentiste à Casablanca,https://www.dabadoc.com/ma/dentiste/casablanca...,21252221016.0
7,Dr Othmane Zizi,"Dentiste, Pédodontiste à Casablanca",https://www.dabadoc.com/ma/dentiste/casablanca...,21252239138.0
8,Art's Clinic,"Esthétique dentaire, Dentiste, Endodontiste, I...",https://www.dabadoc.com/ma/esthetique-dentaire...,
9,Dr Nezha Iraqui Benzakour,"Parodontologiste, Implantologiste , Dentiste, ...",https://www.dabadoc.com/ma/parodontologiste/ca...,21252299696.0


In [60]:
import requests

# Exemple d'URL d'une API
api_url = "https://anapioficeandfire.com/api/characters"

# Effectuer une requête GET
response = requests.get(api_url)
print(response.status_code)  # Afficher le code de statut de la réponse

# Vérifier si la requête a réussi
if response.status_code == 200:
    # Convertir la réponse en JSON
    data = response.json()
    
else:
    print(f"Erreur lors de la requête : {response.status_code}")

200


API 

In [80]:
data[9]

{'url': 'https://anapioficeandfire.com/api/characters/10',
 'name': '',
 'gender': 'Female',
 'culture': 'Braavosi',
 'born': '',
 'died': '',
 'titles': [],
 'aliases': ['The Veiled Lady'],
 'father': '',
 'mother': '',
 'spouse': '',
 'allegiances': [],
 'books': ['https://anapioficeandfire.com/api/books/5'],
 'povBooks': [],
 'tvSeries': [],
 'playedBy': []}

In [76]:

for x in range(0,len(data)): 
	print(x)
	print(data[x].get('name'))

0

1
Walder
2

3

4

5

6

7

8

9

