In [271]:
# Web-scraping for dataset setup
from bs4 import BeautifulSoup
import requests

In [272]:
# Import url and then perform scraping to extract the list of categories of doctors/physicians
base_url = 'https://www.miodottore.it/categorie' # Set the link baseline
headers = {
    "User-Agent": "Mozilla/5.0"  # To mimic a browser and avoid bot blocking
}
page = requests.get(base_url, headers=headers)
soup = BeautifulSoup(page.content, 'html.parser')

In [273]:
# Extract the categories, get the text in the right class, sub-class
categories_list = soup.select("h3.flex-grow-1.mb-0 a.text-muted")
# Extract the location detail pages from each category
categories_locations = soup.select("div.mt-1 a.text-muted")

In [274]:
# Create a list to hold the category names, main url, location url
category_list_data = [] # Empty list
# Print the list of categories, urls, locations
for category, location_url in zip(categories_list, categories_locations):
    # Extract the category name and URL
    category_name = category.get_text(strip=True) # Get only text part
    category_url = category['href'] # The href part of the category
    main_url = f"https://www.miodottore.it{category_url}"
    
    # Append the data as a tuple to the list
    category_list_data.append({
        'name': category_name,
        'main_page': main_url,
        'location_page': location_url['href'] # Extract the href from the location URL
    })

In [275]:
category_list_data

[{'name': 'Agopuntori',
  'main_page': 'https://www.miodottore.it/agopuntore',
  'location_page': 'https://www.miodottore.it/categorie/nel-dettaglio/agopuntore'},
 {'name': 'Allergologi',
  'main_page': 'https://www.miodottore.it/allergologo',
  'location_page': 'https://www.miodottore.it/categorie/nel-dettaglio/allergologo'},
 {'name': 'Analisti Clinici',
  'main_page': 'https://www.miodottore.it/analista-clinico',
  'location_page': 'https://www.miodottore.it/categorie/nel-dettaglio/analista-clinico'},
 {'name': 'Anatomopatologi',
  'main_page': 'https://www.miodottore.it/anatomopatologo',
  'location_page': 'https://www.miodottore.it/categorie/nel-dettaglio/anatomopatologo'},
 {'name': 'Andrologi',
  'main_page': 'https://www.miodottore.it/andrologo',
  'location_page': 'https://www.miodottore.it/categorie/nel-dettaglio/andrologo'},
 {'name': 'Anestesisti',
  'main_page': 'https://www.miodottore.it/anestesista',
  'location_page': 'https://www.miodottore.it/categorie/nel-dettaglio/a

In [276]:
# Find all specific city links from the location page
for entry in category_list_data:
    main_url = entry['main_page'] # Get the main page URL (Found at the beginning of all links)
    location_page_url = entry['location_page'] # Get the location detail page URL, collective not specific cities
    headers = {
    "User-Agent": "Mozilla/5.0"  # To mimic a browser and avoid bot blocking
    }
    # Derive the new page and soup object for the location page
    location_page = requests.get(location_page_url, headers=headers)
    location_soup = BeautifulSoup(location_page.content, 'html.parser')
    # Extract the specific city links from the location page
    city_links = location_soup.select("li.col-md-6 a")
    city_link_list = [f'https://www.miodottore.it{link["href"]}' for link in city_links]
    # Add the city links to the entry
    entry['city_links'] = city_link_list

In [277]:
# Sanity check
category_list_data

[{'name': 'Agopuntori',
  'main_page': 'https://www.miodottore.it/agopuntore',
  'location_page': 'https://www.miodottore.it/categorie/nel-dettaglio/agopuntore',
  'city_links': ['https://www.miodottore.it/agopuntore/abbadia-alpina',
   'https://www.miodottore.it/agopuntore/acerra',
   'https://www.miodottore.it/agopuntore/acilia',
   'https://www.miodottore.it/agopuntore/acireale',
   'https://www.miodottore.it/agopuntore/acqui-terme',
   'https://www.miodottore.it/agopuntore/agliana',
   'https://www.miodottore.it/agopuntore/airola',
   'https://www.miodottore.it/agopuntore/alba',
   'https://www.miodottore.it/agopuntore/albano-laziale',
   'https://www.miodottore.it/agopuntore/albignasego',
   'https://www.miodottore.it/agopuntore/albino',
   'https://www.miodottore.it/agopuntore/alessandria',
   'https://www.miodottore.it/agopuntore/ancona',
   'https://www.miodottore.it/agopuntore/angri',
   'https://www.miodottore.it/agopuntore/antegnate',
   'https://www.miodottore.it/agopuntore

In [278]:
# Check that we get doctor's urls for a specific city (Sanity check 2)
doctor_urls = []
city_page = requests.get('https://www.miodottore.it/anestesista/abano-terme', headers= headers)
city_soup = BeautifulSoup(city_page.content, 'html.parser')
doctor_links = city_soup.select("h3.h4.mb-0.flex-wrap a.text-body")
doctor_urls.extend([f'{link["href"]}' for link in doctor_links])
doctor_urls


['https://www.miodottore.it/vincenzo-tegazzin/anestesista/padova',
 'https://www.miodottore.it/ariadna-tudurachi/anestesista/albignasego',
 'https://www.miodottore.it/giorgio-davia/anestesista/selvazzano-dentro',
 'https://www.miodottore.it/dorotea-magaldi/anestesista-agopuntore/padova',
 'https://www.miodottore.it/cinzia-favaro/terapista-del-dolore/vicenza',
 'https://www.miodottore.it/strutture/fisiotecnik-s-r-l-2',
 'https://www.miodottore.it/giandomenico-babbolin/anestesista-medico-di-base-dentista/puegnago-sul-garda',
 'https://www.miodottore.it/strutture/centro-medico-serena-2',
 'https://www.miodottore.it/mario-trivellato/anestesista-cardiologo-internista/padova',
 'https://www.miodottore.it/massimo-rossato/agopuntore-anestesista-terapista-del-dolore/padova',
 'https://www.miodottore.it/gianfranco-sattin/anestesista/padova',
 'https://www.miodottore.it/angela-menegazzo/anestesista/padova',
 'https://www.miodottore.it/ivaldo-polo/medico-di-base-anestesista-medico-dello-sport/aban

In [None]:
# Now, we will collect doctor's page url from each city link
# Original code (Taking too long)
# doctor_urls = []  # List to hold all doctor URLs
#for entry in category_list_data:
#    city_links = entry['city_links']  # Get the list of city links
#    headers = {
#            "User-Agent": "Mozilla/5.0"  # To mimic a browser and avoid bot blocking
#        }
#    for city_link in city_links:
        
        # Fetch the page for the specific city
#        city_page = requests.get(city_link, headers=headers)
#        city_soup = BeautifulSoup(city_page.content, 'html.parser')
        
        # Extract doctor links from the city page
#       doctor_links = city_soup.select("h3.h4.mb-0.flex-wrap a.text-body")
#       doctor_urls.extend([f'{link["href"]}' for link in doctor_links])


In [279]:
# Define a function to fetch doctor links from a city URL using threading
from concurrent.futures import ThreadPoolExecutor, as_completed

import time, random

def fetch_doctor_links(city_url):
    headers = {
        "User-Agent": "Mozilla/5.0"
    }
    try:
        time.sleep(random.uniform(0.5, 1.2))  # Add delay per request
        resp = requests.get(city_url, headers=headers, timeout=10)
        soup = BeautifulSoup(resp.content, 'html.parser')
        doctor_links = soup.select("h3.h4.mb-0 a.text-body")

        if len(doctor_links) == 0:
            print(f"⚠️ No doctor links found for: {city_url}")
        return [f"{link['href']}" for link in doctor_links]

    except Exception as e:
        print(f"❌ Failed: {city_url}\nReason: {e}")
        return []



In [18]:
# Multithreaded scraping to fetch doctor links from all city pages
from tqdm import tqdm  # optional, for progress bar

doctor_urls = []  # master list

for entry in tqdm(category_list_data):
    city_links = entry['city_links']
    
    entry_doctor_urls = []
    with ThreadPoolExecutor(max_workers= 5) as executor:
        futures = [executor.submit(fetch_doctor_links, city) for city in city_links]
        for future in as_completed(futures):
            entry_doctor_urls.extend(future.result())

    entry["doctor_urls"] = entry_doctor_urls
    doctor_urls.extend(entry_doctor_urls)  # As I want a full list


  1%|          | 1/96 [03:37<5:44:31, 217.59s/it]

⚠️ No doctor links found for: https://www.miodottore.it/allergologo/san-vito-di-cadore


  5%|▌         | 5/96 [16:54<5:47:54, 229.39s/it]

❌ Failed: https://www.miodottore.it/anestesista/aquileia
Reason: Response ended prematurely


 23%|██▎       | 22/96 [1:15:01<3:27:24, 168.17s/it]

⚠️ No doctor links found for: https://www.miodottore.it/covid-test/livorno


 24%|██▍       | 23/96 [1:15:13<2:27:41, 121.39s/it]

❌ Failed: https://www.miodottore.it/dentista/cassino-d-alberi
Reason: Exceeded 30 redirects.
❌ Failed: https://www.miodottore.it/dentista/san-giorgio-al-tagliamento
Reason: Exceeded 30 redirects.
❌ Failed: https://www.miodottore.it/dentista/verona
Reason: Response ended prematurely


 65%|██████▍   | 62/96 [5:10:57<4:34:02, 483.59s/it] 

❌ Failed: https://www.miodottore.it/odontotecnico/rimini
Reason: Response ended prematurely


 92%|█████████▏| 88/96 [7:55:13<37:17, 279.64s/it]  

❌ Failed: https://www.miodottore.it/sessuologo/viterbo
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 93%|█████████▎| 89/96 [7:55:46<24:00, 205.76s/it]

⚠️ No doctor links found for: https://www.miodottore.it/tecnico-radiologo/adelfia
⚠️ No doctor links found for: https://www.miodottore.it/tecnico-radiologo/casirate-d-adda
⚠️ No doctor links found for: https://www.miodottore.it/tecnico-radiologo/castello-d-agogna
⚠️ No doctor links found for: https://www.miodottore.it/tecnico-radiologo/castellammare-di-stabia
⚠️ No doctor links found for: https://www.miodottore.it/tecnico-radiologo/catania
⚠️ No doctor links found for: https://www.miodottore.it/tecnico-radiologo/cecina
⚠️ No doctor links found for: https://www.miodottore.it/tecnico-radiologo/cormons
⚠️ No doctor links found for: https://www.miodottore.it/tecnico-radiologo/cortemaggiore
⚠️ No doctor links found for: https://www.miodottore.it/tecnico-radiologo/fiorenzuola-d-arda
⚠️ No doctor links found for: https://www.miodottore.it/tecnico-radiologo/isola-della-scala
⚠️ No doctor links found for: https://www.miodottore.it/tecnico-radiologo/l-aquila
⚠️ No doctor links found for: https:/

 94%|█████████▍| 90/96 [7:56:30<15:43, 157.32s/it]

⚠️ No doctor links found for: https://www.miodottore.it/tecnico-radiologo/viterbo
⚠️ No doctor links found for: https://www.miodottore.it/tecnico-sanitario/monselice


100%|██████████| 96/96 [8:17:54<00:00, 311.19s/it]


In [284]:
doctor_urls

['https://www.miodottore.it/vincenzo-tegazzin/anestesista/padova',
 'https://www.miodottore.it/ariadna-tudurachi/anestesista/albignasego',
 'https://www.miodottore.it/giorgio-davia/anestesista/selvazzano-dentro',
 'https://www.miodottore.it/dorotea-magaldi/anestesista-agopuntore/padova',
 'https://www.miodottore.it/cinzia-favaro/terapista-del-dolore/vicenza',
 'https://www.miodottore.it/strutture/fisiotecnik-s-r-l-2',
 'https://www.miodottore.it/giandomenico-babbolin/anestesista-medico-di-base-dentista/puegnago-sul-garda',
 'https://www.miodottore.it/strutture/centro-medico-serena-2',
 'https://www.miodottore.it/mario-trivellato/anestesista-cardiologo-internista/padova',
 'https://www.miodottore.it/massimo-rossato/agopuntore-anestesista-terapista-del-dolore/padova',
 'https://www.miodottore.it/gianfranco-sattin/anestesista/padova',
 'https://www.miodottore.it/angela-menegazzo/anestesista/padova',
 'https://www.miodottore.it/ivaldo-polo/medico-di-base-anestesista-medico-dello-sport/aban

In [20]:
# Save the doctor URLS list to a file, to make it usable later
import json
# Save the doctor URLs to a JSON file
with open('doctor_urls.json', 'w') as f:
    json.dump(doctor_urls, f, indent=4)

In [285]:
# Load the doctor URLs from the JSON file
import json
with open('doctor_urls.json', 'r') as f:
    loaded_doctor_urls = json.load(f)

In [288]:
# Check the total number of doctor URLS collected
len(set(loaded_doctor_urls))

121636

In [303]:
# Sanity check for specific doctor URL, found in doctor list or not?
target_url = 'https://www.miodottore.it/maria-rita-bongiorno/dermatologo-venereologo/palermo'

found = False
if target_url in loaded_doctor_urls:
    print(f"✅ Found")
    found = True
else:
    print("❌ Not found in any category")


✅ Found


In [304]:
name_parts = soup.select("div.unified-doctor-header-info__name span") if soup.select("div.unified-doctor-header-info__name span") else None
# Combine name parts
name = " ".join(part.get_text(strip = True) for part in name_parts) if name_parts else None


In [305]:
name

'Dr. Valentina Bellato'

In [313]:
# Check the location, review, and more extraction before starting entire loops
import requests
from bs4 import BeautifulSoup
headers = {
        "User-Agent": "Mozilla/5.0"
    }
resp = requests.get(target_url, headers=headers)
soup = BeautifulSoup(resp.content, 'html.parser')
# Extracting the required details
location = soup.select_one("div.mb-0-25 p")

In [314]:
import re

location_tag = soup.select_one('p[data-test-id="doctor-locations"]')
if location_tag:
    match = re.search(r'\b[A-Z][a-z]+\b', location_tag.get_text())
    location_text = match.group(0) if match else None
else:
    location_text = None


In [315]:
location_text

'Palermo'

In [309]:
# Extracting recensioni (reviews)
rating = soup.select_one('div.rating.rating-lg')
if rating:
    rating_score = rating['data-score']
else:
    rating_score = None

In [310]:
rating_score

'5'

In [323]:
# Check total reviews for specific urls
number_of_reviews = soup.select_one('div.d-flex.justify-content-between.flex-column.flex-sm-row p.mb-0')
if number_of_reviews:
    total_reviews = number_of_reviews.get_text(strip=True).split(" ")[0]
else:
    total_reviews = None

In [324]:
number_of_reviews

<p class="mb-0">10 recensioni</p>

In [325]:
total_reviews

'10'

In [39]:
# Check available services (`Prestazioni`)
services = soup.select('li[data-id="service-item"]')
available_services = {}

for service in services:
    name_tag = service.select_one('h3[itemprop="availableService"]')
    price_tag = service.select_one('div.d-flex div.mr-1')  # gets the first <font> which contains the price

    name = name_tag.get_text(strip=True) if name_tag else None # get one value
    price = price_tag.get_text(strip=True) if price_tag else None # get one value

    if name:
        available_services[name] = price.replace('\xa0', ' ') if price else None # Cut the non-breaking space character



In [41]:
available_services

{}

In [42]:
# Try to get all categoy of eah doctor
# Find the container that holds specializations
# Select specialization tags
specialization_list = []
specialization_tags = soup.select('a.text-base-size.font-weight-normal.text-body')

# Extract and clean the text, excluding "Altro"
if specialization_tags:
    specializations = [
        tag.get_text(strip=True)
        for tag in specialization_tags
        if tag.get_text(strip=True).lower() != 'altro'
    ]

    # Join into a string
    specialization_str = ', '.join(specializations)
    specialization_list.append(specialization_str)
else:
    specialization_list = None


In [43]:
specialization_list

['Agopuntore, Terapeuta, Anestesista']

In [44]:
# Select all <ul> elements with the desired class
ul_elements = soup.select('ul.toggleable-list.pl-2.mb-0')

# Dictionary to hold results by id
categorized_data = {}

for ul in ul_elements:
    ul_id = ul.get('id')
    if ul_id:
        items = [li.get_text(strip=True) for li in ul.find_all('li')]
        categorized_data[ul_id] = items

# Example access
# print(categorized_data['disease'])  # will show list of disease items
# print(categorized_data.keys())      # all section names (ids)


In [45]:
categorized_data

{}

In [None]:
#from tqdm import tqdm  # optional, for progress bar (Manually, slow)
#from bs4 import BeautifulSoup
#import requests
#doctor_names = []  # List to hold doctor names
#for doctor_url in tqdm(loaded_doctor_urls):
#    resp = requests.get(doctor_url, headers={"User-Agent": "Mozilla/5.0"})
#    soup = BeautifulSoup(resp.content, 'html.parser')
#    # Extracting the required details
#    name_parts = soup.select("div.unified-doctor-header-info__name span") if soup.select("div.unified-doctor-header-info__name span") else None
#    # Combine name parts
#    name = " ".join(part.get_text(strip = True) for part in name_parts) if name_parts else None
#    # extend the list with the name
#    doctor_names.append(name)

Full Function for Miodottore-Scraping

In [None]:
# Extract the doctor's data(Name, Location, Recensioni, Prestazioni, Esperienze)
# The last scraping part is getting all these details from each doctor's page
import time, random
import requests
from bs4 import BeautifulSoup
import re
def fetch_doctor_details(doctor_url):
    headers = {
        "User-Agent": "Mozilla/5.0"
    }
    try:
        time.sleep(random.uniform(0.5, 1.2))  # Add delay per request
        resp = requests.get(doctor_url, headers=headers, timeout=10)
        soup = BeautifulSoup(resp.content, 'html.parser')

        # Extracting the name details
        name_parts = soup.select("div.unified-doctor-header-info__name span") if soup.select("div.unified-doctor-header-info__name span") else None
        # Combine name parts
        name = " ".join(part.get_text(strip = True) for part in name_parts) if name_parts else None

        # Extract category and specialization
        # Try to get all categoy of eah doctor
        # Find the container that holds specializations
        # Select specialization tags
        specialization_list = []
        specialization_tags = soup.select('a.text-base-size.font-weight-normal.text-body')

        # Extract and clean the text, excluding "Altro"
        if specialization_tags:
            specializations = [
                tag.get_text(strip=True)
                for tag in specialization_tags
                if tag.get_text(strip=True).lower() != 'altro']

            # Join into a string
            specialization_str = ', '.join(specializations)
            specialization_list.append(specialization_str)
        else:
            specialization_list = None
        
        # Now extract the location
        location_tag = soup.select_one('p[data-test-id="doctor-locations"]')
        if location_tag:
            match = re.search(r'\b[A-Z][a-z]+\b', location_tag.get_text())
            location_text = match.group(0) if match else None
        else:
            location_text = None    
        
        # Extracting reviews (`Recensioni`)
        rating = soup.select_one('div.rating.rating-lg')
        if rating:
            rating_score = rating['data-score']
        else:
            rating_score = None

        # Also extract number of reviews
        number_of_reviews = soup.select_one('div.d-flex.justify-content-between.flex-column.flex-sm-row p.mb-0')
        if number_of_reviews:
            total_reviews = number_of_reviews.get_text(strip=True).split(" ")[0]
        else:
            total_reviews = None

        # Extract all available services (`Prestazioni`)
        # Check available services (`Prestazioni`)
        services = soup.select('li[data-id="service-item"]')
        available_services = {}

        for service in services:
            name_tag = service.select_one('h3[itemprop="availableService"]')
            price_tag = service.select_one('div.d-flex div.mr-1')  # gets the first <font> which contains the price

            name_service = name_tag.get_text(strip=True) if name_tag else None # get one value
            price = price_tag.get_text(strip=True) if price_tag else None # get one value

            if name_service:
                available_services[name_service] = price.replace('\xa0', ' ') if price else None # Cut the non-breaking space character

        # Extract Espirienze
        # Select all <ul> elements with the desired class
        ul_elements = soup.select('ul.toggleable-list.pl-2.mb-0')

        # Dictionary to hold results by id
        categorized_data = {}

        for ul in ul_elements:
            ul_id = ul.get('id')
            if ul_id:
                items = [li.get_text(strip=True) for li in ul.find_all('li')]
                categorized_data[ul_id] = items

        return {
            'name': name,
            'categories': specialization_list,
            'url': doctor_url,
            'location': location_text,
            'rating': rating_score,
            'number_of_reviews': total_reviews,
            'prestazioni': available_services,
            'espierienze': categorized_data
        }

    except Exception as e:
        print(f"❌ Failed to fetch details for: {doctor_url}\nReason: {e}")
        return None



In [59]:
# Multithreaded scraping to fetch doctor details from all doctor URLs, check small portion of loaded_doctor_urls first
 # For testing, use a small portion  
from tqdm import tqdm  # optional, for progress bar
from concurrent.futures import ThreadPoolExecutor, as_completed
small_doctor_urls = loaded_doctor_urls[900000:]  # Take first 10 URLs for testing
doctor_details = []  # master list
with ThreadPoolExecutor(max_workers= 5) as executor:
    futures = [executor.submit(fetch_doctor_details, url) for url in small_doctor_urls]
    for future in tqdm(as_completed(futures), total=len(small_doctor_urls)):
        doctor_details.append(future.result())

  1%|          | 1789/177906 [14:51<223:55:01,  4.58s/it]

❌ Failed to fetch details for: https://www.miodottore.it/teresa-colaiacovo/psicologo-sessuologo-psicoterapeuta/genova
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out.


  3%|▎         | 4731/177906 [38:35<139:37:37,  2.90s/it]

❌ Failed to fetch details for: https://www.miodottore.it/dario-agradi-2/psicologo-clinico-psicologo/sesto-san-giovanni
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


  5%|▍         | 8129/177906 [1:05:02<15:33:26,  3.03it/s] IOStream.flush timed out
  6%|▌         | 10659/177906 [1:25:27<286:31:54,  6.17s/it]

❌ Failed to fetch details for: https://www.miodottore.it/aurora-quaranta/psicologo-psicologo-clinico/melegnano
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


  7%|▋         | 11621/177906 [1:33:09<275:57:37,  5.97s/it]

❌ Failed to fetch details for: https://www.miodottore.it/lorenzo-giacomi/psicologo-psicologo-clinico-sessuologo/roma
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


  7%|▋         | 12134/177906 [1:36:59<16:30:01,  2.79it/s] IOStream.flush timed out
  7%|▋         | 12299/177906 [1:38:20<16:43:27,  2.75it/s] IOStream.flush timed out
  8%|▊         | 13633/177906 [1:49:13<276:46:45,  6.07s/it]IOStream.flush timed out
  8%|▊         | 13958/177906 [1:51:34<17:57:43,  2.54it/s] IOStream.flush timed out
  8%|▊         | 14940/177906 [1:59:35<200:22:12,  4.43s/it]

❌ Failed to fetch details for: https://www.miodottore.it/gianpietro-rossi/psicologo-psicoterapeuta-psicologo-clinico/concesio
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


  9%|▊         | 15457/177906 [2:03:42<167:25:28,  3.71s/it]

❌ Failed to fetch details for: https://www.miodottore.it/rossella-campigotto/psicologo-psicoterapeuta-psicologo-clinico/desenzano-del-garda
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


  9%|▉         | 16784/177906 [2:13:48<27:08:56,  1.65it/s] IOStream.flush timed out
 13%|█▎        | 22470/177906 [2:58:53<139:41:00,  3.24s/it]

❌ Failed to fetch details for: https://www.miodottore.it/strutture/centro-salus-palermo-poliambulatorio-medico-specialistico
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 13%|█▎        | 23301/177906 [3:05:41<135:20:15,  3.15s/it]

❌ Failed to fetch details for: https://www.miodottore.it/rossella-campigotto/psicologo-psicoterapeuta-psicologo-clinico/desenzano-del-garda
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 15%|█▍        | 26582/177906 [3:32:02<238:22:16,  5.67s/it]

❌ Failed to fetch details for: https://www.miodottore.it/lorenzo-giacomi/psicologo-psicologo-clinico-sessuologo/roma
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 15%|█▌        | 26747/177906 [3:33:21<193:56:47,  4.62s/it]

❌ Failed to fetch details for: https://www.miodottore.it/enrico-saya/psicologo-psicoterapeuta-psicologo-clinico/siracusa
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 15%|█▌        | 27080/177906 [3:36:00<256:01:46,  6.11s/it]

❌ Failed to fetch details for: https://www.miodottore.it/piergiorgio-annunzi/psicologo-clinico-psicoterapeuta-psicologo/senigallia
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out.


 18%|█▊        | 32262/177906 [4:16:45<12:05:05,  3.35it/s] 

❌ Failed to fetch details for: https://www.miodottore.it/maria-chiara-leveque-2/psicologo-clinico-psicologo/umbertide
Reason: Response ended prematurely


 19%|█▊        | 33209/177906 [4:24:23<212:23:51,  5.28s/it]

❌ Failed to fetch details for: https://www.miodottore.it/donna-almares-palmirotta/psicologo-psicologo-clinico/altamura
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 19%|█▉        | 34255/177906 [4:32:55<239:06:57,  5.99s/it]

❌ Failed to fetch details for: https://www.miodottore.it/francesco-sonzogni/psicologo-psicoterapeuta-psicologo-clinico/bergamo
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 19%|█▉        | 34581/177906 [4:35:16<13:43:11,  2.90it/s] IOStream.flush timed out
 20%|██        | 35786/177906 [4:45:08<14:41:26,  2.69it/s] IOStream.flush timed out
 20%|██        | 36469/177906 [4:50:35<14:54:47,  2.63it/s] IOStream.flush timed out
 21%|██▏       | 38098/177906 [5:04:04<136:32:24,  3.52s/it]

❌ Failed to fetch details for: https://www.miodottore.it/rossella-campigotto/psicologo-psicoterapeuta-psicologo-clinico/desenzano-del-garda
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 25%|██▍       | 44385/177906 [5:56:41<213:29:50,  5.76s/it]

❌ Failed to fetch details for: https://www.miodottore.it/raffaella-argiro/psicologo-clinico/torino
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 26%|██▌       | 45844/177906 [6:08:29<9:06:57,  4.02it/s]  IOStream.flush timed out
 26%|██▌       | 46156/177906 [6:10:58<16:13:21,  2.26it/s] IOStream.flush timed out
 27%|██▋       | 48105/177906 [6:26:56<16:38:35,  2.17it/s] IOStream.flush timed out
 29%|██▉       | 52175/177906 [7:00:39<228:39:04,  6.55s/it]

❌ Failed to fetch details for: https://www.miodottore.it/serena-vitale/psicologo-clinico/passo-corese
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 30%|██▉       | 52498/177906 [7:02:45<10:37:28,  3.28it/s] IOStream.flush timed out
 30%|██▉       | 52499/177906 [7:03:07<174:24:22,  5.01s/it]

❌ Failed to fetch details for: https://www.miodottore.it/luca-vocino/psicologo-clinico-psicologo/agrate-brianza
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 30%|███       | 53494/177906 [7:11:14<173:30:57,  5.02s/it]

❌ Failed to fetch details for: https://www.miodottore.it/michele-scala/psicologo-psicoterapeuta-neuropsicologo/belluno
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 31%|███▏      | 55676/177906 [7:29:06<134:07:42,  3.95s/it]

❌ Failed to fetch details for: https://www.miodottore.it/desiree-esposito/psicologo-psicoterapeuta-psicologo-clinico/montano-lucino
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 31%|███▏      | 55998/177906 [7:31:51<229:02:46,  6.76s/it]

❌ Failed to fetch details for: https://www.miodottore.it/giulia-elisabetta-lombardi/psicoterapeuta-psicologo-psicologo-clinico/bussero
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 32%|███▏      | 56133/177906 [7:32:42<12:24:11,  2.73it/s] IOStream.flush timed out
 33%|███▎      | 58249/177906 [7:49:56<16:37:56,  2.00it/s] IOStream.flush timed out
 35%|███▍      | 62151/177906 [8:21:38<14:54:43,  2.16it/s] IOStream.flush timed out
 36%|███▌      | 63786/177906 [8:34:34<10:08:14,  3.13it/s] IOStream.flush timed out
 38%|███▊      | 66748/177906 [8:59:44<174:32:48,  5.65s/it]

❌ Failed to fetch details for: https://www.miodottore.it/nadia-zucchi/psicoterapeuta-psicologo-clinico-psicologo/genova
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 38%|███▊      | 66908/177906 [9:01:05<130:30:08,  4.23s/it]

❌ Failed to fetch details for: https://www.miodottore.it/ludovica-autelitano/psicologo-psicologo-clinico-psicoterapeuta/reggio-calabria
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 38%|███▊      | 67050/177906 [9:02:23<180:34:10,  5.86s/it]

❌ Failed to fetch details for: https://www.miodottore.it/simona-guerreschi/psicologo-psicoterapeuta/luzzara
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out.


 39%|███▊      | 68622/177906 [9:15:14<7:52:44,  3.85it/s]  IOStream.flush timed out
 41%|████      | 72096/177906 [9:44:17<166:08:16,  5.65s/it]

❌ Failed to fetch details for: https://www.miodottore.it/monica-zaffanella/psicologo/parma
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 41%|████▏     | 73549/177906 [9:56:11<210:29:06,  7.26s/it]

❌ Failed to fetch details for: https://www.miodottore.it/giulia-ghiotto/psicologo-clinico-psicologo/vicenza
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 42%|████▏     | 73890/177906 [9:58:58<10:10:19,  2.84it/s] IOStream.flush timed out
 42%|████▏     | 73893/177906 [9:59:23<103:20:55,  3.58s/it]

❌ Failed to fetch details for: https://www.miodottore.it/stefania-maretti/psicoterapeuta/bolzano
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 43%|████▎     | 76849/177906 [10:24:24<165:00:25,  5.88s/it]

❌ Failed to fetch details for: https://www.miodottore.it/pietronilla-nicita/psicologo-psicoterapeuta-psicologo-clinico/giarre
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 44%|████▎     | 77503/177906 [10:30:01<7:53:57,  3.53it/s]  IOStream.flush timed out
 44%|████▍     | 78639/177906 [10:40:32<124:30:32,  4.52s/it]

❌ Failed to fetch details for: https://www.miodottore.it/claudia-tosello-2/psicoterapeuta-psicologo-psicologo-clinico/asti
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out.


 45%|████▌     | 80292/177906 [10:54:44<131:22:37,  4.85s/it]

❌ Failed to fetch details for: https://www.miodottore.it/cristina-barachetti/psicologo-psicoterapeuta-psicologo-clinico/medolago
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out.


 46%|████▌     | 82231/177906 [11:11:03<82:25:40,  3.10s/it] 

❌ Failed to fetch details for: https://www.miodottore.it/valentina-maisano-2/psicoterapeuta-psicologo/castellanza
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out.


 46%|████▌     | 82232/177906 [11:11:04<62:03:51,  2.34s/it]

❌ Failed to fetch details for: https://www.miodottore.it/michele-scala/psicologo-psicoterapeuta-neuropsicologo/belluno
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out.


 47%|████▋     | 83791/177906 [11:27:08<177:07:26,  6.78s/it]

❌ Failed to fetch details for: https://www.miodottore.it/teresa-perretta/psicologo-psicoterapeuta/santa-maria-capua-vetere
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /teresa-perretta/psicologo-psicoterapeuta/santa-maria-capua-vetere (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53a9396d0>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))❌ Failed to fetch details for: https://www.miodottore.it/diana-di-lorenzo-3/psicoterapeuta-psicologo-psicologo-clinico/caserta
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /diana-di-lorenzo-3/psicoterapeuta-psicologo-psicologo-clinico/caserta (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538b1c990>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))

❌ Failed to fetch details for: https://w

 47%|████▋     | 83797/177906 [11:27:09<49:45:31,  1.90s/it] 

❌ Failed to fetch details for: https://www.miodottore.it/laura-vitagliano-2/psicoterapeuta-psicologo-clinico-psicologo/napoli
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /laura-vitagliano-2/psicoterapeuta-psicologo-clinico-psicologo/napoli (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538ba5bd0>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/giovanni-saladino/neurologo-psicoterapeuta/caserta2
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /giovanni-saladino/neurologo-psicoterapeuta/caserta2 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x539aed950>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/strutture/cli

 47%|████▋     | 83800/177906 [11:27:09<30:30:11,  1.17s/it]

❌ Failed to fetch details for: https://www.miodottore.it/giovanna-albano/psicologo-psicoterapeuta-psicologo-clinico/caserta2
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /giovanna-albano/psicologo-psicoterapeuta-psicologo-clinico/caserta2 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538854910>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83801/177906 [11:27:10<26:47:41,  1.03s/it]

❌ Failed to fetch details for: https://www.miodottore.it/ilva-salerno/psicoterapeuta-psicologo-psicologo-clinico/caserta
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /ilva-salerno/psicoterapeuta-psicologo-psicologo-clinico/caserta (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538855710>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/veronica-sferragatta/psicoterapeuta-psicologo-psicologo-clinico/maddaloni
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /veronica-sferragatta/psicoterapeuta-psicologo-psicologo-clinico/maddaloni (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538855f10>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83804/177906 [11:27:10<17:03:32,  1.53it/s]

❌ Failed to fetch details for: https://www.miodottore.it/strutture/rigenera-centro-medico-caserta
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /strutture/rigenera-centro-medico-caserta (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538857f10>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/luisa-merola/psicologo-psicoterapeuta/santa-maria-capua-vetere
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /luisa-merola/psicologo-psicoterapeuta/santa-maria-capua-vetere (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x539aecc50>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83805/177906 [11:27:10<14:11:59,  1.84it/s]

❌ Failed to fetch details for: https://www.miodottore.it/carolina-mangiacapre/psicologo-psicoterapeuta/caserta2
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /carolina-mangiacapre/psicologo-psicoterapeuta/caserta2 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538ba5f90>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/adriana-colella/psicoterapeuta-psicologo-clinico-psicologo/santa-maria-capua-vetere
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /adriana-colella/psicoterapeuta-psicologo-clinico-psicologo/santa-maria-capua-vetere (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538cd2450>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83808/177906 [11:27:11<9:05:57,  2.87it/s] 

❌ Failed to fetch details for: https://www.miodottore.it/stefania-casale/psicologo-psicoterapeuta-psicologo-clinico/caserta2
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /stefania-casale/psicologo-psicoterapeuta-psicologo-clinico/caserta2 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53885d410>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/marta-ricciardi/psicologo-psicoterapeuta-psicologo-clinico/santa-maria-capua-vetere
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /marta-ricciardi/psicologo-psicoterapeuta-psicologo-clinico/santa-maria-capua-vetere (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53936f850>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed

 47%|████▋     | 83812/177906 [11:27:11<5:52:26,  4.45it/s]

❌ Failed to fetch details for: https://www.miodottore.it/fabrizio-cirillo/psicologo-psicoterapeuta/napoli
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /fabrizio-cirillo/psicologo-psicoterapeuta/napoli (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538ba7610>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/giampaolo-carotenuto/psicoterapeuta-psicologo-psicologo-clinico/marcianise
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /giampaolo-carotenuto/psicoterapeuta-psicologo-psicologo-clinico/marcianise (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538857b10>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/giovann

 47%|████▋     | 83814/177906 [11:27:12<4:40:14,  5.60it/s]

❌ Failed to fetch details for: https://www.miodottore.it/teresa-perretta/psicologo-psicoterapeuta/santa-maria-capua-vetere
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /teresa-perretta/psicologo-psicoterapeuta/santa-maria-capua-vetere (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53885db90>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/katia-frezza/psicologo-psicologo-clinico-psicoterapeuta/napoli
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /katia-frezza/psicologo-psicologo-clinico-psicoterapeuta/napoli (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538ba6290>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83815/177906 [11:27:12<6:43:18,  3.89it/s]

❌ Failed to fetch details for: https://www.miodottore.it/emanuela-la-marca/psicoterapeuta/caserta
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /emanuela-la-marca/psicoterapeuta/caserta (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538855dd0>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/diana-di-lorenzo-3/psicoterapeuta-psicologo-psicologo-clinico/caserta
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /diana-di-lorenzo-3/psicoterapeuta-psicologo-psicologo-clinico/caserta (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538857d10>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83817/177906 [11:27:12<5:23:10,  4.85it/s]

❌ Failed to fetch details for: https://www.miodottore.it/laura-vitagliano-2/psicoterapeuta-psicologo-clinico-psicologo/napoli
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /laura-vitagliano-2/psicoterapeuta-psicologo-clinico-psicologo/napoli (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x539aefc90>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/michele-scala/psicologo-psicoterapeuta-neuropsicologo/belluno
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /michele-scala/psicologo-psicoterapeuta-neuropsicologo/belluno (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53936ec50>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83819/177906 [11:27:13<4:35:26,  5.69it/s]

❌ Failed to fetch details for: https://www.miodottore.it/walter-colesso/psicologo-psicoterapeuta-terapeuta/treviso
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /walter-colesso/psicologo-psicoterapeuta-terapeuta/treviso (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53885d7d0>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83820/177906 [11:27:13<4:52:48,  5.36it/s]

❌ Failed to fetch details for: https://www.miodottore.it/elena-santomartino-2/psicologo-clinico-psicoterapeuta-psicologo/preganziol
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /elena-santomartino-2/psicologo-clinico-psicoterapeuta-psicologo/preganziol (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x535ff4a90>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/chiara-da-ronch/psicologo-psicoterapeuta-psicologo-clinico/mestre
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /chiara-da-ronch/psicologo-psicoterapeuta-psicologo-clinico/mestre (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x539aec790>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83823/177906 [11:27:13<4:41:02,  5.58it/s]

❌ Failed to fetch details for: https://www.miodottore.it/francesca-bozzo/psicoterapeuta/conegliano
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /francesca-bozzo/psicoterapeuta/conegliano (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x539aec3d0>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/fulvia-fragiacomo/psicologo-psicoterapeuta-psicologo-clinico/torino
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /fulvia-fragiacomo/psicologo-psicoterapeuta-psicologo-clinico/torino (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538856910>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/sebastiano-pegorer/psicoterapeuta/t

 47%|████▋     | 83825/177906 [11:27:14<4:49:28,  5.42it/s]

❌ Failed to fetch details for: https://www.miodottore.it/francesco-tesser-2/psicologo-psicoterapeuta-sessuologo/roncade
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /francesco-tesser-2/psicologo-psicoterapeuta-sessuologo/roncade (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53885fd10>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/nicoletta-sferco-2/psicoterapeuta-psicologo-clinico-psicologo/silea
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /nicoletta-sferco-2/psicoterapeuta-psicologo-clinico-psicologo/silea (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x5398571d0>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83827/177906 [11:27:14<4:11:49,  6.23it/s]

❌ Failed to fetch details for: https://www.miodottore.it/eleonora-carraro/psicologo-psicologo-clinico-psicoterapeuta/treviso
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /eleonora-carraro/psicologo-psicologo-clinico-psicoterapeuta/treviso (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538b1f690>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83828/177906 [11:27:14<4:52:25,  5.36it/s]

❌ Failed to fetch details for: https://www.miodottore.it/lisa-cendron/psicoterapeuta/treviso
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /lisa-cendron/psicoterapeuta/treviso (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x539aee190>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/raffaele-falato/psichiatra-psicoterapeuta-medico-certificatore/chieti
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /raffaele-falato/psichiatra-psicoterapeuta-medico-certificatore/chieti (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x4b8095190>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83834/177906 [11:27:15<3:34:16,  7.32it/s]

❌ Failed to fetch details for: https://www.miodottore.it/federica-bello2/psicoterapeuta/treviso
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /federica-bello2/psicoterapeuta/treviso (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538856550>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/roberto-lucchetta/psicologo-psicoterapeuta/frescada
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /roberto-lucchetta/psicologo-psicoterapeuta/frescada (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53885ddd0>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/stefano-angelini-4/psicologo-psicoterapeuta-sessuologo/treviso
Reason: HT

 47%|████▋     | 83836/177906 [11:27:16<5:40:58,  4.60it/s]

❌ Failed to fetch details for: https://www.miodottore.it/roberto-cetroni-2/psicologo-psicoterapeuta-psicologo-clinico/napoli
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /roberto-cetroni-2/psicologo-psicoterapeuta-psicologo-clinico/napoli (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x535ff4890>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/francesca-corocher/psicologo-psicoterapeuta/san-vendemiano
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /francesca-corocher/psicologo-psicoterapeuta/san-vendemiano (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x539aecf90>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83839/177906 [11:27:16<4:14:48,  6.15it/s]

❌ Failed to fetch details for: https://www.miodottore.it/monica-inio/psicologo/treviso
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /monica-inio/psicologo/treviso (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53885f150>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/erika-scala/psicologo-psicoterapeuta-psicologo-clinico/casoli2
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /erika-scala/psicologo-psicoterapeuta-psicologo-clinico/casoli2 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x539857190>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/michele-scala/psicologo-psicoterapeuta-neuropsicologo/belluno
Reason:

 47%|████▋     | 83841/177906 [11:27:17<4:33:36,  5.73it/s]

❌ Failed to fetch details for: https://www.miodottore.it/katia-marinelli/psicologo-clinico-psicologo/chieti-scalo
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /katia-marinelli/psicologo-clinico-psicologo/chieti-scalo (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x539aeccd0>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/irene-ferrara2/psicoterapeuta/pescara
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /irene-ferrara2/psicoterapeuta/pescara (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53936ef90>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83844/177906 [11:27:17<4:23:40,  5.95it/s]

❌ Failed to fetch details for: https://www.miodottore.it/silvia-marfisi/psicologo/lanciano
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /silvia-marfisi/psicologo/lanciano (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53885f690>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/gisella-la-palombara/psicologo-psicologo-clinico-psicoterapeuta/lanciano
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /gisella-la-palombara/psicologo-psicologo-clinico-psicoterapeuta/lanciano (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538854c10>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/matteo-caffarelli/psicologo-psicoterapeut

 47%|████▋     | 83846/177906 [11:27:17<4:12:36,  6.21it/s]

❌ Failed to fetch details for: https://www.miodottore.it/maria-rapino/psicologo-psicoterapeuta-psicologo-clinico/lanciano
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /maria-rapino/psicologo-psicoterapeuta-psicologo-clinico/lanciano (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x539854510>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/sara-d-aristotile/psicologo-psicologo-clinico-psicoterapeuta/pescara
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /sara-d-aristotile/psicologo-psicologo-clinico-psicoterapeuta/pescara (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538854750>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83849/177906 [11:27:18<3:56:45,  6.62it/s]

❌ Failed to fetch details for: https://www.miodottore.it/valentina-carloni-2/psicoterapeuta-psicologo/ancona
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /valentina-carloni-2/psicoterapeuta-psicologo/ancona (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53936e710>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/emanuela-sonsini-2/psicologo-psicoterapeuta-psicologo-clinico/chieti
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /emanuela-sonsini-2/psicologo-psicoterapeuta-psicologo-clinico/chieti (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x539aeced0>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/laura-tavani/

 47%|████▋     | 83851/177906 [11:27:18<4:03:50,  6.43it/s]

❌ Failed to fetch details for: https://www.miodottore.it/alessia-malaguti-2/psicologo-psicoterapeuta-psicologo-clinico/san-lazzaro-di-savena
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /alessia-malaguti-2/psicologo-psicoterapeuta-psicologo-clinico/san-lazzaro-di-savena (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x4b8097450>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/ashanti-coticchia/psicologo-psicologo-clinico/ortona
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /ashanti-coticchia/psicologo-psicologo-clinico/ortona (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53a167bd0>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83852/177906 [11:27:18<4:17:08,  6.10it/s]

❌ Failed to fetch details for: https://www.miodottore.it/michela-bombardini2/psicologo-psicoterapeuta-psicologo-clinico/formigine
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /michela-bombardini2/psicologo-psicoterapeuta-psicologo-clinico/formigine (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x4bdf2ddd0>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83855/177906 [11:27:19<4:03:14,  6.44it/s]

❌ Failed to fetch details for: https://www.miodottore.it/marina-fregni/psicoterapeuta/modena
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /marina-fregni/psicoterapeuta/modena (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53885dd10>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/giacomo-galli-3/psichiatra-psicoterapeuta/modena
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /giacomo-galli-3/psichiatra-psicoterapeuta/modena (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x539aec510>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/mario-aldovini/psicologo-psicoterapeuta/modena
Reason: HTTPSConnectionPool(host='www.

 47%|████▋     | 83857/177906 [11:27:19<4:47:20,  5.46it/s]

❌ Failed to fetch details for: https://www.miodottore.it/cecilia-tardini/psicologo-psicoterapeuta/sassuolo
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /cecilia-tardini/psicologo-psicoterapeuta/sassuolo (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x538854550>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/giulia-pratesi-2/psicologo-psicoterapeuta/modena
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /giulia-pratesi-2/psicologo-psicoterapeuta/modena (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53a167e10>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83860/177906 [11:27:20<4:22:35,  5.97it/s]

❌ Failed to fetch details for: https://www.miodottore.it/andres-langer/psicologo-psicoterapeuta-psicologo-clinico/reggio-emilia
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /andres-langer/psicologo-psicoterapeuta-psicologo-clinico/reggio-emilia (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x4bf4cd9d0>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/giovanni-previti/psicoterapeuta/guastalla
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /giovanni-previti/psicoterapeuta/guastalla (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x539855e10>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/elisa-giacobazzi-2/psicoterap

 47%|████▋     | 83861/177906 [11:27:20<4:25:13,  5.91it/s]

❌ Failed to fetch details for: https://www.miodottore.it/lucio-aucello/psicoterapeuta-psicologo-terapeuta/modena
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /lucio-aucello/psicoterapeuta-psicologo-terapeuta/modena (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53885ef10>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83862/177906 [11:27:20<5:17:57,  4.93it/s]

❌ Failed to fetch details for: https://www.miodottore.it/michele-scala/psicologo-psicoterapeuta-neuropsicologo/belluno
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /michele-scala/psicologo-psicoterapeuta-neuropsicologo/belluno (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53885c7d0>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83863/177906 [11:27:21<6:52:56,  3.80it/s]

❌ Failed to fetch details for: https://www.miodottore.it/silvia-diana/psicologo-psicoterapeuta-psicologo-clinico/modena
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /silvia-diana/psicologo-psicoterapeuta-psicologo-clinico/modena (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x539aec110>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/sabrina-pellacini/psicologo/reggio-emilia
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /sabrina-pellacini/psicologo/reggio-emilia (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x53885cb10>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/laura-forcina/psicologo/modena
Reason: HTTPSC

 47%|████▋     | 83866/177906 [11:27:21<4:40:13,  5.59it/s]

❌ Failed to fetch details for: https://www.miodottore.it/rosa-cristiano/psicologo-clinico-psicologo-psicoterapeuta/reggio-nell-emilia
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /rosa-cristiano/psicologo-clinico-psicologo-psicoterapeuta/reggio-nell-emilia (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x4b8095f10>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))


 47%|████▋     | 83867/177906 [11:27:21<5:16:15,  4.96it/s]

❌ Failed to fetch details for: https://www.miodottore.it/susanna-paterlini/psicologo-psicoterapeuta-psicologo-clinico/bagnolo-in-piano
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /susanna-paterlini/psicologo-psicoterapeuta-psicologo-clinico/bagnolo-in-piano (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x4bdf2dcd0>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.miodottore.it/enrico-piccinini/psicoterapeuta-psicologo-clinico/carpi
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Max retries exceeded with url: /enrico-piccinini/psicoterapeuta-psicologo-clinico/carpi (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x539aed310>: Failed to resolve 'www.miodottore.it' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Failed to fetch details for: https://www.m

 48%|████▊     | 85520/177906 [11:41:02<12:55:45,  1.98it/s] IOStream.flush timed out
 48%|████▊     | 85684/177906 [11:42:46<98:42:53,  3.85s/it] 

❌ Failed to fetch details for: https://www.miodottore.it/mauro-salone/psicoterapeuta-psicologo/cecina
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 48%|████▊     | 85863/177906 [11:43:50<10:40:50,  2.39it/s]IOStream.flush timed out
 50%|████▉     | 88892/177906 [12:09:22<13:00:00,  1.90it/s] IOStream.flush timed out
 50%|█████     | 89056/177906 [12:11:08<167:49:14,  6.80s/it]

❌ Failed to fetch details for: https://www.miodottore.it/pietro-viviani/neurologo-psicologo-psicoterapeuta/grisolia
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 51%|█████     | 89931/177906 [12:18:27<110:13:42,  4.51s/it]

❌ Failed to fetch details for: https://www.miodottore.it/antonella-mule/psicologo/savona
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 51%|█████     | 90090/177906 [12:19:41<102:25:46,  4.20s/it]

❌ Failed to fetch details for: https://www.miodottore.it/lorenza-bicchieri-2/psicoterapeuta/parma
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 51%|█████     | 90204/177906 [12:20:21<9:32:52,  2.55it/s]  

❌ Failed to fetch details for: https://www.miodottore.it/santina-claudia-micieli/psicologo-clinico-psicoterapeuta-psicologo/rosolini
Reason: Response ended prematurely


 52%|█████▏    | 93320/177906 [12:47:15<139:01:24,  5.92s/it]

❌ Failed to fetch details for: https://www.miodottore.it/francesca-falco/psicoterapeuta-psicologo-clinico-psicologo/l-aquila
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 55%|█████▍    | 97179/177906 [13:19:47<160:08:16,  7.14s/it]IOStream.flush timed out
 55%|█████▌    | 98150/177906 [13:27:43<10:20:28,  2.14it/s] IOStream.flush timed out
 56%|█████▌    | 98965/177906 [13:34:59<67:44:54,  3.09s/it] 

❌ Failed to fetch details for: https://www.miodottore.it/beatrice-carzola/psicoterapeuta-psicologo-clinico-psicologo/la-spezia
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 57%|█████▋    | 100801/177906 [13:50:23<135:43:15,  6.34s/it]

❌ Failed to fetch details for: https://www.miodottore.it/mauro-vargiu/psicoterapeuta-sessuologo-psicologo/milano
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out.


 57%|█████▋    | 102179/177906 [14:02:06<8:57:10,  2.35it/s]  

❌ Failed to fetch details for: https://www.miodottore.it/jasmine-scioscia/psicologo-clinico-psicoterapeuta-psicologo/milano
Reason: Response ended prematurely


 58%|█████▊    | 102799/177906 [14:07:43<113:05:45,  5.42s/it]

❌ Failed to fetch details for: https://www.miodottore.it/katia-loisi/psicologo-psicologo-clinico-psicoterapeuta/empoli
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 59%|█████▊    | 104260/177906 [14:19:51<6:09:55,  3.32it/s]  IOStream.flush timed out
 59%|█████▉    | 104725/177906 [14:23:55<8:38:54,  2.35it/s]  IOStream.flush timed out
 59%|█████▉    | 105194/177906 [14:28:18<76:36:26,  3.79s/it] 

❌ Failed to fetch details for: https://www.miodottore.it/francesco-de-paola/psicologo-psicoterapeuta-professional-counselor/milano
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out.


 59%|█████▉    | 105360/177906 [14:29:19<9:17:17,  2.17it/s] IOStream.flush timed out
 60%|██████    | 107222/177906 [14:45:07<142:01:19,  7.23s/it]

❌ Failed to fetch details for: https://www.miodottore.it/chiara-di-renzo/psicoterapeuta/padova
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 61%|██████    | 108755/177906 [14:57:51<130:43:57,  6.81s/it]

❌ Failed to fetch details for: https://www.miodottore.it/francesca-greco-4/psicologo-psicologo-clinico/sant-ambrogio
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 62%|██████▏   | 110471/177906 [15:12:02<113:44:30,  6.07s/it]

❌ Failed to fetch details for: https://www.miodottore.it/sarah-meli-3/psicoterapeuta-psicologo-psicologo-clinico/pisa
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 62%|██████▏   | 111149/177906 [15:17:19<8:19:14,  2.23it/s]  IOStream.flush timed out
 63%|██████▎   | 112285/177906 [15:27:21<129:09:46,  7.09s/it]

❌ Failed to fetch details for: https://www.miodottore.it/saverio-fucci/psicologo/milano
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 63%|██████▎   | 112460/177906 [15:28:52<140:59:07,  7.76s/it]

❌ Failed to fetch details for: https://www.miodottore.it/flora-galante-2/psicologo-psicoterapeuta/sinalunga
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 63%|██████▎   | 112632/177906 [15:30:17<83:31:38,  4.61s/it] 

❌ Failed to fetch details for: https://www.miodottore.it/paolo-granone/psicologo-psicoterapeuta/roma
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out.


 64%|██████▍   | 114749/177906 [15:47:54<6:57:01,  2.52it/s]  IOStream.flush timed out
 66%|██████▌   | 116725/177906 [16:04:03<6:34:15,  2.59it/s]  IOStream.flush timed out
 68%|██████▊   | 120590/177906 [16:30:11<64:26:16,  4.05s/it] 

❌ Failed to fetch details for: https://www.miodottore.it/strutture/casa-di-cura-clinica-del-mediterraneo
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 69%|██████▊   | 121877/177906 [16:38:24<53:30:01,  3.44s/it] 

❌ Failed to fetch details for: https://www.miodottore.it/giuseppe-tanfani/allergologo-radiologo/porto-sant-elpidio
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out.


 71%|███████   | 125538/177906 [17:01:18<4:51:50,  2.99it/s]  IOStream.flush timed out
 72%|███████▏  | 127763/177906 [17:15:23<82:40:19,  5.94s/it] 

❌ Failed to fetch details for: https://www.miodottore.it/francesco-palladino/ecografista-radiologo-diagnostico-radiologo/roma
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 72%|███████▏  | 128132/177906 [17:17:58<5:15:00,  2.63it/s]  

❌ Failed to fetch details for: https://www.miodottore.it/daniele-lubrano-2/reumatologo/pozzuoli
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 73%|███████▎  | 130099/177906 [17:30:57<4:14:29,  3.13it/s] IOStream.flush timed out
 76%|███████▋  | 136009/177906 [18:10:22<77:52:23,  6.69s/it] 

❌ Failed to fetch details for: https://www.miodottore.it/alessia-maccagnano/radiologo-senologo-ecografista/squinzano
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)❌ Failed to fetch details for: https://www.miodottore.it/stella-palmarini/senologo-radiologo-diagnostico-ecografista/cutrofiano
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)



 79%|███████▉  | 140318/177906 [18:42:24<44:36:17,  4.27s/it]

❌ Failed to fetch details for: https://www.miodottore.it/giorgio-cornacchia/psicologo-clinico-psicoterapeuta-sessuologo/pescara
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 79%|███████▉  | 140969/177906 [18:47:03<5:10:00,  1.99it/s] IOStream.flush timed out
 87%|████████▋ | 155094/177906 [20:25:25<2:03:14,  3.08it/s] IOStream.flush timed out
 89%|████████▊ | 157560/177906 [20:43:54<35:06:04,  6.21s/it]

❌ Failed to fetch details for: https://www.miodottore.it/corrado-zengarini/dermatologo-tricologo-venereologo/bologna
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 89%|████████▉ | 158366/177906 [20:49:39<2:09:39,  2.51it/s] IOStream.flush timed out
 92%|█████████▏| 164151/177906 [21:34:09<1:23:56,  2.73it/s] IOStream.flush timed out
 93%|█████████▎| 165672/177906 [21:45:17<1:15:43,  2.69it/s] IOStream.flush timed out
 93%|█████████▎| 166248/177906 [21:49:23<48:22,  4.02it/s]   IOStream.flush timed out
 96%|█████████▋| 171609/177906 [22:28:47<7:12:10,  4.12s/it] 

❌ Failed to fetch details for: https://www.miodottore.it/nicolo-cosimo-bizzarri/urologo/san-donato-milanese
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out.


 97%|█████████▋| 171997/177906 [22:31:36<11:34:25,  7.05s/it]

❌ Failed to fetch details for: https://www.miodottore.it/francesco-pietrantuono/urologo/lodi
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


 97%|█████████▋| 172753/177906 [22:36:32<23:55,  3.59it/s]   

❌ Failed to fetch details for: https://www.miodottore.it/orazio-risiglione/urologo/settimo-torinese
Reason: Response ended prematurely


 99%|█████████▉| 175864/177906 [22:59:22<17:13,  1.98it/s]  

❌ Failed to fetch details for: https://www.miodottore.it/umberto-alberio/internista/vimercate
Reason: Response ended prematurely


 99%|█████████▉| 176924/177906 [23:07:48<1:56:34,  7.12s/it]

❌ Failed to fetch details for: https://www.miodottore.it/luca-rubino/dermatologo/capurso
Reason: HTTPSConnectionPool(host='www.miodottore.it', port=443): Read timed out. (read timeout=10)


100%|██████████| 177906/177906 [23:14:42<00:00,  2.13it/s]  


In [60]:
doctor_details

[{'name': 'Dott.ssa Giulia Pedrotti ',
  'categories': ['Psicologo, Neuropsicologo'],
  'url': 'https://www.miodottore.it/giulia-pedrotti/psicologo-neuropsicologo/verona',
  'location': 'Verona',
  'rating': None,
  'number_of_reviews': None,
  'prestazioni': {'Colloquio psicologico': None,
   'Consulenza online': None,
   'Consulenza psicologica': None,
   'Orientamento scolastico': None,
   'Riabilitazione': None,
   'Sostegno psicologico': None,
   'Sostegno psicologico adolescenti': None,
   'Valutazione neuropsicologica': None},
  'espierienze': {'expertIn': ['Psicologia clinica', 'Neuropsicologia']}},
 {'name': 'Davide Cordioli ',
  'categories': ['Psicologo'],
  'url': 'https://www.miodottore.it/davide-cordioli-2/psicologo/verona',
  'location': 'Verona',
  'rating': None,
  'number_of_reviews': None,
  'prestazioni': {'Colloquio psicologico': 'Da 50 €',
   'Consulenza psicologica': 'Da 50 €',
   'Colloquio psicologico clinico': '50 €',
   'Prima Visita': '50 €',
   'Psicoterapi

In [61]:
 # Save the doctor details to a JSON file
import json
with open('doctor_details_example_12.json', 'w') as f:
    json.dump(doctor_details, f, indent=4)

In [62]:
# Import doctor details from the JSON file
import json
with open('doctor_details_example_12.json', 'r') as f:
    doctor_details_example_12 = json.load(f)

In [63]:
doctor_details_example_12[-1]

{'name': 'Prof.ssa Maria Rita Bongiorno ',
 'categories': ['Dermatologo, Venereologo'],
 'url': 'https://www.miodottore.it/maria-rita-bongiorno/dermatologo-venereologo/palermo',
 'location': 'Palermo',
 'rating': '5',
 'number_of_reviews': 'Recensioni',
 'prestazioni': {'Visita dermatologica': '220 €',
  'Visita a domicilio': '400 €',
  'Visita di controllo': '130 €',
  'Visita venereologica': '220 €'},
 'espierienze': {'expertIn': ['Dermatologia e venereologia'],
  'disease': ['Acne',
   'Verruche',
   'Alopecia',
   'Dermatite atopica',
   'Psoriasi',
   'Micosi',
   'Herpes zoster',
   'Malattie sessualmente trasmissibili',
   'Cheratosi attinica',
   'Malattie bollose'],
  'practice': ['.'],
  'publication': ['.'],
  'school': ['Università degli Studi di Palermo'],
  'prize': ['.']}}

In [64]:
len(doctor_details_example_12)  # Check the total number of doctor details collected

177906

In [65]:
import pandas as pd
import json

processed = []
for d in doctor_details_example_12:
    if d is None:
        continue  # Skip bad entries
    
    # Convert nested structures to JSON strings for CSV compatibility
    row = {}
    for k, v in d.items():
        if isinstance(v, (list, dict)):
            row[k] = json.dumps(v, ensure_ascii=False)
        else:
            row[k] = v
    processed.append(row)

df = pd.DataFrame(processed)
df.to_csv("doctor_details_part_10.csv", index=False)


Finish Web Scraping

In [291]:
# Merge all parts of the doctor details CSV files into one
import pandas as pd
import glob # glob is used to find all the pathnames matching a specified pattern

# Get all CSV files in the current directory
csv_files = glob.glob("doctor_details_part_*.csv") # read all parts 

# Read and concatenate all CSV files into a single DataFrame
df_list = [pd.read_csv(file) for file in csv_files]
df = pd.concat(df_list, ignore_index=True)

# Save the final DataFrame to a CSV file
df.to_csv("doctor_details_final.csv", index=False)

Data Cleaning

In [292]:
# Download DF from doctor_details_final.csv
import pandas as pd
# Load the final DataFrame from the CSV file
df = pd.read_csv("doctor_details_final.csv")
df.head()  # Display the first few rows of the DataFrame

  df = pd.read_csv("doctor_details_final.csv")


Unnamed: 0,name,categories,url,location,rating,number_of_reviews,prestazioni,espierienze
0,Dr. Antonio Iasi,"[""Pediatra""]",https://www.miodottore.it/antonio-iasi/pediatr...,Neviano,,,{},{}
1,Dr. Piera Angela Negro,"[""Pediatra""]",https://www.miodottore.it/piera-angela-negro/p...,Gallipoli,,,{},{}
2,Angela Russo,"[""Pediatra""]",https://www.miodottore.it/angela-russo-2/pedia...,Galatone,,,{},{}
3,Dott. Ivana Stapane,"[""Pediatra""]",https://www.miodottore.it/ivana-stapane/pediat...,Galatone,,,{},{}
4,Dr. Salvatore Fulgido,"[""Pediatra""]",https://www.miodottore.it/salvatore-fulgido/pe...,Alezio,,,{},{}


In [2]:
df

Unnamed: 0,name,categories,url,location,rating,number_of_reviews,prestazioni,espierienze
0,Dr. Antonio Iasi,"[""Pediatra""]",https://www.miodottore.it/antonio-iasi/pediatr...,Neviano,,,{},{}
1,Dr. Piera Angela Negro,"[""Pediatra""]",https://www.miodottore.it/piera-angela-negro/p...,Gallipoli,,,{},{}
2,Angela Russo,"[""Pediatra""]",https://www.miodottore.it/angela-russo-2/pedia...,Galatone,,,{},{}
3,Dott. Ivana Stapane,"[""Pediatra""]",https://www.miodottore.it/ivana-stapane/pediat...,Galatone,,,{},{}
4,Dr. Salvatore Fulgido,"[""Pediatra""]",https://www.miodottore.it/salvatore-fulgido/pe...,Alezio,,,{},{}
...,...,...,...,...,...,...,...,...
1077640,Dott.ssa Roberta Alaimo,"[""Dermatologo, Venereologo, Tricologo""]",https://www.miodottore.it/roberta-alaimo/derma...,Palermo,5.0,Recensioni,"{""Prima visita dermatologica"": ""Da 70 €"", ""Inf...","{""expertIn"": [""Dermatologia e venereologia""], ..."
1077641,Dott.ssa Ninfa Alessandra Giacalone,"[""Urologo, Andrologo, Venereologo""]",https://www.miodottore.it/ninfa-alessandra-gia...,Palermo,5.0,Recensioni,"{""Visita urologica"": null, ""Visita di controll...","{""expertIn"": [""Andrologia"", ""Urologia"", ""Medic..."
1077642,Dott.ssa Francesca Todaro,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/francesca-todaro-3/d...,Palermo,5.0,Recensioni,"{""Prima visita dermatologica"": ""100 €"", ""Visit...","{""expertIn"": [""Dermatologia e venereologia""], ..."
1077643,,,https://www.miodottore.it/strutture/mediclinic-2,,5.0,,{},{}


In [3]:
# Clean the doctor csv file by eliminating all rows that have no name, location, or url
df_cleaned_1 = df.dropna(subset=['name'], ignore_index=True)  # Drop rows where 'name' is NaN
df_cleaned_1


Unnamed: 0,name,categories,url,location,rating,number_of_reviews,prestazioni,espierienze
0,Dr. Antonio Iasi,"[""Pediatra""]",https://www.miodottore.it/antonio-iasi/pediatr...,Neviano,,,{},{}
1,Dr. Piera Angela Negro,"[""Pediatra""]",https://www.miodottore.it/piera-angela-negro/p...,Gallipoli,,,{},{}
2,Angela Russo,"[""Pediatra""]",https://www.miodottore.it/angela-russo-2/pedia...,Galatone,,,{},{}
3,Dott. Ivana Stapane,"[""Pediatra""]",https://www.miodottore.it/ivana-stapane/pediat...,Galatone,,,{},{}
4,Dr. Salvatore Fulgido,"[""Pediatra""]",https://www.miodottore.it/salvatore-fulgido/pe...,Alezio,,,{},{}
...,...,...,...,...,...,...,...,...
994773,Dott.ssa Elena Mazzola,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/elena-mazzola/dermat...,Palermo,5.0,Recensioni,"{""Visita dermatologica"": ""100 €"", ""Crioterapia...","{""expertIn"": [""Dermatologia e venereologia""], ..."
994774,Dott.ssa Roberta Alaimo,"[""Dermatologo, Venereologo, Tricologo""]",https://www.miodottore.it/roberta-alaimo/derma...,Palermo,5.0,Recensioni,"{""Prima visita dermatologica"": ""Da 70 €"", ""Inf...","{""expertIn"": [""Dermatologia e venereologia""], ..."
994775,Dott.ssa Ninfa Alessandra Giacalone,"[""Urologo, Andrologo, Venereologo""]",https://www.miodottore.it/ninfa-alessandra-gia...,Palermo,5.0,Recensioni,"{""Visita urologica"": null, ""Visita di controll...","{""expertIn"": [""Andrologia"", ""Urologia"", ""Medic..."
994776,Dott.ssa Francesca Todaro,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/francesca-todaro-3/d...,Palermo,5.0,Recensioni,"{""Prima visita dermatologica"": ""100 €"", ""Visit...","{""expertIn"": [""Dermatologia e venereologia""], ..."


In [4]:
# Separate the name column into names and prefixes
df_cleaned_1[['prefix', 'name']] = df_cleaned_1['name'].str.split(' ', n = 1, expand=True) # expand=True creates new columns
df_cleaned_1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned_1[['prefix', 'name']] = df_cleaned_1['name'].str.split(' ', n = 1, expand=True) # expand=True creates new columns


Unnamed: 0,name,categories,url,location,rating,number_of_reviews,prestazioni,espierienze,prefix
0,Antonio Iasi,"[""Pediatra""]",https://www.miodottore.it/antonio-iasi/pediatr...,Neviano,,,{},{},Dr.
1,Piera Angela Negro,"[""Pediatra""]",https://www.miodottore.it/piera-angela-negro/p...,Gallipoli,,,{},{},Dr.
2,Russo,"[""Pediatra""]",https://www.miodottore.it/angela-russo-2/pedia...,Galatone,,,{},{},Angela
3,Ivana Stapane,"[""Pediatra""]",https://www.miodottore.it/ivana-stapane/pediat...,Galatone,,,{},{},Dott.
4,Salvatore Fulgido,"[""Pediatra""]",https://www.miodottore.it/salvatore-fulgido/pe...,Alezio,,,{},{},Dr.
...,...,...,...,...,...,...,...,...,...
994773,Elena Mazzola,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/elena-mazzola/dermat...,Palermo,5.0,Recensioni,"{""Visita dermatologica"": ""100 €"", ""Crioterapia...","{""expertIn"": [""Dermatologia e venereologia""], ...",Dott.ssa
994774,Roberta Alaimo,"[""Dermatologo, Venereologo, Tricologo""]",https://www.miodottore.it/roberta-alaimo/derma...,Palermo,5.0,Recensioni,"{""Prima visita dermatologica"": ""Da 70 €"", ""Inf...","{""expertIn"": [""Dermatologia e venereologia""], ...",Dott.ssa
994775,Ninfa Alessandra Giacalone,"[""Urologo, Andrologo, Venereologo""]",https://www.miodottore.it/ninfa-alessandra-gia...,Palermo,5.0,Recensioni,"{""Visita urologica"": null, ""Visita di controll...","{""expertIn"": [""Andrologia"", ""Urologia"", ""Medic...",Dott.ssa
994776,Francesca Todaro,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/francesca-todaro-3/d...,Palermo,5.0,Recensioni,"{""Prima visita dermatologica"": ""100 €"", ""Visit...","{""expertIn"": [""Dermatologia e venereologia""], ...",Dott.ssa


In [5]:
# Check prefixes
df_prefix = df_cleaned_1['prefix'].unique() # Check unique prefixes in the prefix column
# Check if the prefix is in the list of prefixes
prefixes = ['Dr.', 'Dott.', 'Prof.', 'Prof.ssa', 'Dott.ssa', 'Dottor.', 'Dottore', 'Dottoressa']
df_cleaned_1['is_prefix'] = df_cleaned_1['prefix'].isin(prefixes)  # Create a new column to check if the prefix is in the list
df_cleaned_1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned_1['is_prefix'] = df_cleaned_1['prefix'].isin(prefixes)  # Create a new column to check if the prefix is in the list


Unnamed: 0,name,categories,url,location,rating,number_of_reviews,prestazioni,espierienze,prefix,is_prefix
0,Antonio Iasi,"[""Pediatra""]",https://www.miodottore.it/antonio-iasi/pediatr...,Neviano,,,{},{},Dr.,True
1,Piera Angela Negro,"[""Pediatra""]",https://www.miodottore.it/piera-angela-negro/p...,Gallipoli,,,{},{},Dr.,True
2,Russo,"[""Pediatra""]",https://www.miodottore.it/angela-russo-2/pedia...,Galatone,,,{},{},Angela,False
3,Ivana Stapane,"[""Pediatra""]",https://www.miodottore.it/ivana-stapane/pediat...,Galatone,,,{},{},Dott.,True
4,Salvatore Fulgido,"[""Pediatra""]",https://www.miodottore.it/salvatore-fulgido/pe...,Alezio,,,{},{},Dr.,True
...,...,...,...,...,...,...,...,...,...,...
994773,Elena Mazzola,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/elena-mazzola/dermat...,Palermo,5.0,Recensioni,"{""Visita dermatologica"": ""100 €"", ""Crioterapia...","{""expertIn"": [""Dermatologia e venereologia""], ...",Dott.ssa,True
994774,Roberta Alaimo,"[""Dermatologo, Venereologo, Tricologo""]",https://www.miodottore.it/roberta-alaimo/derma...,Palermo,5.0,Recensioni,"{""Prima visita dermatologica"": ""Da 70 €"", ""Inf...","{""expertIn"": [""Dermatologia e venereologia""], ...",Dott.ssa,True
994775,Ninfa Alessandra Giacalone,"[""Urologo, Andrologo, Venereologo""]",https://www.miodottore.it/ninfa-alessandra-gia...,Palermo,5.0,Recensioni,"{""Visita urologica"": null, ""Visita di controll...","{""expertIn"": [""Andrologia"", ""Urologia"", ""Medic...",Dott.ssa,True
994776,Francesca Todaro,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/francesca-todaro-3/d...,Palermo,5.0,Recensioni,"{""Prima visita dermatologica"": ""100 €"", ""Visit...","{""expertIn"": [""Dermatologia e venereologia""], ...",Dott.ssa,True


In [6]:
# Check the rows that have prefix = False
df_no_prefix = df_cleaned_1[df_cleaned_1['is_prefix'] == False]
df_no_prefix

Unnamed: 0,name,categories,url,location,rating,number_of_reviews,prestazioni,espierienze,prefix,is_prefix
2,Russo,"[""Pediatra""]",https://www.miodottore.it/angela-russo-2/pedia...,Galatone,,,{},{},Angela,False
5,Tarantino,"[""Pediatra""]",https://www.miodottore.it/angelica-tarantino/p...,Tuglie,,,{},{},Angelica,False
7,Cataldi,"[""Pediatra""]",https://www.miodottore.it/assunta-cataldi/pedi...,Tuglie,,,{},{},Assunta,False
8,Rizzo,"[""Pediatra""]",https://www.miodottore.it/lelio-rizzo/pediatra...,Galatina,,,{},{},Lelio,False
30,Pollini,"[""Pediatra""]",https://www.miodottore.it/sonia-pollini/pediat...,Arco,,,{},{},Sonia,False
...,...,...,...,...,...,...,...,...,...,...
993400,Antonella Spurio,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/maria-antonella-spur...,Vedano,5.0,Recensioni,"{""Visita dermatologica"": ""Da 140 €"", ""Asportaz...","{""expertIn"": [""Dermatologia e venereologia"", ""...",Maria,False
993940,Antonella Spurio,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/maria-antonella-spur...,Vedano,5.0,Recensioni,"{""Visita dermatologica"": ""Da 140 €"", ""Asportaz...","{""expertIn"": [""Dermatologia e venereologia"", ""...",Maria,False
994128,Antonella Spurio,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/maria-antonella-spur...,Vedano,5.0,Recensioni,"{""Visita dermatologica"": ""Da 140 €"", ""Asportaz...","{""expertIn"": [""Dermatologia e venereologia"", ""...",Maria,False
994723,Antonella Spurio,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/maria-antonella-spur...,Vedano,5.0,Recensioni,"{""Visita dermatologica"": ""Da 140 €"", ""Asportaz...","{""expertIn"": [""Dermatologia e venereologia"", ""...",Maria,False


In [7]:
# For thr rows that have prefix = False, we realize that it's a first name not prefix so we will add it back to the name column
df_cleaned_1.loc[df_cleaned_1['is_prefix'] == False, 'name'] = df_cleaned_1['prefix'] + ' ' + df_cleaned_1['name']
# Change the prefix to None for these rows using if_else
df_cleaned_1.loc[df_cleaned_1['is_prefix'] == False, 'prefix'] = None
# Check df_cleaned_1
df_cleaned_1

Unnamed: 0,name,categories,url,location,rating,number_of_reviews,prestazioni,espierienze,prefix,is_prefix
0,Antonio Iasi,"[""Pediatra""]",https://www.miodottore.it/antonio-iasi/pediatr...,Neviano,,,{},{},Dr.,True
1,Piera Angela Negro,"[""Pediatra""]",https://www.miodottore.it/piera-angela-negro/p...,Gallipoli,,,{},{},Dr.,True
2,Angela Russo,"[""Pediatra""]",https://www.miodottore.it/angela-russo-2/pedia...,Galatone,,,{},{},,False
3,Ivana Stapane,"[""Pediatra""]",https://www.miodottore.it/ivana-stapane/pediat...,Galatone,,,{},{},Dott.,True
4,Salvatore Fulgido,"[""Pediatra""]",https://www.miodottore.it/salvatore-fulgido/pe...,Alezio,,,{},{},Dr.,True
...,...,...,...,...,...,...,...,...,...,...
994773,Elena Mazzola,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/elena-mazzola/dermat...,Palermo,5.0,Recensioni,"{""Visita dermatologica"": ""100 €"", ""Crioterapia...","{""expertIn"": [""Dermatologia e venereologia""], ...",Dott.ssa,True
994774,Roberta Alaimo,"[""Dermatologo, Venereologo, Tricologo""]",https://www.miodottore.it/roberta-alaimo/derma...,Palermo,5.0,Recensioni,"{""Prima visita dermatologica"": ""Da 70 €"", ""Inf...","{""expertIn"": [""Dermatologia e venereologia""], ...",Dott.ssa,True
994775,Ninfa Alessandra Giacalone,"[""Urologo, Andrologo, Venereologo""]",https://www.miodottore.it/ninfa-alessandra-gia...,Palermo,5.0,Recensioni,"{""Visita urologica"": null, ""Visita di controll...","{""expertIn"": [""Andrologia"", ""Urologia"", ""Medic...",Dott.ssa,True
994776,Francesca Todaro,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/francesca-todaro-3/d...,Palermo,5.0,Recensioni,"{""Prima visita dermatologica"": ""100 €"", ""Visit...","{""expertIn"": [""Dermatologia e venereologia""], ...",Dott.ssa,True


In [58]:
# Remove the duplicate rows
df_cleaned_nodup = df_cleaned_1.drop_duplicates(subset=['url'], keep='first', ignore_index=True)
# Check the cleaned dataframe
df_cleaned_nodup

Unnamed: 0,name,categories,url,location,rating,number_of_reviews,prestazioni,espierienze,prefix,is_prefix
0,Antonio Iasi,"[""Pediatra""]",https://www.miodottore.it/antonio-iasi/pediatr...,Neviano,,,{},{},Dr.,True
1,Piera Angela Negro,"[""Pediatra""]",https://www.miodottore.it/piera-angela-negro/p...,Gallipoli,,,{},{},Dr.,True
2,Angela Russo,"[""Pediatra""]",https://www.miodottore.it/angela-russo-2/pedia...,Galatone,,,{},{},,False
3,Ivana Stapane,"[""Pediatra""]",https://www.miodottore.it/ivana-stapane/pediat...,Galatone,,,{},{},Dott.,True
4,Salvatore Fulgido,"[""Pediatra""]",https://www.miodottore.it/salvatore-fulgido/pe...,Alezio,,,{},{},Dr.,True
...,...,...,...,...,...,...,...,...,...,...
114749,Alice Ramondetta,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/alice-ramondetta/der...,Catania,5.0,Recensioni,"{""Visita dermatologica"": ""Da 80 €"", ""Consulenz...","{""expertIn"": [""Dermatologia e venereologia""], ...",Dott.ssa,True
114750,Francesca Todaro,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/francesca-todaro-3/d...,Palermo,5.0,Recensioni,"{""Prima visita dermatologica"": ""100 €"", ""Visit...","{""expertIn"": [""Dermatologia e venereologia""], ...",Dott.ssa,True
114751,Luigi Pisano,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/luigi-pisano/dermato...,Firenze,5.0,Recensioni,{},"{""expertIn"": [""Dermatologia e venereologia""], ...",Dr.,True
114752,Adele Sparavigna,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/adele-sparavigna/der...,Milano,5.0,Recensioni,"{""Prima visita dermatologica"": ""180 €"", ""Visit...","{""expertIn"": [""Dermatologia e venereologia"", ""...",Dott.ssa,True


In [59]:
# Check the final DataFrame
df_cleaned_1['name'].unique().shape  # Check unique names in the name column

# Check is there Maurizio Piani in the name column
maurizio_piani = df_cleaned_nodup[df_cleaned_nodup['name'] == 'Maria Rita Bongiorno ']



In [60]:
maurizio_piani

Unnamed: 0,name,categories,url,location,rating,number_of_reviews,prestazioni,espierienze,prefix,is_prefix
114753,Maria Rita Bongiorno,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/maria-rita-bongiorno...,Palermo,5.0,Recensioni,"{""Visita dermatologica"": ""220 €"", ""Visita a do...","{""expertIn"": [""Dermatologia e venereologia""], ...",Prof.ssa,True


In [None]:
# for df_cleaned_1, add a new location_2 by transforming location to province that the location is in
# For example, Milano -> Milano, Neviano -> Lecce, Gallipoli -> Lecce, etc.


Clean and Manage FNOMCEO

In [199]:
# Look up the fnomceo data
df_fnomceo = pd.read_csv('fnomceo-all-data-up-to-4-gen-2023.csv')
df_fnomceo

  df_fnomceo = pd.read_csv('fnomceo-all-data-up-to-4-gen-2023.csv')


Unnamed: 0,index,fnomceo_id,name,sex,birthdate,age,birthplace,birth_province,data_date,profession,is_active,physician_qualification_place,physician_qualification_date,physician_register_place,physician_register_number,physician_register_date,graduations_count,graduations_faculty_1,graduations_date_1,graduations_place_1,residencies_count,residencies_residency_1,residencies_place_1,residencies_date_1,special_attributes_count,special_attributes_attribute_1,special_attributes_date_1,dentist_register_place,dentist_register_number,dentist_register_date,residencies_residency_2,residencies_place_2,residencies_date_2,dentist_qualification_place,dentist_qualification_date,residencies_residency_3,residencies_place_3,residencies_date_3,residencies_residency_4,residencies_place_4,residencies_date_4,special_attributes_attribute_2,special_attributes_date_2,special_attributes_attribute_3,special_attributes_date_3,residencies_residency_5,residencies_place_5,residencies_date_5,graduations_faculty_2,graduations_date_2,graduations_place_2,special_attributes_attribute_4,special_attributes_date_4,residencies_residency_6,residencies_place_6,residencies_date_6,special_attributes_attribute_5,special_attributes_date_5,special_attributes_attribute_6,special_attributes_date_6,residencies_residency_7,residencies_place_7,residencies_date_7,residencies_residency_8,residencies_place_8,residencies_date_8,residencies_residency_9,residencies_place_9,residencies_date_9
0,0,20.0,Magrini Antonio,M,1953-04-30,69.0,SERRE,SA,2022-12-28,Odontoiatra,True,NAPOLI SECONDA UNIVERSITA,1982-02-01,SALERNO,5167.0,1982-12-31,1.0,MEDICINA E CHIRURGIA,1982-07-26,NAPOLI SECONDA UNIVERSITA,1.0,ODONTOIATRIA E PROTESI DENTARIA,GENOVA,1992-07-06,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1,21.0,Tamborini Emanuela,F,1963-03-28,59.0,VARESE,VA,2023-01-02,Medico,True,PAVIA,1989-01-01,VARESE,4366.0,1989-06-20,1.0,MEDICINA E CHIRURGIA,1988-12-16,PAVIA,1.0,PEDIATRIA,PAVIA,1993-05-25,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2,22.0,Di Fronzo Antonio,M,1954-06-26,68.0,SANT'ANTIMO,,2022-12-23,Medico,True,NAPOLI,1992-01-01,NAPOLI,26132.0,1992-06-29,1.0,MEDICINA E CHIRURGIA,1992-04-09,NAPOLI SECONDA UNIVERSITA,1.0,FONIATRIA,NAPOLI,1998-11-06,1.0,MEDICO DI MEDICINA GENERALE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,3,23.0,Ruta Giovanni,M,1960-08-18,62.0,MODICA,RG,2022-12-19,Medico,True,CATANIA,1986-02-01,RAGUSA,1660.0,1987-01-30,1.0,MEDICINA E CHIRURGIA,1986-10-27,CATANIA,1.0,TISIOLOGIA E MALATTIE DELL'APPARATO RESPIRATORIO,CATANIA,1991-12-02,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,4,24.0,Evangelisti Daniela,F,1955-07-09,67.0,CAMAIORE,LU,2022-11-24,Medico e odontoiatra,True,PISA,1988-01-01,LUCCA,2003.0,1989-06-13,1.0,MEDICINA E CHIRURGIA,1986-12-03,PISA,0.0,,,,0.0,,,LUCCA,41.0,1988-06-28,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
552297,552302,614424.0,Ferrari Fabio,M,1997-08-12,25.0,SERIATE,BG,2023-01-03,Odontoiatra,True,,,,,,1.0,ODONTOIATRIA E PROTESI DENTARIA,2022-10-12,BRESCIA,0.0,,,,0.0,,,BERGAMO,1377.0,2022-12-22,,,,BRESCIA,2022-02-01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
552298,552303,614425.0,Aresi Nicola,M,1995-12-22,27.0,BERGAMO,BG,2023-01-03,Odontoiatra,True,BRESCIA,2022-02-01,,,,1.0,ODONTOIATRIA E PROTESI DENTARIA,2022-10-12,BRESCIA,0.0,,,,0.0,,,BERGAMO,1376.0,2022-12-22,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
552299,552304,614426.0,Pozzoni Elena,F,1997-02-11,25.0,BRESCIA,BS,2023-01-03,Medico,True,,,BERGAMO,8543.0,2022-12-22,1.0,MEDICINA E CHIRURGIA,2022-12-13,BRESCIA,0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
552300,552305,614427.0,Tresoldi Maria Vittoria,F,1996-02-08,26.0,VAPRIO D'ADDA,MI,2023-01-03,Medico,True,,,BERGAMO,8542.0,2022-12-22,1.0,MEDICINA E CHIRURGIA,2022-12-14,PERUGIA,0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [201]:
# Check is there Maurizio Piani in the fnomceo data
maurizio_piani_fnomceo = df_fnomceo[df_fnomceo['name'].str.contains('Negro Piera Angela', na=False, case=False)]
maurizio_piani_fnomceo

Unnamed: 0,index,fnomceo_id,name,sex,birthdate,age,birthplace,birth_province,data_date,profession,is_active,physician_qualification_place,physician_qualification_date,physician_register_place,physician_register_number,physician_register_date,graduations_count,graduations_faculty_1,graduations_date_1,graduations_place_1,residencies_count,residencies_residency_1,residencies_place_1,residencies_date_1,special_attributes_count,special_attributes_attribute_1,special_attributes_date_1,dentist_register_place,dentist_register_number,dentist_register_date,residencies_residency_2,residencies_place_2,residencies_date_2,dentist_qualification_place,dentist_qualification_date,residencies_residency_3,residencies_place_3,residencies_date_3,residencies_residency_4,residencies_place_4,residencies_date_4,special_attributes_attribute_2,special_attributes_date_2,special_attributes_attribute_3,special_attributes_date_3,residencies_residency_5,residencies_place_5,residencies_date_5,graduations_faculty_2,graduations_date_2,graduations_place_2,special_attributes_attribute_4,special_attributes_date_4,residencies_residency_6,residencies_place_6,residencies_date_6,special_attributes_attribute_5,special_attributes_date_5,special_attributes_attribute_6,special_attributes_date_6,residencies_residency_7,residencies_place_7,residencies_date_7,residencies_residency_8,residencies_place_8,residencies_date_8,residencies_residency_9,residencies_place_9,residencies_date_9
90504,90505,97432.0,Negro Piera Angela,F,1953-08-19,69.0,VARESE,VA,2023-01-02,Medico,True,PAVIA,1978-02-01,LECCE,3382.0,1982-01-25,1.0,MEDICINA E CHIRURGIA,1978-07-28,PAVIA,1.0,PUERICULTURA,PAVIA,1981-07-13,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [202]:
# Check the name in df_fnomceo
df_fnomceo['name'].unique().shape  # Check unique names in the name column

(491188,)

In [203]:
# Lower all the name in fnomeceo
df_fnomceo['name'] = df_fnomceo['name'].str.lower().str.strip()  # Lowercase and strip

In [204]:
# Create a new column in df_cleaned_nodup to count the number of repeated names
df_cleaned_nodup['name_count'] = df_cleaned_nodup.groupby('name')['name'].transform('count')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned_nodup['name_count'] = df_cleaned_nodup.groupby('name')['name'].transform('count')


In [205]:
# Check the rows with name_count 
df_cleaned_nodup.loc[df_cleaned_nodup['name_count'] > 1]  # Show only rows with name_count > 1

Unnamed: 0,name,categories,url,location,rating,number_of_reviews,prestazioni,espierienze,prefix,is_prefix,name_count
9,Ermanno Baldo,"[""Allergologo, Immunologo, Pediatra""]",https://www.miodottore.it/ermanno-baldo/allerg...,Riva,5.0,Recensioni,"{""Prima visita allergologica"": ""120 €"", ""Prima...","{""expertIn"": [""Allergologia e immunologia clin...",Dott.,True,2
25,Sofia Pallante,"[""Pediatra, Omeopata, Medico certificatore""]",https://www.miodottore.it/sofia-pallante/pedia...,Roma,5.0,Recensioni,"{""Consulenza online"": ""35 €"", ""Prima visita pe...","{""expertIn"": [""Omeopatia"", ""Omotossicologia"", ...",Dott.ssa,True,2
26,Luigi Palmieri,"[""Pediatra, Allergologo""]",https://www.miodottore.it/luigi-palmieri/pedia...,Bari,5.0,Recensioni,"{""Prima visita pediatrica"": ""85 €"", ""Visita pn...","{""expertIn"": [""Allergologia e immunologia clin...",Dott.,True,2
168,Vincenzo Rosa,"[""Agopuntore, Omeopata, Pediatra""]",https://www.miodottore.it/vincenzo-rosa-3/pedi...,Canneto,,,"{""Prima visita pediatrica"": ""60 € - 100 €"", ""O...","{""expertIn"": [""Medicina olistica"", ""Medicina f...",Dott.,True,2
201,Luigi Di Lorenzo,"[""Pediatra""]",https://www.miodottore.it/luigi-di-lorenzo-2/p...,Napoli,5.0,Recensioni,"{""Visita pediatrica"": null, ""Bilancio di salut...","{""disease"": [""Disturbi Della Crescita"", ""Asma""...",Dr.,True,2
...,...,...,...,...,...,...,...,...,...,...,...
114591,Carlo Marchetti,"[""Urologo""]",https://www.miodottore.it/carlo-marchetti-2/ur...,Pavia,5.0,Recensioni,"{""Prima visita urologica"": ""120 €"", ""Visita ur...","{""expertIn"": [""Urologia""], ""disease"": [""Carcin...",Dr.,True,2
114607,Antonio Romano,"[""Urologo""]",https://www.miodottore.it/antonio-romano-13/ur...,Piove,5.0,Recensioni,"{""Prima visita urologica"": ""140 €"", ""Uroflusso...",{},Dott.,True,2
114725,Giuseppe Benincasa,"[""Urologo""]",https://www.miodottore.it/giuseppe-benincasa/u...,Vallo,,,{},{},,False,2
114735,Maurizio Colombo,"[""Urologo""]",https://www.miodottore.it/maurizio-colombo-2/u...,Villanova,5.0,Recensioni,"{""Visita urologica"": ""130 €""}",{},Dott.,True,3


In [206]:
# Divide the data into two parts, one with name_count > 1 and one with name_count = 1
df_cleaned_nodup_count_gt_1 = df_cleaned_nodup[df_cleaned_nodup['name_count'] > 1]
df_cleaned_nodup_count_eq_1 = df_cleaned_nodup[df_cleaned_nodup['name_count'] == 1]

In [207]:
# Modify the name column in df_cleaned_nodup_count_eq_1 by lowering the case of the name and also remove all the spaces at the end
df_cleaned_nodup_count_eq_1['name'] = df_cleaned_nodup_count_eq_1['name'].str.lower().str.strip()  # Lowercase and stripdf
df_cleaned_nodup_count_gt_1['name'] = df_cleaned_nodup_count_gt_1['name'].str.lower().str.strip()  # Lowercase and stripdf

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned_nodup_count_eq_1['name'] = df_cleaned_nodup_count_eq_1['name'].str.lower().str.strip()  # Lowercase and stripdf
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned_nodup_count_gt_1['name'] = df_cleaned_nodup_count_gt_1['name'].str.lower().str.strip()  # Lowercase and stripdf


In [208]:
# Change the pattern name in df_cleaned_nodup_count_eq_1 to match the name in df_fnomceo
parts = df_cleaned_nodup_count_eq_1['name'].str.split(' ')
df_cleaned_nodup_count_eq_1['first_name'] = parts.apply(lambda x: " ".join(x[:-1]))  # Combine first two parts as name
df_cleaned_nodup_count_eq_1['last_name'] = parts.apply(lambda x: x[-1])  # Last part as last name

# Rejoin the first and last name to form the new name column
df_cleaned_nodup_count_eq_1['new_name'] = df_cleaned_nodup_count_eq_1['last_name'] + ' ' + df_cleaned_nodup_count_eq_1['first_name']
df_cleaned_nodup_count_eq_1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned_nodup_count_eq_1['first_name'] = parts.apply(lambda x: " ".join(x[:-1]))  # Combine first two parts as name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned_nodup_count_eq_1['last_name'] = parts.apply(lambda x: x[-1])  # Last part as last name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-vers

Unnamed: 0,name,categories,url,location,rating,number_of_reviews,prestazioni,espierienze,prefix,is_prefix,name_count,first_name,last_name,new_name
0,antonio iasi,"[""Pediatra""]",https://www.miodottore.it/antonio-iasi/pediatr...,Neviano,,,{},{},Dr.,True,1,antonio,iasi,iasi antonio
1,piera angela negro,"[""Pediatra""]",https://www.miodottore.it/piera-angela-negro/p...,Gallipoli,,,{},{},Dr.,True,1,piera angela,negro,negro piera angela
2,angela russo,"[""Pediatra""]",https://www.miodottore.it/angela-russo-2/pedia...,Galatone,,,{},{},,False,1,angela,russo,russo angela
3,ivana stapane,"[""Pediatra""]",https://www.miodottore.it/ivana-stapane/pediat...,Galatone,,,{},{},Dott.,True,1,ivana,stapane,stapane ivana
4,salvatore fulgido,"[""Pediatra""]",https://www.miodottore.it/salvatore-fulgido/pe...,Alezio,,,{},{},Dr.,True,1,salvatore,fulgido,fulgido salvatore
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114749,alice ramondetta,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/alice-ramondetta/der...,Catania,5.0,Recensioni,"{""Visita dermatologica"": ""Da 80 €"", ""Consulenz...","{""expertIn"": [""Dermatologia e venereologia""], ...",Dott.ssa,True,1,alice,ramondetta,ramondetta alice
114750,francesca todaro,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/francesca-todaro-3/d...,Palermo,5.0,Recensioni,"{""Prima visita dermatologica"": ""100 €"", ""Visit...","{""expertIn"": [""Dermatologia e venereologia""], ...",Dott.ssa,True,1,francesca,todaro,todaro francesca
114751,luigi pisano,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/luigi-pisano/dermato...,Firenze,5.0,Recensioni,{},"{""expertIn"": [""Dermatologia e venereologia""], ...",Dr.,True,1,luigi,pisano,pisano luigi
114752,adele sparavigna,"[""Dermatologo, Venereologo""]",https://www.miodottore.it/adele-sparavigna/der...,Milano,5.0,Recensioni,"{""Prima visita dermatologica"": ""180 €"", ""Visit...","{""expertIn"": [""Dermatologia e venereologia"", ""...",Dott.ssa,True,1,adele,sparavigna,sparavigna adele


In [209]:
# Change the pattern name in df_cleaned_nodup_count_gt_1 to match the name in df_fnomceo
parts = df_cleaned_nodup_count_gt_1['name'].str.split(' ')
df_cleaned_nodup_count_gt_1['first_name'] = parts.apply(lambda x: " ".join(x[:-1]))  # Combine first two parts as name
df_cleaned_nodup_count_gt_1['last_name'] = parts.apply(lambda x: x[-1])  # Last part as last name

# Rejoin the first and last name to form the new name column
df_cleaned_nodup_count_gt_1['new_name'] = df_cleaned_nodup_count_gt_1['last_name'] + ' ' + df_cleaned_nodup_count_gt_1['first_name']
df_cleaned_nodup_count_gt_1.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned_nodup_count_gt_1['first_name'] = parts.apply(lambda x: " ".join(x[:-1]))  # Combine first two parts as name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned_nodup_count_gt_1['last_name'] = parts.apply(lambda x: x[-1])  # Last part as last name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-vers

Unnamed: 0,name,categories,url,location,rating,number_of_reviews,prestazioni,espierienze,prefix,is_prefix,name_count,first_name,last_name,new_name
9,ermanno baldo,"[""Allergologo, Immunologo, Pediatra""]",https://www.miodottore.it/ermanno-baldo/allerg...,Riva,5.0,Recensioni,"{""Prima visita allergologica"": ""120 €"", ""Prima...","{""expertIn"": [""Allergologia e immunologia clin...",Dott.,True,2,ermanno,baldo,baldo ermanno
25,sofia pallante,"[""Pediatra, Omeopata, Medico certificatore""]",https://www.miodottore.it/sofia-pallante/pedia...,Roma,5.0,Recensioni,"{""Consulenza online"": ""35 €"", ""Prima visita pe...","{""expertIn"": [""Omeopatia"", ""Omotossicologia"", ...",Dott.ssa,True,2,sofia,pallante,pallante sofia
26,luigi palmieri,"[""Pediatra, Allergologo""]",https://www.miodottore.it/luigi-palmieri/pedia...,Bari,5.0,Recensioni,"{""Prima visita pediatrica"": ""85 €"", ""Visita pn...","{""expertIn"": [""Allergologia e immunologia clin...",Dott.,True,2,luigi,palmieri,palmieri luigi
168,vincenzo rosa,"[""Agopuntore, Omeopata, Pediatra""]",https://www.miodottore.it/vincenzo-rosa-3/pedi...,Canneto,,,"{""Prima visita pediatrica"": ""60 € - 100 €"", ""O...","{""expertIn"": [""Medicina olistica"", ""Medicina f...",Dott.,True,2,vincenzo,rosa,rosa vincenzo
201,luigi di lorenzo,"[""Pediatra""]",https://www.miodottore.it/luigi-di-lorenzo-2/p...,Napoli,5.0,Recensioni,"{""Visita pediatrica"": null, ""Bilancio di salut...","{""disease"": [""Disturbi Della Crescita"", ""Asma""...",Dr.,True,2,luigi di,lorenzo,lorenzo luigi di


Try Ollama LLM

In [210]:
# Install ollama
!pip install ollama



In [211]:
# Add the province column to location by using ollama
import ollama
from functools import lru_cache

# Model name
model_name = 'mistral'

# Optional: open log file (uncomment if you want file logging)
# log_file = open("province_extraction_log.txt", "w")

@lru_cache(maxsize=None)
def extract_province_of_commune(commune):
    """
    Extracts the Italian province for a given commune using an LLM via Ollama.
    Uses caching to avoid duplicate LLM calls.
    """
    prompt = (
        "Extract the province of Italy from the following commune name:\n"
        "For example, if the commune is 'Milano', the province is 'Milano'. "
        "If the commune is 'Neviano', the province is 'Lecce'.\n"
        "Just return the Italian province name. No explanation.\n"
        "Write the province name in Italian (e.g., Genova, Napoli, Roma).\n"
        "No full sentences, no parentheses, just the name.\n"
        f"Description: {commune}"
    )

    response = ollama.chat(
        model=model_name,
        messages=[{'role': 'user', 'content': prompt}]
    )

    raw_text = response['message']['content'].splitlines()[0].strip().upper()

    # Optional: log to file instead of printing
    # log_file.write(f"{commune} → {raw_text}\n")
    # log_file.flush()

    return raw_text




Merge a province name based on the commune name, using georef dataset (Already cleaned)

In [212]:
# Read the commune_province CSV file
commune_province = pd.read_csv('georef-italy-comune 2.csv')

# check the commune_province dataframe
commune_province['Official Name Comune']

# Merge a province name based on the commune name, using georef dataset (Already cleaned)
df_cleaned_nodup_count_eq_1 = pd.merge(
    df_cleaned_nodup_count_eq_1,
    commune_province[['Official Name Comune', 'Official Name Provincia/Città metropolitana']],
    left_on='location',
    right_on='Official Name Comune',
    how='left')

df_cleaned_nodup_count_gt_1 = pd.merge(
    df_cleaned_nodup_count_gt_1,
    commune_province[['Official Name Comune', 'Official Name Provincia/Città metropolitana']],
    left_on='location',
    right_on='Official Name Comune',
    how='left')

In [213]:
# Making province name column all uppercase
df_cleaned_nodup_count_eq_1['Official Name Provincia/Città metropolitana'] = df_cleaned_nodup_count_eq_1['Official Name Provincia/Città metropolitana'].str.upper()
df_cleaned_nodup_count_gt_1['Official Name Provincia/Città metropolitana'] = df_cleaned_nodup_count_gt_1['Official Name Provincia/Città metropolitana'].str.upper()

In [214]:
# Check the rows that have NaN in the province name column
df_cleaned_nodup_count_eq_1_Na = df_cleaned_nodup_count_eq_1[df_cleaned_nodup_count_eq_1['Official Name Provincia/Città metropolitana'].isna()]
df_cleaned_nodup_count_gt_1_Na = df_cleaned_nodup_count_gt_1[df_cleaned_nodup_count_gt_1['Official Name Provincia/Città metropolitana'].isna()]
df_cleaned_nodup_count_eq_1_not_Na = df_cleaned_nodup_count_eq_1[df_cleaned_nodup_count_eq_1['Official Name Provincia/Città metropolitana'].notna()]
df_cleaned_nodup_count_gt_1_not_Na = df_cleaned_nodup_count_gt_1[df_cleaned_nodup_count_gt_1['Official Name Provincia/Città metropolitana'].notna()]

from tqdm import tqdm

# Drop missing and get unique commune names
unique_communes = df_cleaned_nodup_count_eq_1_Na['location'].dropna().unique()

# Build a mapping
commune_to_province = {comm: extract_province_of_commune(comm) for comm in tqdm(unique_communes)}

# Apply the map to the DataFrame
df_cleaned_nodup_count_eq_1_Na['Official Name Provincia/Città metropolitana'] = df_cleaned_nodup_count_eq_1_Na['location'].map(commune_to_province)


100%|██████████| 1484/1484 [26:17<00:00,  1.06s/it] 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned_nodup_count_eq_1_Na['Official Name Provincia/Città metropolitana'] = df_cleaned_nodup_count_eq_1_Na['location'].map(commune_to_province)


In [215]:
from tqdm import tqdm

# Drop missing and get unique commune names
unique_communes = df_cleaned_nodup_count_gt_1_Na['location'].dropna().unique()

# Build a mapping
commune_to_province = {comm: extract_province_of_commune(comm) for comm in tqdm(unique_communes)}

# Apply the map to the DataFrame
df_cleaned_nodup_count_gt_1_Na['Official Name Provincia/Città metropolitana'] = df_cleaned_nodup_count_gt_1_Na['location'].map(commune_to_province)


100%|██████████| 264/264 [00:10<00:00, 24.51it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned_nodup_count_gt_1_Na['Official Name Provincia/Città metropolitana'] = df_cleaned_nodup_count_gt_1_Na['location'].map(commune_to_province)


In [216]:
df_cleaned_nodup_count_gt_1_Na

Unnamed: 0,name,categories,url,location,rating,number_of_reviews,prestazioni,espierienze,prefix,is_prefix,name_count,first_name,last_name,new_name,Official Name Comune,Official Name Provincia/Città metropolitana
0,ermanno baldo,"[""Allergologo, Immunologo, Pediatra""]",https://www.miodottore.it/ermanno-baldo/allerg...,Riva,5.0,Recensioni,"{""Prima visita allergologica"": ""120 €"", ""Prima...","{""expertIn"": [""Allergologia e immunologia clin...",Dott.,True,2,ermanno,baldo,baldo ermanno,,BRESCIA
3,vincenzo rosa,"[""Agopuntore, Omeopata, Pediatra""]",https://www.miodottore.it/vincenzo-rosa-3/pedi...,Canneto,,,"{""Prima visita pediatrica"": ""60 € - 100 €"", ""O...","{""expertIn"": [""Medicina olistica"", ""Medicina f...",Dott.,True,2,vincenzo,rosa,rosa vincenzo,,REGGIO CALABRIA
13,giuseppe recupero,"[""Pediatra""]",https://www.miodottore.it/giuseppe-recupero/pe...,Barcellona,,,{},{},Dr.,True,2,giuseppe,recupero,recupero giuseppe,,POTENZA
28,gaetano quaranta,"[""Pediatra di Libera Scelta, Allergologo, Pedi...",https://www.miodottore.it/gaetano-quaranta-2/p...,Piedimonte,5.0,Recensioni,"{""Visita pediatrica"": ""60 €"", ""Visita allergol...","{""disease"": [""Asma"", ""Eczema"", ""Tonsillite"", ""...",Dr.,True,3,gaetano,quaranta,quaranta gaetano,,PIEDIMONTE (PROVINCE IS NOT EXPLICITLY STATED ...
31,paola rocchi,"[""Pediatra""]",https://www.miodottore.it/paola-rocchi-2/pedia...,Caldana,5.0,Recensioni,{},{},Dr.,True,2,paola,rocchi,rocchi paola,,REGGIO EMILIA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3695,giuseppe romano,"[""Urologo""]",https://www.miodottore.it/giuseppe-romano-15/u...,Vibo,,,"{""Visita urologica"": null}","{""expertIn"": [""Urologia""], ""disease"": [""Cistit...",Dr.,True,5,giuseppe,romano,romano giuseppe,,VIBO VALENTIA
3697,giuseppe marino,"[""Urologo""]",https://www.miodottore.it/giuseppe-marino-14/u...,Mazara,,,"{""Visita urologica"": null, ""Ecografia"": null}","{""expertIn"": [""Urologia""], ""disease"": [""Colica...",,False,5,giuseppe,marino,marino giuseppe,,TRAPANI
3703,antonio romano,"[""Urologo""]",https://www.miodottore.it/antonio-romano-13/ur...,Piove,5.0,Recensioni,"{""Prima visita urologica"": ""140 €"", ""Uroflusso...",{},Dott.,True,2,antonio,romano,romano antonio,,VERONA
3704,giuseppe benincasa,"[""Urologo""]",https://www.miodottore.it/giuseppe-benincasa/u...,Vallo,,,{},{},,False,2,giuseppe,benincasa,benincasa giuseppe,,CUNEO


In [217]:
# Concatenate the two dataframes back together
df_cleaned_nodup_count_eq_1 = pd.concat([df_cleaned_nodup_count_eq_1_not_Na, df_cleaned_nodup_count_eq_1_Na], ignore_index=True)

In [218]:
# Concatenate the two dataframes back together
df_cleaned_nodup_count_gt_1 = pd.concat([df_cleaned_nodup_count_gt_1_not_Na, df_cleaned_nodup_count_gt_1_Na], ignore_index=True)

FNOMCEO Management and Cleaning (2)

In [221]:
# Split df_fnomceo
df_fnomceo['name_count'] = df_fnomceo.groupby('name')['name'].transform('count')

In [222]:
# Get all columns displayed (max columns)
pd.set_option('display.max_columns', None)
df_fnomceo['physician_register_place']

0         SALERNO
1          VARESE
2          NAPOLI
3          RAGUSA
4           LUCCA
           ...   
552297        NaN
552298        NaN
552299    BERGAMO
552300    BERGAMO
552301    BERGAMO
Name: physician_register_place, Length: 552302, dtype: object

In [223]:
# If df_fnomceo do not have physician_register_place, then we will use the dentist_register_place
df_fnomceo['physician_register_place'] = df_fnomceo['physician_register_place'].fillna(df_fnomceo['dentist_register_place'])

In [224]:
# Divide the data into two parts, one with name_count > 1 and one with name_count = 1
df_fnomceo_count_gt_1 = df_fnomceo[df_fnomceo['name_count'] > 1]
df_fnomceo_count_eq_1 = df_fnomceo[df_fnomceo['name_count'] == 1]

In [225]:
# Merge the df_cleaned_nodup_count_eq_1 and df_fnomceo_count_eq_1 on the name column
df_merged_1 = pd.merge(df_cleaned_nodup_count_eq_1, df_fnomceo_count_eq_1, left_on='new_name', right_on= 'name', how='inner')
# Merge the df_cleaned_nodup_count_eq_1 and df_fnomceo_gt_1 on the name column and matched location
df_merged_2 = pd.merge(df_cleaned_nodup_count_eq_1, df_fnomceo_count_gt_1, 
                       left_on=['new_name', 'Official Name Provincia/Città metropolitana'],
                       right_on=['name', 'physician_register_place'],
                       how='inner')
# Merge the df_cleaned_nodup_count_gt_1 and df_fnomceo on the name column and matched location
df_merged_3 = pd.merge(df_cleaned_nodup_count_gt_1, df_fnomceo_count_eq_1,
                       left_on=['new_name', 'Official Name Provincia/Città metropolitana'],
                       right_on=['name', 'physician_register_place'],
                       how='inner')
df_merged_4 = pd.merge(df_cleaned_nodup_count_gt_1, df_fnomceo_count_gt_1,
                       left_on=['new_name', 'Official Name Provincia/Città metropolitana'],
                       right_on=['name', 'physician_register_place'],
                       how='inner')

In [227]:
# Look at the df_merged_1 and df_merged_2 again but this time change how to left to see the rows that are not matched
df_merged_1_left = pd.merge(df_cleaned_nodup_count_eq_1, df_fnomceo_count_eq_1, left_on='new_name', right_on='name', how='left')
df_merged_2_left = pd.merge(df_cleaned_nodup_count_eq_1, df_fnomceo_count_gt_1,
                            left_on=['new_name', 'Official Name Provincia/Città metropolitana'],
                            right_on=['name', 'physician_register_place'],
                            how='left')

In [234]:
# Observe df_merged_1_left
df_unmatched_name = df_merged_1_left[df_merged_1_left['name_y'].notna()]  # Show only rows where name is NaN

- Some people have middle name in fnomceo but not from the miodottore website, so we need to merge them based on the first name and last name
- sometimes name has ', for example ali', so we need to remove it

In [235]:
# Recall the df_fnomceo_eq_1 and re-clean it again
# Consider these 41170 rows in fnomceo_eq_1 
name_matched = [name for name in df_unmatched_name['new_name'].unique()]
df_fnomceo_count_eq_1_unmatched_name = df_fnomceo_count_eq_1[df_fnomceo_count_eq_1['name'].isin(name_matched) == False]
df_fnomceo_count_eq_1_unmatched_name

Unnamed: 0,index,fnomceo_id,name,sex,birthdate,age,birthplace,birth_province,data_date,profession,is_active,physician_qualification_place,physician_qualification_date,physician_register_place,physician_register_number,physician_register_date,graduations_count,graduations_faculty_1,graduations_date_1,graduations_place_1,residencies_count,residencies_residency_1,residencies_place_1,residencies_date_1,special_attributes_count,special_attributes_attribute_1,special_attributes_date_1,dentist_register_place,dentist_register_number,dentist_register_date,residencies_residency_2,residencies_place_2,residencies_date_2,dentist_qualification_place,dentist_qualification_date,residencies_residency_3,residencies_place_3,residencies_date_3,residencies_residency_4,residencies_place_4,residencies_date_4,special_attributes_attribute_2,special_attributes_date_2,special_attributes_attribute_3,special_attributes_date_3,residencies_residency_5,residencies_place_5,residencies_date_5,graduations_faculty_2,graduations_date_2,graduations_place_2,special_attributes_attribute_4,special_attributes_date_4,residencies_residency_6,residencies_place_6,residencies_date_6,special_attributes_attribute_5,special_attributes_date_5,special_attributes_attribute_6,special_attributes_date_6,residencies_residency_7,residencies_place_7,residencies_date_7,residencies_residency_8,residencies_place_8,residencies_date_8,residencies_residency_9,residencies_place_9,residencies_date_9,name_count
1,1,21.0,tamborini emanuela,F,1963-03-28,59.0,VARESE,VA,2023-01-02,Medico,True,PAVIA,1989-01-01,VARESE,4366.0,1989-06-20,1.0,MEDICINA E CHIRURGIA,1988-12-16,PAVIA,1.0,PEDIATRIA,PAVIA,1993-05-25,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2,2,22.0,di fronzo antonio,M,1954-06-26,68.0,SANT'ANTIMO,,2022-12-23,Medico,True,NAPOLI,1992-01-01,NAPOLI,26132.0,1992-06-29,1.0,MEDICINA E CHIRURGIA,1992-04-09,NAPOLI SECONDA UNIVERSITA,1.0,FONIATRIA,NAPOLI,1998-11-06,1.0,MEDICO DI MEDICINA GENERALE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
3,3,23.0,ruta giovanni,M,1960-08-18,62.0,MODICA,RG,2022-12-19,Medico,True,CATANIA,1986-02-01,RAGUSA,1660.0,1987-01-30,1.0,MEDICINA E CHIRURGIA,1986-10-27,CATANIA,1.0,TISIOLOGIA E MALATTIE DELL'APPARATO RESPIRATORIO,CATANIA,1991-12-02,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
5,5,25.0,fiocca federico,M,1975-12-25,47.0,VERONA,VR,2022-12-29,Medico,True,VERONA,2002-01-01,VERONA,7174.0,2002-09-02,1.0,MEDICINA E CHIRURGIA,2001-07-13,VERONA,0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
8,8,28.0,coggi berzetti di buronzo giorgio paolo,M,1953-12-12,69.0,TORINO,TO,2022-12-29,Medico,True,TORINO,1979-02-01,TORINO,10648.0,1980-01-29,1.0,MEDICINA E CHIRURGIA,1979-07-19,TORINO,1.0,OFTALMOLOGIA,TORINO,1983-07-05,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
552294,552299,614421.0,caracausi antonio,M,1991-04-28,31.0,BARGA,LU,2023-01-03,Odontoiatra,True,,,LUCCA,,,1.0,ODONTOIATRIA E PROTESI DENTARIA,2022-09-20,PISA,0.0,,,,0.0,,,LUCCA,618.0,2022-12-14,,,,PISA,2022-02-01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
552296,552301,614423.0,berbenni christian,M,1992-09-27,30.0,SEGRATE,MI,2023-01-03,Odontoiatra,True,,,BERGAMO,,,1.0,ODONTOIATRIA E PROTESI DENTARIA,2022-10-12,BRESCIA,0.0,,,,0.0,,,BERGAMO,1375.0,2022-12-22,,,,BRESCIA,2022-02-01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
552298,552303,614425.0,aresi nicola,M,1995-12-22,27.0,BERGAMO,BG,2023-01-03,Odontoiatra,True,BRESCIA,2022-02-01,BERGAMO,,,1.0,ODONTOIATRIA E PROTESI DENTARIA,2022-10-12,BRESCIA,0.0,,,,0.0,,,BERGAMO,1376.0,2022-12-22,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
552299,552304,614426.0,pozzoni elena,F,1997-02-11,25.0,BRESCIA,BS,2023-01-03,Medico,True,,,BERGAMO,8543.0,2022-12-22,1.0,MEDICINA E CHIRURGIA,2022-12-13,BRESCIA,0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1


In [239]:
# We will clean this by cut everything in the name that is not a letter or a space
df_fnomceo_count_eq_1_unmatched_name['name'] = df_fnomceo_count_eq_1_unmatched_name['name'].str.replace(r'[^a-zA-Z\s]', '', regex=True)
# Lowercase the name
df_fnomceo_count_eq_1_unmatched_name['name'] = df_fnomceo_count_eq_1_unmatched_name['name'].str.lower().str.strip()  # Lowercase and strip
# Remove all the spaces at the end
df_fnomceo_count_eq_1_unmatched_name['name'] = df_fnomceo_count_eq_1_unmatched_name['name'].str.strip()  # Strip spaces

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fnomceo_count_eq_1_unmatched_name['name'] = df_fnomceo_count_eq_1_unmatched_name['name'].str.replace(r'[^a-zA-Z\s]', '', regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fnomceo_count_eq_1_unmatched_name['name'] = df_fnomceo_count_eq_1_unmatched_name['name'].str.lower().str.strip()  # Lowercase and strip
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-

In [241]:
# We will merge this to create df_merged_5
df_merged_5 = pd.merge(df_cleaned_nodup_count_eq_1, df_fnomceo_count_eq_1_unmatched_name,
                       left_on='new_name',
                       right_on='name',
                       how='inner')
df_merged_5

Unnamed: 0,name_x,categories,url,location,rating,number_of_reviews,prestazioni,espierienze,prefix,is_prefix,name_count_x,first_name,last_name,new_name,Official Name Comune,Official Name Provincia/Città metropolitana,index,fnomceo_id,name_y,sex,birthdate,age,birthplace,birth_province,data_date,profession,is_active,physician_qualification_place,physician_qualification_date,physician_register_place,physician_register_number,physician_register_date,graduations_count,graduations_faculty_1,graduations_date_1,graduations_place_1,residencies_count,residencies_residency_1,residencies_place_1,residencies_date_1,special_attributes_count,special_attributes_attribute_1,special_attributes_date_1,dentist_register_place,dentist_register_number,dentist_register_date,residencies_residency_2,residencies_place_2,residencies_date_2,dentist_qualification_place,dentist_qualification_date,residencies_residency_3,residencies_place_3,residencies_date_3,residencies_residency_4,residencies_place_4,residencies_date_4,special_attributes_attribute_2,special_attributes_date_2,special_attributes_attribute_3,special_attributes_date_3,residencies_residency_5,residencies_place_5,residencies_date_5,graduations_faculty_2,graduations_date_2,graduations_place_2,special_attributes_attribute_4,special_attributes_date_4,residencies_residency_6,residencies_place_6,residencies_date_6,special_attributes_attribute_5,special_attributes_date_5,special_attributes_attribute_6,special_attributes_date_6,residencies_residency_7,residencies_place_7,residencies_date_7,residencies_residency_8,residencies_place_8,residencies_date_8,residencies_residency_9,residencies_place_9,residencies_date_9,name_count_y
0,sergio sano,"[""Pediatra""]",https://www.miodottore.it/sergio-sano/pediatra...,Cartura,,,{},{},,False,1,sergio,sano,sano sergio,Cartura,PADOVA,242877,261259.0,sano sergio,M,1942-04-01,80.0,FIRENZE,FI,2022-12-29,Medico,True,PADOVA,1969-01-01,PADOVA,2214.0,1969-06-24,1.0,MEDICINA E CHIRURGIA,1968-07-22,PADOVA,1.0,CLINICA PEDIATRICA,PADOVA,1979-07-13,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
1,bruno romano,"[""Pediatra, Omeopata""]",https://www.miodottore.it/bruno-romano-4/pedia...,Roma,5.0,Recensioni,"{""Prima visita omeopatica"": ""140 €"", ""Prima vi...","{""expertIn"": [""Omeopatia""], ""disease"": [""Distu...",Dr.,True,1,bruno,romano,romano bruno,Roma,ROMA,436486,490841.0,romano bruno,M,1986-06-29,36.0,VIBO VALENTIA,VV,2022-12-22,Medico,True,"ROMA ""UNIVERSITA CATTOLICA SACRO CUORE""",2011-01-01,ROMA,68187.0,2021-10-19,1.0,MEDICINA E CHIRURGIA,2011-07-21,"ROMA ""UNIVERSITA CATTOLICA SACRO CUORE""",0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2,nicolo salvatore titone,"[""Pneumologo, Internista, Medico di medicina g...",https://www.miodottore.it/nicolo-salvatore-tit...,Alcamo,,,{},{},Dr.,True,1,nicolo salvatore,titone,titone nicolo salvatore,Alcamo,TRAPANI,381481,410391.0,titone nicolo salvatore,M,1960-12-13,62.0,CASTELVETRANO,TP,2022-12-22,Medico,True,PALERMO,1987-02-01,TRAPANI,2303.0,1988-02-08,1.0,MEDICINA E CHIRURGIA,1987-11-07,PALERMO,1.0,TISIOLOGIA E MALATTIE DELL'APPARATO RESPIRATORIO,CATANIA,1992-11-30,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
3,walter orru,"[""Psicoterapeuta, Psichiatra""]",https://www.miodottore.it/walter-orru/psicoter...,Bergamo,5.0,Recensioni,"{""Visita psichiatrica"": ""120 €"", ""Ciclo di psi...","{""expertIn"": [""Psicoterapia""], ""disease"": [""An...",Dr.,True,1,walter,orru,orru walter,Bergamo,BERGAMO,184398,198348.0,orru walter,M,1955-12-05,67.0,ORROLI,SU,2023-01-02,Medico,True,CAGLIARI,,CAGLIARI,4729.0,1984-05-29,1.0,MEDICINA E CHIRURGIA,1984-03-27,CAGLIARI,1.0,PSICHIATRIA,CAGLIARI,1990-12-20,1.0,MEDICO PSICOTERAPEUTA,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
4,francesco luca,"[""Psicologo, Psicoterapeuta, Psicologo clinico""]",https://www.miodottore.it/francesco-luca/psico...,Azzate,5.0,Recensioni,"{""Colloquio psicologico"": ""63 €"", ""Colloquio p...","{""expertIn"": [""Psicoterapia"", ""Psiconcologia"",...",Dott.,True,1,francesco,luca,luca francesco,Azzate,VARESE,259471,279045.0,luca francesco,M,1950-10-15,72.0,ROMA,RM,2022-12-23,Medico,True,"ROMA ""LA SAPIENZA""",1975-01-01,ROMA,21466.0,1975-02-25,1.0,MEDICINA E CHIRURGIA,1974-07-19,"ROMA ""LA SAPIENZA""",2.0,MEDICINA AERONAUTICA E SPAZIALE,"ROMA ""LA SAPIENZA""",1981-03-23,0.0,,,,,,RADIOLOGIA,"ROMA ""LA SAPIENZA""",1979-03-16,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,valchiria do,"[""Cardiologo""]",https://www.miodottore.it/valchiria-do/cardiol...,Castello,,,{},{},Dr.,True,1,valchiria,do,do valchiria,,TORINO,95897,103225.0,do valchiria,F,1952-05-31,70.0,LUSSEMBURGO,EE,2023-01-02,Medico,True,PERUGIA,1978-02-01,PERUGIA,2917.0,1978-12-21,1.0,MEDICINA E CHIRURGIA,1978-07-24,PERUGIA,1.0,MALATTIE APPARATO CARDIOVASCOLARE,PERUGIA,1981-07-15,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
108,maria concetta giofre,"[""Cardiologo""]",https://www.miodottore.it/maria-concetta-giofr...,Reggio,,,"{""Consulenza online"": ""50 €"", ""Visita cardiolo...","{""expertIn"": [""Medicina interna""], ""disease"": ...",Dott.ssa,True,1,maria concetta,giofre,giofre maria concetta,,CALABRIA,432551,486866.0,giofre maria concetta,F,1986-09-22,36.0,POLISTENA,RC,2023-01-03,Medico,True,MESSINA,2011-02-01,VIBO VALENTIA,1399.0,2012-03-05,1.0,MEDICINA E CHIRURGIA,2011-07-20,MESSINA,0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
109,domenico paternico,"[""Chirurgo generale""]",https://www.miodottore.it/domenico-paternico/c...,Cernusco,,,{},{},Dott.,True,1,domenico,paternico,paternico domenico,,MONZA,234623,252388.0,paternico domenico,M,1969-06-14,53.0,PIAZZA ARMERINA,EN,2022-12-22,Medico,True,"ROMA ""UNIVERSITA CATTOLICA SACRO CUORE""",1994-02-01,ENNA,1452.0,1994-12-27,1.0,MEDICINA E CHIRURGIA,1994-10-26,"ROMA ""UNIVERSITA CATTOLICA SACRO CUORE""",1.0,CHIRURGIA GENERALE,"ROMA ""UNIVERSITA CATTOLICA SACRO CUORE""",1999-11-05,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
110,andrea coli,"[""Terapeuta""]",https://www.miodottore.it/andrea-coli/terapeut...,San,5.0,Recensioni,"{""Laser terapia"": ""20 €"", ""Tecar terapia"": ""40...","{""disease"": [""Cervicalgia"", ""Disturbi del movi...",Dott.,True,1,andrea,coli,coli andrea,,CUNEO,135834,146130.0,coli andrea,M,1956-08-02,66.0,ASCOLI PICENO,AP,2022-12-30,Medico,True,"BOLOGNA ""ALMA MATER STUDIORUM""",1981-02-01,BOLOGNA,9436.0,1982-09-14,1.0,MEDICINA E CHIRURGIA,1981-12-19,"BOLOGNA ""ALMA MATER STUDIORUM""",1.0,ANESTESIA E RIANIMAZIONE,"BOLOGNA ""ALMA MATER STUDIORUM""",1984-07-10,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1


In [247]:
# Next, we will clean the df_fnomceo_count_gt_1_unmatched_name again by delete the middle name 
# Cut the last chunk of the name after the last space
df_fnomceo_count_eq_1_unmatched_name_2 = df_fnomceo_count_eq_1_unmatched_name.copy()
df_fnomceo_count_eq_1_unmatched_name_2['name'] = df_fnomceo_count_eq_1_unmatched_name['name'].str.split(' ').str[:-1].str.join(' ')

In [253]:
# Create df_merged_6 by merging df_cleaned_nodup_count_gt_1 and df_fnomceo_count_eq_1_unmatched_name_2
df_merged_6 = pd.merge(df_cleaned_nodup_count_gt_1, df_fnomceo_count_eq_1_unmatched_name_2,
                       left_on='new_name',
                       right_on='name',
                       how='inner')

In [255]:
df_merged_6.drop_duplicates(subset=['name_x'], keep='first', inplace=True)  # Remove duplicates based on URL
df_merged_6

Unnamed: 0,name_x,categories,url,location,rating,number_of_reviews,prestazioni,espierienze,prefix,is_prefix,name_count_x,first_name,last_name,new_name,Official Name Comune,Official Name Provincia/Città metropolitana,index,fnomceo_id,name_y,sex,birthdate,age,birthplace,birth_province,data_date,profession,is_active,physician_qualification_place,physician_qualification_date,physician_register_place,physician_register_number,physician_register_date,graduations_count,graduations_faculty_1,graduations_date_1,graduations_place_1,residencies_count,residencies_residency_1,residencies_place_1,residencies_date_1,special_attributes_count,special_attributes_attribute_1,special_attributes_date_1,dentist_register_place,dentist_register_number,dentist_register_date,residencies_residency_2,residencies_place_2,residencies_date_2,dentist_qualification_place,dentist_qualification_date,residencies_residency_3,residencies_place_3,residencies_date_3,residencies_residency_4,residencies_place_4,residencies_date_4,special_attributes_attribute_2,special_attributes_date_2,special_attributes_attribute_3,special_attributes_date_3,residencies_residency_5,residencies_place_5,residencies_date_5,graduations_faculty_2,graduations_date_2,graduations_place_2,special_attributes_attribute_4,special_attributes_date_4,residencies_residency_6,residencies_place_6,residencies_date_6,special_attributes_attribute_5,special_attributes_date_5,special_attributes_attribute_6,special_attributes_date_6,residencies_residency_7,residencies_place_7,residencies_date_7,residencies_residency_8,residencies_place_8,residencies_date_8,residencies_residency_9,residencies_place_9,residencies_date_9,name_count_y
0,marco greco,"[""Pediatra""]",https://www.miodottore.it/marco-greco/pediatra...,Firenze,5.0,Recensioni,"{""Visita pediatrica"": ""80 €"", ""Visita a domici...","{""disease"": [""Faringite"", ""Otite"", ""Convulsion...",Dr.,True,2,marco,greco,greco marco,Firenze,FIRENZE,329189,354108.0,greco marco,M,1981-11-24,41.0,CONVERSANO,BA,2023-01-01,Medico,True,MODENA,2006-02-01,BOLOGNA,16366.0,2010-11-23,1.0,MEDICINA E CHIRURGIA,2006-10-25,MODENA,1.0,OTORINOLARINGOIATRIA,FERRARA,2017-07-05,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2,giuseppe leone,"[""Pediatra""]",https://www.miodottore.it/giuseppe-leone-8/ped...,Barletta,,,{},{},,False,2,giuseppe,leone,leone giuseppe,Barletta,BARLETTA-ANDRIA-TRANI,27291,29420.0,leone giuseppe,M,1937-09-15,85.0,PISTICCI,MT,2023-01-02,Odontoiatra,True,PISA,1973-01-01,PESARO-URBINO,801.0,1973-11-19,1.0,MEDICINA E CHIRURGIA,1972-04-28,PISA,1.0,ODONTOIATRIA E PROTESI DENTARIA,PISA,1979-07-16,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
4,giuseppe esposito,"[""Pediatra di Libera Scelta, Pediatra""]",https://www.miodottore.it/giuseppe-esposito-10...,Bedonia,,,"{""Prima visita pediatrica"": null, ""Visita per ...","{""school"": [""Laurea in Medicina e Chirurgia pr...",Dr.,True,4,giuseppe,esposito,esposito giuseppe,Bedonia,PARMA,354626,381514.0,esposito giuseppe,M,1967-02-12,55.0,MILANO,MI,2023-01-03,Odontoiatra,True,,,MILANO,,,1.0,ODONTOIATRIA E PROTESI DENTARIA,1993-03-02,MILANO,0.0,,,,0.0,,,MILANO,1847.0,1993-07-19,,,,MILANO,1993-01-01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
5,vincenzo caruso,"[""Pediatra""]",https://www.miodottore.it/vincenzo-caruso-4/pe...,Benevento,5.0,Recensioni,{},{},,False,2,vincenzo,caruso,caruso vincenzo,Benevento,BENEVENTO,93172,100304.0,caruso vincenzo,M,1953-10-07,69.0,TREMESTIERI ETNEO,CT,2022-12-30,Medico,True,CATANIA,1979-01-01,CATANIA,5980.0,1979-05-03,1.0,MEDICINA E CHIRURGIA,1979-04-03,CATANIA,3.0,EMATOLOGIA,CATANIA,1993-11-06,0.0,,,,,,PEDIATRIA,CATANIA,1987-07-10,,,CARDIOLOGIA,CATANIA,1983-07-20,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
8,marta ferrari,"[""Pediatra, Endocrinologo, Diabetologo""]",https://www.miodottore.it/marta-ferrari-5/pedi...,Firenze,5.0,Recensioni,"{""Prima visita endocrinologica"": ""100 €"", ""Pri...","{""disease"": [""Obesità"", ""Sovrappeso"", ""Diabete...",Dott.ssa,True,5,marta,ferrari,ferrari marta,Firenze,FIRENZE,458744,515593.0,ferrari marta,F,1989-04-03,33.0,MILANO,MI,2023-01-03,Medico,True,MILANO,2014-02-01,MILANO,43921.0,2015-02-17,1.0,MEDICINA E CHIRURGIA,2014-07-23,MILANO,1.0,ORTOPEDIA E TRAUMATOLOGIA,MILANO,2021-11-11,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1118,giovanni rizzo,"[""Medico dello sport""]",https://www.miodottore.it/giovanni-rizzo-5/med...,Cellino,4.5,30.0,"{""Visita Medico Sportiva"": null, ""Visita medic...","{""disease"": [""Contrattura"", ""Articolazione"", ""...",Dr.,True,2,giovanni,rizzo,rizzo giovanni,,POTENZA,281128,302365.0,rizzo giovanni,M,1955-09-26,67.0,MONTERONI DI LECCE,LE,2023-01-02,Medico,True,PARMA,1982-02-01,LECCE,3589.0,1988-04-21,1.0,MEDICINA E CHIRURGIA,1982-11-25,PARMA,1.0,PEDIATRIA,PARMA,1987-06-11,1.0,FITOTERAPIA,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
1138,michele cavallo,"[""Medico estetico, Medico di medicina generale...",https://www.miodottore.it/michele-cavallo-3/me...,Marano,5.0,63.0,"{""Visita medica generica in CONVENZIONE"": ""Da ...","{""expertIn"": [""Medicina generale""], ""disease"":...",Dr.,True,3,michele,cavallo,cavallo michele,,MARANO (PROVINCE IS NOT SPECIFIED AT THE COMMU...,227968,245251.0,cavallo michele,M,1958-04-22,64.0,BOLOGNA,BO,2022-12-30,Medico,True,"BOLOGNA ""ALMA MATER STUDIORUM""",1984-01-01,BOLOGNA,10191.0,1984-11-13,1.0,MEDICINA E CHIRURGIA,1984-07-19,"BOLOGNA ""ALMA MATER STUDIORUM""",0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
1147,antonio romano,"[""Gastroenterologo, Epatologo""]",https://www.miodottore.it/antonio-romano-3/gas...,San,5.0,10.0,"{""Prima visita gastroenterologica"": ""200 €"", ""...","{""expertIn"": [""Gastroenterologia""], ""disease"":...",Dr.,True,2,antonio,romano,romano antonio,,CUNEO,212259,228334.0,romano antonio,M,1949-09-20,73.0,ANZI,PZ,2013-10-15,Medico,False,NAPOLI,1980-01-01,,,,1.0,MEDICINA E CHIRURGIA,1980-06-30,NAPOLI,0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
1171,elisa colombo,"[""Nutrizionista""]",https://www.miodottore.it/elisa-colombo-5/nutr...,Paderno,5.0,Recensioni,"{""Prima visita nutrizionale"": ""120 €"", ""Prima ...","{""expertIn"": [""Scienze dell'alimentazione uman...",Dott.ssa,True,2,elisa,colombo,colombo elisa,,COMO,324955,349547.0,colombo elisa,F,1959-08-09,63.0,PONTIROLO NUOVO,BG,2023-01-03,Medico,True,MILANO,1985-02-01,BERGAMO,3812.0,1986-01-27,1.0,MEDICINA E CHIRURGIA,1985-11-13,MILANO,1.0,PSICHIATRIA,MILANO,1989-07-21,1.0,MEDICO PSICOTERAPEUTA,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1


In [None]:
# Sanity check (1)
df_merged_2_ab = pd.merge(df_cleaned_nodup_count_eq_1, df_fnomceo_count_gt_1,
                          left_on= 'new_name',
                          right_on='name',
                            how='inner')
df_merged_2_ab.drop_duplicates(subset=['name_x'], keep='first', inplace=True)  # Remove duplicates based on URL
df_merged_2_ab

Unnamed: 0,name_x,categories,url,location,rating,number_of_reviews,prestazioni,espierienze,prefix,is_prefix,name_count_x,first_name,last_name,new_name,Official Name Comune,Official Name Provincia/Città metropolitana,index,fnomceo_id,name_y,sex,birthdate,age,birthplace,birth_province,data_date,profession,is_active,physician_qualification_place,physician_qualification_date,physician_register_place,physician_register_number,physician_register_date,graduations_count,graduations_faculty_1,graduations_date_1,graduations_place_1,residencies_count,residencies_residency_1,residencies_place_1,residencies_date_1,special_attributes_count,special_attributes_attribute_1,special_attributes_date_1,dentist_register_place,dentist_register_number,dentist_register_date,residencies_residency_2,residencies_place_2,residencies_date_2,dentist_qualification_place,dentist_qualification_date,residencies_residency_3,residencies_place_3,residencies_date_3,residencies_residency_4,residencies_place_4,residencies_date_4,special_attributes_attribute_2,special_attributes_date_2,special_attributes_attribute_3,special_attributes_date_3,residencies_residency_5,residencies_place_5,residencies_date_5,graduations_faculty_2,graduations_date_2,graduations_place_2,special_attributes_attribute_4,special_attributes_date_4,residencies_residency_6,residencies_place_6,residencies_date_6,special_attributes_attribute_5,special_attributes_date_5,special_attributes_attribute_6,special_attributes_date_6,residencies_residency_7,residencies_place_7,residencies_date_7,residencies_residency_8,residencies_place_8,residencies_date_8,residencies_residency_9,residencies_place_9,residencies_date_9,name_count_y
0,angela russo,"[""Pediatra""]",https://www.miodottore.it/angela-russo-2/pedia...,Galatone,,,{},{},,False,1,angela,russo,russo angela,Galatone,LECCE,8743,9464.0,russo angela,F,1980-10-29,42.0,BOLOGNA,BO,2022-12-30,Odontoiatra,True,,,BOLOGNA,,,1.0,ODONTOIATRIA E PROTESI DENTARIA,2004-11-09,"BOLOGNA ""ALMA MATER STUDIORUM""",0.0,,,,0.0,,,BOLOGNA,1556.0,2008-04-15,,,,"BOLOGNA ""ALMA MATER STUDIORUM""",2004-02-01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7
7,marco maria mariani,"[""Pediatra, Allergologo""]",https://www.miodottore.it/marco-maria-mariani-...,Arezzo,3.5,Recensioni,"{""Visita pediatrica"": ""Da 80 €"", ""Visita aller...","{""disease"": [""Disturbi Della Crescita"", ""Eczem...",,False,1,marco maria,mariani,mariani marco maria,Arezzo,AREZZO,85805,92390.0,mariani marco maria,M,1954-06-01,68.0,AREZZO,AR,2022-12-30,Medico,True,FIRENZE,1980-01-01,AREZZO,1209.0,1980-05-31,1.0,MEDICINA E CHIRURGIA,1980-03-25,FIRENZE,2.0,ALLERGOLOGIA ED IMMUNOLOGIA CLINICA,FIRENZE,1989-09-27,0.0,,,,,,CLINICA PEDIATRICA,FIRENZE,1983-07-13,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2
9,maria grazia giuliani,"[""Pediatra""]",https://www.miodottore.it/maria-grazia-giulian...,Rho,,,"{""Visita pediatrica"": null, ""Boel test"": null,...","{""disease"": [""Disturbi Della Crescita"", ""Asma""...",Dr.,True,1,maria grazia,giuliani,giuliani maria grazia,Rho,MILANO,3817,4132.0,giuliani maria grazia,F,1962-09-10,60.0,TRAVEDONA-MONATE,VA,2023-01-02,Medico,True,MILANO,1988-01-01,MILANO,28408.0,1988-06-24,1.0,MEDICINA E CHIRURGIA,1988-03-15,MILANO,0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4
13,michela giovannini,"[""Pediatra di Libera Scelta, Pediatra""]",https://www.miodottore.it/michela-giovannini/p...,Argenta,,,"{""Prima visita pediatrica"": null, ""Visita pedi...",{},Dott.ssa,True,1,michela,giovannini,giovannini michela,Argenta,FERRARA,234296,252033.0,giovannini michela,F,1970-05-19,52.0,CANTU',CO,2022-12-29,Medico,True,PADOVA,1997-01-01,PADOVA,8267.0,1997-06-18,1.0,MEDICINA E CHIRURGIA,1996-10-15,PADOVA,0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2
15,felice perillo,"[""Pediatra""]",https://www.miodottore.it/felice-perillo/pedia...,Marigliano,5.0,Recensioni,"{""Visita pediatrica"": null, ""Boel test"": null,...","{""disease"": [""Disturbi Della Crescita"", ""Asma""...",,False,1,felice,perillo,perillo felice,Marigliano,NAPOLI,291101,313157.0,perillo felice,M,1952-12-16,70.0,MARIGLIANO,,2022-12-22,Medico,True,NAPOLI,1979-01-01,NAPOLI,15661.0,1979-09-27,1.0,MEDICINA E CHIRURGIA,1979-07-26,NAPOLI,1.0,PEDIATRIA,NAPOLI,1984-07-26,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22214,gennaro romano,"[""Urologo""]",https://www.miodottore.it/gennaro-romano/urolo...,Giugliano,,,"{""Visita urologica"": null}",{},Prof.,True,1,gennaro,romano,romano gennaro,,SALERNO,254004,273175.0,romano gennaro,M,1953-10-20,69.0,POLLENA TROCCHIA,,2022-12-22,Medico,True,NAPOLI,1978-02-01,NAPOLI,15303.0,1979-01-30,1.0,MEDICINA E CHIRURGIA,1978-12-13,NAPOLI,1.0,UROLOGIA,NAPOLI,1984-10-27,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2
22216,antonio florio,"[""Urologo""]",https://www.miodottore.it/antonio-florio/urolo...,San,,,{},{},,False,1,antonio,florio,florio antonio,,CUNEO,232093,249668.0,florio antonio,M,1953-01-01,69.0,SAN GIUSEPPE VESUVIANO,,2022-12-22,Medico,True,NAPOLI,1979-01-01,NAPOLI,15607.0,1979-09-27,1.0,MEDICINA E CHIRURGIA,1979-07-26,NAPOLI,2.0,UROLOGIA,NAPOLI,1988-06-30,0.0,,,,,,MEDICINA DELLO SPORT,NAPOLI,1983-07-27,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2
22218,pierpaolo manca,"[""Andrologo, Urologo""]",https://www.miodottore.it/pierpaolo-manca-2/an...,Quartu,,,{},{},,False,1,pierpaolo,manca,manca pierpaolo,,SASSARI,216851,233297.0,manca pierpaolo,M,1947-02-15,75.0,PATTADA,SS,2022-12-16,Medico,True,PARMA,,SASSARI,1442.0,1974-03-07,1.0,MEDICINA E CHIRURGIA,1973-11-22,PARMA,0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2
22220,michele gentile,"[""Urologo""]",https://www.miodottore.it/michele-gentile/urol...,Rionero,,,{},{},Dr.,True,1,michele,gentile,gentile michele,,POTENZA,159036,171102.0,gentile michele,M,1960-06-15,62.0,SALERNO,SA,2023-01-02,Medico,True,"ROMA ""LA SAPIENZA""",1995-01-01,POTENZA,2598.0,1995-07-05,1.0,MEDICINA E CHIRURGIA,1994-12-06,"ROMA ""LA SAPIENZA""",1.0,UROLOGIA,"ROMA ""LA SAPIENZA""",2000-11-08,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5


In [None]:
# Now observe df_merged_2_left
df_unmatched_name_2 = df_merged_2_left[df_merged_2_left['name_y'].notna()]  # Show only rows where name is NaN
df_unmatched_name_2 

Unnamed: 0,name_x,categories,url,location,rating,number_of_reviews,prestazioni,espierienze,prefix,is_prefix,name_count_x,first_name,last_name,new_name,Official Name Comune,Official Name Provincia/Città metropolitana,index,fnomceo_id,name_y,sex,birthdate,age,birthplace,birth_province,data_date,profession,is_active,physician_qualification_place,physician_qualification_date,physician_register_place,physician_register_number,physician_register_date,graduations_count,graduations_faculty_1,graduations_date_1,graduations_place_1,residencies_count,residencies_residency_1,residencies_place_1,residencies_date_1,special_attributes_count,special_attributes_attribute_1,special_attributes_date_1,dentist_register_place,dentist_register_number,dentist_register_date,residencies_residency_2,residencies_place_2,residencies_date_2,dentist_qualification_place,dentist_qualification_date,residencies_residency_3,residencies_place_3,residencies_date_3,residencies_residency_4,residencies_place_4,residencies_date_4,special_attributes_attribute_2,special_attributes_date_2,special_attributes_attribute_3,special_attributes_date_3,residencies_residency_5,residencies_place_5,residencies_date_5,graduations_faculty_2,graduations_date_2,graduations_place_2,special_attributes_attribute_4,special_attributes_date_4,residencies_residency_6,residencies_place_6,residencies_date_6,special_attributes_attribute_5,special_attributes_date_5,special_attributes_attribute_6,special_attributes_date_6,residencies_residency_7,residencies_place_7,residencies_date_7,residencies_residency_8,residencies_place_8,residencies_date_8,residencies_residency_9,residencies_place_9,residencies_date_9,name_count_y
2,angela russo,"[""Pediatra""]",https://www.miodottore.it/angela-russo-2/pedia...,Galatone,,,{},{},,False,1,angela,russo,russo angela,Galatone,LECCE,361293.0,388695.0,russo angela,F,1957-10-02,65.0,LECCE,LE,2023-01-02,Medico,True,PARMA,1982-02-01,LECCE,3592.0,1983-01-17,1.0,MEDICINA E CHIRURGIA,1982-11-09,PARMA,1.0,PEDIATRIA,PARMA,1986-06-19,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.0
30,marco maria mariani,"[""Pediatra, Allergologo""]",https://www.miodottore.it/marco-maria-mariani-...,Arezzo,3.5,Recensioni,"{""Visita pediatrica"": ""Da 80 €"", ""Visita aller...","{""disease"": [""Disturbi Della Crescita"", ""Eczem...",,False,1,marco maria,mariani,mariani marco maria,Arezzo,AREZZO,85805.0,92390.0,mariani marco maria,M,1954-06-01,68.0,AREZZO,AR,2022-12-30,Medico,True,FIRENZE,1980-01-01,AREZZO,1209.0,1980-05-31,1.0,MEDICINA E CHIRURGIA,1980-03-25,FIRENZE,2.0,ALLERGOLOGIA ED IMMUNOLOGIA CLINICA,FIRENZE,1989-09-27,0.0,,,,,,CLINICA PEDIATRICA,FIRENZE,1983-07-13,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0
61,maria grazia giuliani,"[""Pediatra""]",https://www.miodottore.it/maria-grazia-giulian...,Rho,,,"{""Visita pediatrica"": null, ""Boel test"": null,...","{""disease"": [""Disturbi Della Crescita"", ""Asma""...",Dr.,True,1,maria grazia,giuliani,giuliani maria grazia,Rho,MILANO,3817.0,4132.0,giuliani maria grazia,F,1962-09-10,60.0,TRAVEDONA-MONATE,VA,2023-01-02,Medico,True,MILANO,1988-01-01,MILANO,28408.0,1988-06-24,1.0,MEDICINA E CHIRURGIA,1988-03-15,MILANO,0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.0
62,maria grazia giuliani,"[""Pediatra""]",https://www.miodottore.it/maria-grazia-giulian...,Rho,,,"{""Visita pediatrica"": null, ""Boel test"": null,...","{""disease"": [""Disturbi Della Crescita"", ""Asma""...",Dr.,True,1,maria grazia,giuliani,giuliani maria grazia,Rho,MILANO,121941.0,131208.0,giuliani maria grazia,F,1957-12-19,65.0,MILANO,MI,2023-01-02,Medico,True,MILANO,1983-02-01,MILANO,24038.0,1984-02-09,1.0,MEDICINA E CHIRURGIA,1983-11-11,MILANO,1.0,PEDIATRIA III,MILANO,1987-07-08,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.0
103,felice perillo,"[""Pediatra""]",https://www.miodottore.it/felice-perillo/pedia...,Marigliano,5.0,Recensioni,"{""Visita pediatrica"": null, ""Boel test"": null,...","{""disease"": [""Disturbi Della Crescita"", ""Asma""...",,False,1,felice,perillo,perillo felice,Marigliano,NAPOLI,291101.0,313157.0,perillo felice,M,1952-12-16,70.0,MARIGLIANO,,2022-12-22,Medico,True,NAPOLI,1979-01-01,NAPOLI,15661.0,1979-09-27,1.0,MEDICINA E CHIRURGIA,1979-07-26,NAPOLI,1.0,PEDIATRIA,NAPOLI,1984-07-26,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111348,francesco muratore,"[""Radiologo""]",https://www.miodottore.it/francesco-muratore-2...,Misano,,,"{""Ecografia"": null}",{},Dott.,True,1,francesco,muratore,muratore francesco,,RIMINI,236814.0,254758.0,muratore francesco,M,1961-08-30,61.0,ROMA,RM,2023-01-03,Medico,True,"BOLOGNA ""ALMA MATER STUDIORUM""",1987-02-01,RIMINI,971.0,1993-10-08,1.0,MEDICINA E CHIRURGIA,1987-10-16,"BOLOGNA ""ALMA MATER STUDIORUM""",1.0,RADIOLOGIA INDIRIZZO RADIODIAGNOSTICA E SCIENZ...,"BOLOGNA ""ALMA MATER STUDIORUM""",1991-10-28,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.0
111763,pierpaolo manca,"[""Andrologo, Urologo""]",https://www.miodottore.it/pierpaolo-manca-2/an...,Quartu,,,{},{},,False,1,pierpaolo,manca,manca pierpaolo,,SASSARI,216851.0,233297.0,manca pierpaolo,M,1947-02-15,75.0,PATTADA,SS,2022-12-16,Medico,True,PARMA,,SASSARI,1442.0,1974-03-07,1.0,MEDICINA E CHIRURGIA,1973-11-22,PARMA,0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0
111777,michele gentile,"[""Urologo""]",https://www.miodottore.it/michele-gentile/urol...,Rionero,,,{},{},Dr.,True,1,michele,gentile,gentile michele,,POTENZA,159036.0,171102.0,gentile michele,M,1960-06-15,62.0,SALERNO,SA,2023-01-02,Medico,True,"ROMA ""LA SAPIENZA""",1995-01-01,POTENZA,2598.0,1995-07-05,1.0,MEDICINA E CHIRURGIA,1994-12-06,"ROMA ""LA SAPIENZA""",1.0,UROLOGIA,"ROMA ""LA SAPIENZA""",2000-11-08,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.0
111791,maurizio ranieri,"[""Urologo""]",https://www.miodottore.it/maurizio-ranieri/uro...,Aquila,,,{},{},,False,1,maurizio,ranieri,ranieri maurizio,,L'AQUILA,297096.0,319626.0,ranieri maurizio,M,1967-05-19,55.0,L'AQUILA,AQ,2022-12-30,Medico,True,L'AQUILA,1994-01-01,L'AQUILA,2585.0,1994-06-16,1.0,MEDICINA E CHIRURGIA,1994-03-30,L'AQUILA,1.0,UROLOGIA,L'AQUILA,2000-10-31,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.0


In [260]:
# Recall the df_fnomceo_eq_1 and re-clean it again

name_matched_2 = [name for name in df_unmatched_name_2['new_name'].unique()]
df_fnomceo_count_gt_1_unmatched_name = df_fnomceo_count_gt_1[df_fnomceo_count_gt_1['name'].isin(name_matched_2) == False]
df_fnomceo_count_gt_1_unmatched_name

Unnamed: 0,index,fnomceo_id,name,sex,birthdate,age,birthplace,birth_province,data_date,profession,is_active,physician_qualification_place,physician_qualification_date,physician_register_place,physician_register_number,physician_register_date,graduations_count,graduations_faculty_1,graduations_date_1,graduations_place_1,residencies_count,residencies_residency_1,residencies_place_1,residencies_date_1,special_attributes_count,special_attributes_attribute_1,special_attributes_date_1,dentist_register_place,dentist_register_number,dentist_register_date,residencies_residency_2,residencies_place_2,residencies_date_2,dentist_qualification_place,dentist_qualification_date,residencies_residency_3,residencies_place_3,residencies_date_3,residencies_residency_4,residencies_place_4,residencies_date_4,special_attributes_attribute_2,special_attributes_date_2,special_attributes_attribute_3,special_attributes_date_3,residencies_residency_5,residencies_place_5,residencies_date_5,graduations_faculty_2,graduations_date_2,graduations_place_2,special_attributes_attribute_4,special_attributes_date_4,residencies_residency_6,residencies_place_6,residencies_date_6,special_attributes_attribute_5,special_attributes_date_5,special_attributes_attribute_6,special_attributes_date_6,residencies_residency_7,residencies_place_7,residencies_date_7,residencies_residency_8,residencies_place_8,residencies_date_8,residencies_residency_9,residencies_place_9,residencies_date_9,name_count
0,0,20.0,magrini antonio,M,1953-04-30,69.0,SERRE,SA,2022-12-28,Odontoiatra,True,NAPOLI SECONDA UNIVERSITA,1982-02-01,SALERNO,5167.0,1982-12-31,1.0,MEDICINA E CHIRURGIA,1982-07-26,NAPOLI SECONDA UNIVERSITA,1.0,ODONTOIATRIA E PROTESI DENTARIA,GENOVA,1992-07-06,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2
4,4,24.0,evangelisti daniela,F,1955-07-09,67.0,CAMAIORE,LU,2022-11-24,Medico e odontoiatra,True,PISA,1988-01-01,LUCCA,2003.0,1989-06-13,1.0,MEDICINA E CHIRURGIA,1986-12-03,PISA,0.0,,,,0.0,,,LUCCA,41.0,1988-06-28,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2
6,6,26.0,quaranta giovanni,M,1964-10-09,58.0,TORINO,TO,2022-12-29,Medico,True,TORINO,1991-02-01,TORINO,16019.0,1991-05-29,1.0,MEDICINA E CHIRURGIA,1991-03-25,TORINO,0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2
7,7,27.0,villani valeria,F,1955-07-20,67.0,BOLOGNA,BO,2022-12-30,Medico,True,"BOLOGNA ""ALMA MATER STUDIORUM""",1980-02-01,BOLOGNA,8785.0,1980-12-16,1.0,MEDICINA E CHIRURGIA,1980-11-11,"BOLOGNA ""ALMA MATER STUDIORUM""",2.0,GASTROENTEROLOGIA ED ENDOSCOPIA DIGESTIVA,"BOLOGNA ""ALMA MATER STUDIORUM""",1993-07-16,0.0,,,,,,FISIOTERAPIA,"BOLOGNA ""ALMA MATER STUDIORUM""",1983-06-30,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2
15,15,36.0,montanari enzo,M,1973-07-31,49.0,REGGIO NELL'EMILIA,RE,2023-01-02,Medico,True,MODENA E REGGIO EMILIA,2003-01-01,REGGIO EMILIA,3148.0,2003-07-15,1.0,MEDICINA E CHIRURGIA,2002-07-26,MODENA E REGGIO EMILIA,1.0,MEDICINA INTERNA E MEDICINA D'URGENZA,MODENA E REGGIO EMILIA,2008-11-05,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
552286,552291,614413.0,erba stefano,M,1996-05-09,26.0,BUSTO ARSIZIO,VA,2023-01-02,Medico,True,CAGLIARI,2022-01-01,CAGLIARI,11332.0,2022-12-13,1.0,MEDICINA E CHIRURGIA,2022-11-22,CAGLIARI,0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2
552287,552292,614414.0,fenu maria laura,F,1993-01-20,29.0,CAGLIARI,CA,2023-01-02,Medico,True,CAGLIARI,2022-02-01,CAGLIARI,11333.0,2022-12-13,1.0,MEDICINA E CHIRURGIA,2022-11-22,CAGLIARI,0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2
552293,552298,614420.0,carcangiu giampaolo,M,1958-06-11,64.0,CAGLIARI,CA,2023-01-03,Medico,True,PAVIA,1985-01-01,CAGLIARI,4936.0,1985-06-26,1.0,MEDICINA E CHIRURGIA,1985-03-29,PAVIA,1.0,FARMACOLOGIA,CAGLIARI,1990-01-31,1.0,MEDICO PSICOTERAPEUTA,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2
552295,552300,614422.0,santilli daniele,M,1997-08-28,25.0,ISERNIA,IS,2023-01-03,Medico,True,,,ISERNIA,1107.0,2022-12-12,1.0,MEDICINA E CHIRURGIA,2022-11-21,"CHIETI ""GABRIELE D'ANNUNZIO""",0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3


In [261]:
# We will clean this by cut everything in the name that is not a letter or a space
df_fnomceo_count_gt_1_unmatched_name['name'] = df_fnomceo_count_gt_1_unmatched_name['name'].str.replace(r'[^a-zA-Z\s]', '', regex=True)
# Lowercase the name
df_fnomceo_count_gt_1_unmatched_name['name'] = df_fnomceo_count_gt_1_unmatched_name['name'].str.lower().str.strip()  # Lowercase and strip
# Remove all the spaces at the end
df_fnomceo_count_gt_1_unmatched_name['name'] = df_fnomceo_count_gt_1_unmatched_name['name'].str.strip()  # Strip spaces

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fnomceo_count_gt_1_unmatched_name['name'] = df_fnomceo_count_gt_1_unmatched_name['name'].str.replace(r'[^a-zA-Z\s]', '', regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fnomceo_count_gt_1_unmatched_name['name'] = df_fnomceo_count_gt_1_unmatched_name['name'].str.lower().str.strip()  # Lowercase and strip
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-

In [267]:
# We will merge this to create df_merged_7
df_merged_7 = pd.merge(df_cleaned_nodup_count_eq_1, df_fnomceo_count_gt_1_unmatched_name,
                       left_on=['new_name', 'Official Name Provincia/Città metropolitana'],
                       right_on=['name', 'physician_register_place'],
                       how='inner')
df_merged_7

Unnamed: 0,name_x,categories,url,location,rating,number_of_reviews,prestazioni,espierienze,prefix,is_prefix,name_count_x,first_name,last_name,new_name,Official Name Comune,Official Name Provincia/Città metropolitana,index,fnomceo_id,name_y,sex,birthdate,age,birthplace,birth_province,data_date,profession,is_active,physician_qualification_place,physician_qualification_date,physician_register_place,physician_register_number,physician_register_date,graduations_count,graduations_faculty_1,graduations_date_1,graduations_place_1,residencies_count,residencies_residency_1,residencies_place_1,residencies_date_1,special_attributes_count,special_attributes_attribute_1,special_attributes_date_1,dentist_register_place,dentist_register_number,dentist_register_date,residencies_residency_2,residencies_place_2,residencies_date_2,dentist_qualification_place,dentist_qualification_date,residencies_residency_3,residencies_place_3,residencies_date_3,residencies_residency_4,residencies_place_4,residencies_date_4,special_attributes_attribute_2,special_attributes_date_2,special_attributes_attribute_3,special_attributes_date_3,residencies_residency_5,residencies_place_5,residencies_date_5,graduations_faculty_2,graduations_date_2,graduations_place_2,special_attributes_attribute_4,special_attributes_date_4,residencies_residency_6,residencies_place_6,residencies_date_6,special_attributes_attribute_5,special_attributes_date_5,special_attributes_attribute_6,special_attributes_date_6,residencies_residency_7,residencies_place_7,residencies_date_7,residencies_residency_8,residencies_place_8,residencies_date_8,residencies_residency_9,residencies_place_9,residencies_date_9,name_count_y
0,anna calio,"[""Internista, Medico di medicina generale""]",https://www.miodottore.it/anna-calio/internist...,Marsala,,,{},{},Dr.,True,1,anna,calio,calio anna,Marsala,TRAPANI,63568,68461.0,calio anna,F,1955-04-08,67.0,SANTA CATERINA VILLARMOSA,CL,2022-12-22,Medico,True,PALERMO,1983-01-01,TRAPANI,1800.0,1983-06-20,1.0,MEDICINA E CHIRURGIA,1982-11-09,PALERMO,1.0,MEDICINA INTERNA,PARMA,1991-10-22,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2
1,giuseppe orru,"[""Medico competente""]",https://www.miodottore.it/giuseppe-orru-2/medi...,Cavallermaggiore,,,{},{},,False,1,giuseppe,orru,orru giuseppe,Cavallermaggiore,CUNEO,189592,203892.0,orru giuseppe,M,1965-07-09,57.0,TRAPANI,TP,2022-12-30,Medico,True,PALERMO,1991-02-01,CUNEO,4893.0,2019-08-22,1.0,MEDICINA E CHIRURGIA,1991-11-08,PALERMO,1.0,MEDICINA DEL LAVORO,PALERMO,1996-11-04,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3
2,roberto secli,"[""Anestesista, Terapista del dolore""]",https://www.miodottore.it/roberto-secli-2/anes...,Bologna,5.0,7.0,"{""Consulenza online"": ""100 €"", ""Prima visita d...","{""expertIn"": [""Anestesia rianimazione, terapia...",Dott.,True,1,roberto,secli,secli roberto,Bologna,BOLOGNA,423362,477421.0,secli roberto,M,1981-05-18,41.0,GALATINA,LE,2022-12-30,Medico,True,"BOLOGNA ""ALMA MATER STUDIORUM""",2009-02-01,BOLOGNA,17589.0,2016-03-18,1.0,MEDICINA E CHIRURGIA,2009-07-20,"BOLOGNA ""ALMA MATER STUDIORUM""",0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2
3,francesco ferrau,"[""Oncologo""]",https://www.miodottore.it/francesco-ferrau/onc...,Zafferana,5.0,Recensioni,"{""Prima visita oncologica"": ""250 €""}",{},Dott.,True,1,francesco,ferrau,ferrau francesco,,CATANIA,349445,375907.0,ferrau francesco,M,1959-08-05,63.0,CATANIA,CT,2022-12-30,Medico,True,CATANIA,1983-02-01,CATANIA,7640.0,1984-03-15,1.0,MEDICINA E CHIRURGIA,1983-11-10,CATANIA,1.0,ONCOLOGIA,FERRARA,1987-07-08,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3


In [268]:
# Concatenate the all dataframes back together
df_final = pd.concat([df_merged_1, df_merged_2, df_merged_3, df_merged_4, df_merged_5, df_merged_6, df_merged_7], ignore_index=True)

In [289]:
df_final

Unnamed: 0,name_x,categories,url,location,rating,number_of_reviews,prestazioni,espierienze,prefix,is_prefix,name_count_x,first_name,last_name,new_name,Official Name Comune,Official Name Provincia/Città metropolitana,index,fnomceo_id,name_y,sex,birthdate,age,birthplace,birth_province,data_date,profession,is_active,physician_qualification_place,physician_qualification_date,physician_register_place,physician_register_number,physician_register_date,graduations_count,graduations_faculty_1,graduations_date_1,graduations_place_1,residencies_count,residencies_residency_1,residencies_place_1,residencies_date_1,special_attributes_count,special_attributes_attribute_1,special_attributes_date_1,dentist_register_place,dentist_register_number,dentist_register_date,residencies_residency_2,residencies_place_2,residencies_date_2,dentist_qualification_place,dentist_qualification_date,residencies_residency_3,residencies_place_3,residencies_date_3,residencies_residency_4,residencies_place_4,residencies_date_4,special_attributes_attribute_2,special_attributes_date_2,special_attributes_attribute_3,special_attributes_date_3,residencies_residency_5,residencies_place_5,residencies_date_5,graduations_faculty_2,graduations_date_2,graduations_place_2,special_attributes_attribute_4,special_attributes_date_4,residencies_residency_6,residencies_place_6,residencies_date_6,special_attributes_attribute_5,special_attributes_date_5,special_attributes_attribute_6,special_attributes_date_6,residencies_residency_7,residencies_place_7,residencies_date_7,residencies_residency_8,residencies_place_8,residencies_date_8,residencies_residency_9,residencies_place_9,residencies_date_9,name_count_y
0,antonio iasi,"[""Pediatra""]",https://www.miodottore.it/antonio-iasi/pediatr...,Neviano,,,{},{},Dr.,True,1,antonio,iasi,iasi antonio,Neviano,LECCE,277268,298209.0,iasi antonio,M,1948-02-08,74.0,NEVIANO,LE,2023-01-02,Medico,True,"BARI ""ALDO MORO""",1979-01-01,LECCE,2785.0,1979-07-30,1.0,MEDICINA E CHIRURGIA,1979-06-29,"BARI ""ALDO MORO""",0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
1,piera angela negro,"[""Pediatra""]",https://www.miodottore.it/piera-angela-negro/p...,Gallipoli,,,{},{},Dr.,True,1,piera angela,negro,negro piera angela,Gallipoli,LECCE,90505,97432.0,negro piera angela,F,1953-08-19,69.0,VARESE,VA,2023-01-02,Medico,True,PAVIA,1978-02-01,LECCE,3382.0,1982-01-25,1.0,MEDICINA E CHIRURGIA,1978-07-28,PAVIA,1.0,PUERICULTURA,PAVIA,1981-07-13,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2,ivana stapane,"[""Pediatra""]",https://www.miodottore.it/ivana-stapane/pediat...,Galatone,,,{},{},Dott.,True,1,ivana,stapane,stapane ivana,Galatone,LECCE,361326,388730.0,stapane ivana,F,1962-06-24,60.0,GALATONE,LE,2023-01-02,Medico,True,PARMA,1989-02-01,LECCE,5080.0,1990-01-25,1.0,MEDICINA E CHIRURGIA,1989-07-13,PARMA,1.0,PEDIATRIA,PARMA,1993-06-24,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
3,salvatore fulgido,"[""Pediatra""]",https://www.miodottore.it/salvatore-fulgido/pe...,Alezio,,,{},{},Dr.,True,1,salvatore,fulgido,fulgido salvatore,Alezio,LECCE,30048,32355.0,fulgido salvatore,M,1962-06-24,60.0,NARDO',LE,2023-01-02,Medico,True,"BARI ""ALDO MORO""",1989-02-01,LECCE,5042.0,1990-01-09,1.0,MEDICINA E CHIRURGIA,1989-10-17,"BARI ""ALDO MORO""",1.0,PEDIATRIA GENERALE,PARMA,1993-06-24,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
4,angelica tarantino,"[""Pediatra""]",https://www.miodottore.it/angelica-tarantino/p...,Tuglie,,,{},{},,False,1,angelica,tarantino,tarantino angelica,Tuglie,LECCE,369109,397089.0,tarantino angelica,F,1960-08-14,62.0,MONCALIERI,TO,2023-01-02,Medico,True,ROMA,1985-02-01,LECCE,4290.0,1986-03-05,1.0,MEDICINA E CHIRURGIA,1985-11-04,ROMA,1.0,PEDIATRIA,ROMA,1989-07-06,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77369,giovanni iacono,"[""Oculista""]",https://www.miodottore.it/giovanni-iacono-8/oc...,Civitanova,5.0,Recensioni,"{""Visita oculistica completa + fondo oculare"":...","{""expertIn"": [""Oftalmologia""], ""disease"": [""Ca...",Dott.,True,2,giovanni,iacono,iacono giovanni,,MARCHE,65831,70901.0,iacono giovanni,M,1968-11-21,54.0,NAPOLI,,2022-12-22,Odontoiatra,True,,,NAPOLI,,,1.0,ODONTOIATRIA E PROTESI DENTARIA,1993-03-29,NAPOLI,0.0,,,,0.0,,,NAPOLI,1508.0,1993-05-31,,,,NAPOLI,1993-01-01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
77370,anna calio,"[""Internista, Medico di medicina generale""]",https://www.miodottore.it/anna-calio/internist...,Marsala,,,{},{},Dr.,True,1,anna,calio,calio anna,Marsala,TRAPANI,63568,68461.0,calio anna,F,1955-04-08,67.0,SANTA CATERINA VILLARMOSA,CL,2022-12-22,Medico,True,PALERMO,1983-01-01,TRAPANI,1800.0,1983-06-20,1.0,MEDICINA E CHIRURGIA,1982-11-09,PALERMO,1.0,MEDICINA INTERNA,PARMA,1991-10-22,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2
77371,giuseppe orru,"[""Medico competente""]",https://www.miodottore.it/giuseppe-orru-2/medi...,Cavallermaggiore,,,{},{},,False,1,giuseppe,orru,orru giuseppe,Cavallermaggiore,CUNEO,189592,203892.0,orru giuseppe,M,1965-07-09,57.0,TRAPANI,TP,2022-12-30,Medico,True,PALERMO,1991-02-01,CUNEO,4893.0,2019-08-22,1.0,MEDICINA E CHIRURGIA,1991-11-08,PALERMO,1.0,MEDICINA DEL LAVORO,PALERMO,1996-11-04,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3
77372,roberto secli,"[""Anestesista, Terapista del dolore""]",https://www.miodottore.it/roberto-secli-2/anes...,Bologna,5.0,7.0,"{""Consulenza online"": ""100 €"", ""Prima visita d...","{""expertIn"": [""Anestesia rianimazione, terapia...",Dott.,True,1,roberto,secli,secli roberto,Bologna,BOLOGNA,423362,477421.0,secli roberto,M,1981-05-18,41.0,GALATINA,LE,2022-12-30,Medico,True,"BOLOGNA ""ALMA MATER STUDIORUM""",2009-02-01,BOLOGNA,17589.0,2016-03-18,1.0,MEDICINA E CHIRURGIA,2009-07-20,"BOLOGNA ""ALMA MATER STUDIORUM""",0.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2


In [290]:
# Save the merged DataFrame to a CSV file
df_final.to_csv("doctor_details_merged_final.csv", index=False)