In [None]:
import time
from pathlib import Path
from urllib.parse import urlparse

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def get_datacenter_links():
    """Scrape data-center profile paths from the datacenters.com US listing.

    Drives a local Chrome session through the paginated listing at
    https://datacenters.com/locations/united-states. The link grid is read
    on pages 1-50; the "next" button is then clicked three more times and
    the grid is read after each click (pages 51-53). Every link is reduced
    to the path component of its URL.

    Paths are de-duplicated within each of the two phases separately, so a
    path seen in both phases appears twice in the result. A failure on any
    page is printed and ends the current phase; whatever was gathered so far
    is still returned. The browser is always closed, even when an unexpected
    error escapes.

    Note: a later cell defines another function with this same name; the
    definition executed last is the one the kernel uses.

    Returns:
        list[str]: paths from pages 1-50 followed by paths from pages 51-53,
        each group in first-seen order.
    """
    # Absolute XPaths into the listing page layout.
    grid_xpath = "/html/body/div[1]/div[4]/div/div[2]/div[3]/div[1]"
    nav_xpath = "/html/body/div[1]/div[4]/div/div[2]/div[3]/div[2]/div[1]/nav"

    # Update this path to your chromedriver executable
    service = Service('/Applications/chromedriver-mac-arm64/chromedriver')
    driver = webdriver.Chrome(service=service)
    wait = WebDriverWait(driver, 10)

    # We store URLs from two sections:
    # 1. Pages 1-50 (extracted_urls_first)
    # 2. Pages 51-53 (extracted_urls_last)
    extracted_urls_first = []
    extracted_urls_last = []

    def harvest(bucket):
        """Append every not-yet-seen link path in the results grid to ``bucket``."""
        grid = wait.until(EC.presence_of_element_located((By.XPATH, grid_xpath)))
        for anchor in grid.find_elements(By.TAG_NAME, "a"):
            href = anchor.get_attribute("href")
            if not href:
                continue
            path = urlparse(href).path
            if path not in bucket:
                bucket.append(path)
                print(path)

    def advance(button_index, message):
        """Click ``nav/button[button_index]`` through JavaScript and pause for the load."""
        next_button = wait.until(EC.presence_of_element_located(
            (By.XPATH, f"{nav_xpath}/button[{button_index}]")
        ))

        # Remove any overlay that might block the button.
        try:
            overlay = driver.find_element(By.CLASS_NAME, "ot-sdk-row")
            driver.execute_script("arguments[0].remove();", overlay)
            print("Overlay removed.")
        except Exception as e:
            print("No overlay found:", e)

        driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
        time.sleep(1)  # Give time for scrolling
        driver.execute_script("arguments[0].click();", next_button)
        print(message)
        time.sleep(2)  # Wait for the next page to load

    try:
        driver.get("https://datacenters.com/locations/united-states")

        # --- Optional: Dismiss the cookie consent banner if it appears ---
        try:
            cookie_button = wait.until(EC.element_to_be_clickable(
                (By.XPATH, "//button[contains(., 'Accept')]")
            ))
            cookie_button.click()
            print("Cookie banner accepted.")
        except Exception as e:
            print("Cookie banner not found or already handled:", e)

        # ======================================================
        # Part 1: Process Pages 1-50
        # ======================================================
        for page_count in range(1, 51):
            print(f"\nProcessing page {page_count} (pages 1-50)...")
            try:
                harvest(extracted_urls_first)

                # Page 50 is the last page of this phase, so no click there.
                if page_count < 50:
                    # The "next" button is button[5] on pages 1-3 and
                    # button[9] from page 4 onwards.
                    advance(
                        5 if page_count < 4 else 9,
                        f"Clicked next page for page {page_count}.",
                    )
            except Exception as e:
                print("Exception encountered on page", page_count, ":", e)
                print("Stopping pagination for pages 1-50.")
                break

        # ======================================================
        # Part 2: Process Pages 51-53
        # ------------------------------------------------------
        # Click "next" first (button[4] on the last pages), then read the
        # new page, so page 50 is not extracted a second time.
        # NOTE(review): this phase assumes Part 1 left the browser on page
        # 50; it still runs if Part 1 stopped early.
        # ======================================================
        for page in range(51, 54):
            print(f"\nProcessing page {page} (pages 51-53)...")
            try:
                advance(4, f"Clicked next page to move to page {page}.")
                harvest(extracted_urls_last)
            except Exception as e:
                print("Exception encountered on page", page, ":", e)
                print("Stopping pagination for pages 51-53.")
                break
    finally:
        # Always release the Chrome session, even on an unexpected error.
        driver.quit()

    return extracted_urls_first + extracted_urls_last


In [84]:
def get_datacenter_links():
    """Collect de-duplicated data-center profile paths from listing pages 1-53.

    Opens https://datacenters.com/locations/united-states in Chrome, reads
    the link grid on pages 1-50, then clicks "next" three more times and
    reads the grid after each click (pages 51-53). Every link is reduced to
    its URL path and stored once, in first-seen order.

    A failure on any page is printed and ends the current phase; the paths
    gathered so far are still returned. The browser is closed even when an
    unexpected error escapes.

    Returns:
        list[str]: unique URL paths in the order they were first encountered.
    """
    # Absolute XPaths into the listing page layout.
    grid_locator = (By.XPATH, "/html/body/div[1]/div[4]/div/div[2]/div[3]/div[1]")
    nav_button = "/html/body/div[1]/div[4]/div/div[2]/div[3]/div[2]/div[1]/nav/button"

    # Update this path to your chromedriver executable
    service = Service('/Applications/chromedriver-mac-arm64/chromedriver')
    driver = webdriver.Chrome(service=service)

    # Single list to store URLs from pages 1-53
    extracted_urls = []

    try:
        driver.get("https://datacenters.com/locations/united-states")
        wait = WebDriverWait(driver, 10)

        # --- Optional: Dismiss the cookie consent banner if present ---
        try:
            cookie_button = wait.until(EC.element_to_be_clickable(
                (By.XPATH, "//button[contains(., 'Accept')]")
            ))
            cookie_button.click()
            print("Cookie banner accepted.")
        except Exception as e:
            print("Cookie banner not found or already handled:", e)

        # ======================================================
        # Part 1: Process Pages 1-50
        # ======================================================
        for page_count in range(1, 51):
            print(f"\nProcessing page {page_count} (pages 1-50)...")
            try:
                # Wait until the grid containing the links is present.
                grid = wait.until(EC.presence_of_element_located(grid_locator))
                for a in grid.find_elements(By.TAG_NAME, "a"):
                    href = a.get_attribute("href")
                    if href:
                        path = urlparse(href).path
                        if path not in extracted_urls:
                            extracted_urls.append(path)
                            print(path)

                # If not on page 50, click the next button to move forward.
                if page_count < 50:
                    # Pages 1-3 use button[5]; pages 4-49 use button[9].
                    button_index = 5 if page_count < 4 else 9
                    next_button = wait.until(EC.presence_of_element_located(
                        (By.XPATH, f"{nav_button}[{button_index}]")
                    ))

                    # Remove any overlay that might block the button.
                    try:
                        overlay = driver.find_element(By.CLASS_NAME, "ot-sdk-row")
                        driver.execute_script("arguments[0].remove();", overlay)
                        print("Overlay removed.")
                    except Exception as e:
                        print("No overlay found:", e)

                    driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
                    time.sleep(1)  # Allow time for scrolling
                    driver.execute_script("arguments[0].click();", next_button)
                    print(f"Clicked next page for page {page_count}.")
                    time.sleep(2)  # Wait for the new page to load
            except Exception as e:
                print("Exception encountered on page", page_count, ":", e)
                print("Stopping pagination for pages 1-50.")
                break

        # ======================================================
        # Part 2: Process Pages 51-53 (do not re-extract page 50)
        # NOTE(review): assumes Part 1 left the browser on page 50; this
        # phase still runs if Part 1 stopped early.
        # ======================================================
        for page in range(51, 54):
            print(f"\nProcessing page {page} (pages 51-53)...")
            try:
                # On the last pages the "next" control is button[4]; click
                # first, then read the page that loads.
                next_button = wait.until(EC.presence_of_element_located(
                    (By.XPATH, f"{nav_button}[4]")
                ))

                try:
                    overlay = driver.find_element(By.CLASS_NAME, "ot-sdk-row")
                    driver.execute_script("arguments[0].remove();", overlay)
                    print("Overlay removed.")
                except Exception as e:
                    print("No overlay found:", e)

                driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
                time.sleep(1)
                driver.execute_script("arguments[0].click();", next_button)
                print(f"Clicked next page to move to page {page}.")
                time.sleep(2)  # Wait for the new page to load

                # Now extract the links on the new page.
                grid = wait.until(EC.presence_of_element_located(grid_locator))
                for a in grid.find_elements(By.TAG_NAME, "a"):
                    href = a.get_attribute("href")
                    if href:
                        path = urlparse(href).path
                        if path not in extracted_urls:
                            extracted_urls.append(path)
                            print(path)
            except Exception as e:
                print("Exception encountered on page", page, ":", e)
                print("Stopping pagination for pages 51-53.")
                break
    finally:
        # Always release the Chrome session, even on an unexpected error.
        driver.quit()

    return extracted_urls


In [102]:
def get_last_datacenter_urls():
    """Return profile paths found on listing pages 51-53 only.

    Loads https://datacenters.com/locations/united-states, clicks "next" 49
    times without reading any links (page 1 through to page 50), then clicks
    three more times and collects the link paths from the grid after each
    click.

    A failure in either phase is printed and ends that phase; the paths
    gathered so far are still returned. The browser is closed even when an
    unexpected error escapes.

    Returns:
        list[str]: unique URL paths from the last pages, in first-seen order.
    """
    # Absolute XPaths into the listing page layout.
    grid_xpath = "/html/body/div[1]/div[4]/div/div[2]/div[3]/div[1]"
    nav_xpath = "/html/body/div[1]/div[4]/div/div[2]/div[3]/div[2]/div[1]/nav"

    # Update this path to your chromedriver executable
    service = Service('/Applications/chromedriver-mac-arm64/chromedriver')
    driver = webdriver.Chrome(service=service)
    wait = WebDriverWait(driver, 10)

    # Only the last pages are collected by this function.
    extracted_urls_last = []

    def click_next(button_index, message):
        """Click ``nav/button[button_index]`` through JavaScript and pause for the load."""
        next_button = wait.until(EC.presence_of_element_located(
            (By.XPATH, f"{nav_xpath}/button[{button_index}]")
        ))

        # Remove any overlay that might block the button.
        try:
            overlay = driver.find_element(By.CLASS_NAME, "ot-sdk-row")
            driver.execute_script("arguments[0].remove();", overlay)
            print("Overlay removed.")
        except Exception as e:
            print("No overlay found:", e)

        driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
        time.sleep(1)  # Give time for scrolling
        driver.execute_script("arguments[0].click();", next_button)
        print(message)
        time.sleep(2)  # Wait for the next page to load

    try:
        driver.get("https://datacenters.com/locations/united-states")

        # --- Optional: Dismiss the cookie consent banner if it appears ---
        try:
            cookie_button = wait.until(EC.element_to_be_clickable(
                (By.XPATH, "//button[contains(., 'Accept')]")
            ))
            cookie_button.click()
            print("Cookie banner accepted.")
        except Exception as e:
            print("Cookie banner not found or already handled:", e)

        # ======================================================
        # Part 1: Step through pages 1-50 without extracting links
        # ======================================================
        for page_count in range(1, 51):
            print(f"\nProcessing page {page_count} (pages 1-50)...")
            try:
                # Page 50 is the end of this phase, so nothing is clicked there.
                if page_count < 50:
                    # The "next" button is button[5] on pages 1-3 and
                    # button[9] from page 4 onwards.
                    click_next(
                        5 if page_count < 4 else 9,
                        f"Clicked next page for page {page_count}.",
                    )
            except Exception as e:
                print("Exception encountered on page", page_count, ":", e)
                print("Stopping pagination for pages 1-50.")
                break

        # ======================================================
        # Part 2: Process Pages 51-53
        # ------------------------------------------------------
        # Click "next" first (button[4] on the last pages), then read the
        # links of the page that loads.
        # NOTE(review): this phase assumes Part 1 left the browser on page
        # 50; it still runs if Part 1 stopped early.
        # ======================================================
        for page in range(51, 54):
            print(f"\nProcessing page {page} (pages 51-53)...")
            try:
                click_next(4, f"Clicked next page to move to page {page}.")

                # Now extract links from the new page (pages 51-53).
                grid = wait.until(EC.presence_of_element_located((By.XPATH, grid_xpath)))
                for anchor in grid.find_elements(By.TAG_NAME, "a"):
                    href = anchor.get_attribute("href")
                    if not href:
                        continue
                    path = urlparse(href).path
                    if path not in extracted_urls_last:
                        extracted_urls_last.append(path)
                        print(path)
            except Exception as e:
                print("Exception encountered on page", page, ":", e)
                print("Stopping pagination for pages 51-53.")
                break
    finally:
        # Always release the Chrome session, even on an unexpected error.
        driver.quit()

    return extracted_urls_last


In [103]:
# Run the last-pages scrape (this launches Chrome) and keep the returned paths.
last_urls = get_last_datacenter_urls()

Cookie banner not found or already handled: Message: 
Stacktrace:
0   chromedriver                        0x00000001031eb94c cxxbridge1$str$ptr + 2728228
1   chromedriver                        0x00000001031e3fa8 cxxbridge1$str$ptr + 2697088
2   chromedriver                        0x0000000102d49ff0 cxxbridge1$string$len + 93348
3   chromedriver                        0x0000000102d90d5c cxxbridge1$string$len + 383504
4   chromedriver                        0x0000000102dd1f14 cxxbridge1$string$len + 650184
5   chromedriver                        0x0000000102d85004 cxxbridge1$string$len + 335032
6   chromedriver                        0x00000001031b41f4 cxxbridge1$str$ptr + 2501068
7   chromedriver                        0x00000001031b7508 cxxbridge1$str$ptr + 2514144
8   chromedriver                        0x00000001031998a4 cxxbridge1$str$ptr + 2392188
9   chromedriver                        0x00000001031b7db0 cxxbridge1$str$ptr + 2516360
10  chromedriver                        0x00000

In [116]:
def get_all_datacenter_urls():
    """Collect unique data-center profile paths from every listing page.

    Opens https://datacenters.com/locations/united-states in Chrome, reads
    the link grid on the landing page, then repeatedly clicks the "next"
    pagination button and reads the grid again: 49 clicks to reach page 50,
    followed by three more clicks for pages 51-53. Each link is reduced to
    the path component of its URL and stored once, in first-seen order.

    A missing "next" button or any other per-page failure is printed and
    ends the current phase; paths gathered so far are still returned. The
    browser is closed even when an unexpected error escapes.

    Returns:
        list[str]: unique URL paths in the order they were first encountered.
    """
    # ---------------------------
    # Setup and initializations
    # ---------------------------
    service = Service('/Applications/chromedriver-mac-arm64/chromedriver')
    driver = webdriver.Chrome(service=service)
    wait = WebDriverWait(driver, 10)

    # XPath for the grid that contains the datacenter links.
    grid_xpath = "/html/body/div[1]/div[4]/div/div[2]/div[3]/div[1]"
    # XPath of the pagination <nav>; the "next" control is one of its buttons.
    nav_xpath = "/html/body/div[1]/div[4]/div/div[2]/div[3]/div[2]/div[1]/nav"
    all_urls = []

    def harvest(page_label):
        """Add every not-yet-seen link path in the grid to ``all_urls``."""
        grid = wait.until(EC.presence_of_element_located((By.XPATH, grid_xpath)))
        for anchor in grid.find_elements(By.TAG_NAME, "a"):
            href = anchor.get_attribute("href")
            if not href:
                continue
            path = urlparse(href).path
            if path not in all_urls:
                all_urls.append(path)
                print(f"Page {page_label}: {path}")

    def click_next(button_index, missing_message, clicked_message):
        """Click ``nav/button[button_index]``; raise if that button is absent."""
        # find_elements returns an empty list instead of raising when absent.
        next_buttons = driver.find_elements(
            By.XPATH, f"{nav_xpath}/button[{button_index}]"
        )
        if not next_buttons:
            raise Exception(missing_message)
        next_button = next_buttons[0]

        # Remove overlay if present (find_elements avoids errors when not found)
        overlays = driver.find_elements(By.CLASS_NAME, "ot-sdk-row")
        if overlays:
            driver.execute_script("arguments[0].remove();", overlays[0])
            print("Overlay removed.")

        driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
        time.sleep(1)
        driver.execute_script("arguments[0].click();", next_button)
        print(clicked_message)
        time.sleep(2)  # Allow time for the new page to load

    try:
        driver.get("https://datacenters.com/locations/united-states")

        # --- Dismiss cookie consent if it appears ---
        try:
            cookie_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//button[contains(., 'Accept')]"))
            )
            cookie_button.click()
            print("Cookie banner accepted.")
        except Exception as e:
            print("Cookie banner not found or already handled:", e)

        # -----------------------------------------
        # Extract links from the landing page (page 1)
        # -----------------------------------------
        try:
            harvest(1)
        except Exception as e:
            print("Error extracting links on page 1:", e)

        # -----------------------------------------
        # Pages 2-50: click "next" on the current page, then read the new page
        # -----------------------------------------
        for current_page in range(1, 50):
            next_page_num = current_page + 1
            print(f"\nProcessing page {next_page_num} (pages 1–50)...")
            try:
                # The "next" button is button[5] on pages 1-3 and button[9]
                # from page 4 onwards.
                click_next(
                    5 if current_page < 4 else 9,
                    f"Next button not found on page {next_page_num}.",
                    f"Clicked next page for page {current_page}.",
                )
                harvest(next_page_num)
            except Exception as e:
                print(f"Exception encountered on page {next_page_num}:", e)
                print("Stopping pagination for pages 1–50.")
                break

        # -----------------------------------------
        # Pages 51-53: the "next" control is button[4] on the last pages
        # NOTE(review): assumes the previous loop left the browser on page
        # 50; this phase still runs if that loop stopped early.
        # -----------------------------------------
        for page in range(51, 54):
            print(f"\nProcessing page {page} (pages 51–53)...")
            try:
                click_next(
                    4,
                    f"Next button not found for pages 51–53 on page {page}.",
                    f"Clicked next page to move to page {page}.",
                )
                harvest(page)
            except Exception as e:
                print(f"Exception encountered on page {page}:", e)
                print("Stopping pagination for pages 51–53.")
                break
    finally:
        # Always release the Chrome session, even on an unexpected error.
        driver.quit()

    return all_urls

In [117]:
# Scrape the full listing (this launches Chrome) and keep the unique profile paths.
all_urls = get_all_datacenter_urls()

Cookie banner not found or already handled: Message: 
Stacktrace:
0   chromedriver                        0x000000010102394c cxxbridge1$str$ptr + 2728228
1   chromedriver                        0x000000010101bfa8 cxxbridge1$str$ptr + 2697088
2   chromedriver                        0x0000000100b81ff0 cxxbridge1$string$len + 93348
3   chromedriver                        0x0000000100bc8d5c cxxbridge1$string$len + 383504
4   chromedriver                        0x0000000100c09f14 cxxbridge1$string$len + 650184
5   chromedriver                        0x0000000100bbd004 cxxbridge1$string$len + 335032
6   chromedriver                        0x0000000100fec1f4 cxxbridge1$str$ptr + 2501068
7   chromedriver                        0x0000000100fef508 cxxbridge1$str$ptr + 2514144
8   chromedriver                        0x0000000100fd18a4 cxxbridge1$str$ptr + 2392188
9   chromedriver                        0x0000000100fefdb0 cxxbridge1$str$ptr + 2516360
10  chromedriver                        0x00000

In [119]:
# Number of unique profile paths that were collected.
len(all_urls)

1745

# Scraping the DC Profiles

In [69]:
def scrape_datacenter_details(url_extensions):
    """Scrape name, address and sidebar fields for each data-center profile.

    Each entry of ``url_extensions`` is appended to https://datacenters.com
    and opened in a single shared Chrome session. For every page the
    function reads:

    * the heading text, trying two XPaths in order and keeping the first
      non-empty result;
    * the address text;
    * the stripped, non-empty text of each immediate child ``<div>`` of the
      sidebar container.

    Any element that cannot be located within the 10-second wait is reported
    with a printed message and recorded as ``None`` (or an empty field
    list); the page still produces a row. The browser is closed even when
    an unexpected error escapes.

    Args:
        url_extensions: iterable of URL paths (each starting with "/") to
            append to the site's base URL.

    Returns:
        pandas.DataFrame: one row per input path with columns ``url``,
        ``name``, ``address`` and ``field1`` .. ``fieldN``, where N is the
        largest number of sidebar fields found on any page; missing fields
        are ``None``. Empty input returns an empty frame with the ``url``,
        ``name`` and ``address`` columns and does not start a browser.
    """
    url_extensions = list(url_extensions)
    if not url_extensions:
        # Nothing to visit: skip launching Chrome altogether.
        return pd.DataFrame(columns=["url", "name", "address"])

    base_url = "https://datacenters.com"
    # Try multiple heading XPaths to account for layout variations.
    heading_xpaths = [
        "/html/body/div[1]/div[4]/div/div[1]/div[1]/div/div[3]/div/h1",  # Standard case
        "/html/body/div[1]/div[4]/div/div[1]/div[1]/div/h1"                # Fallback case
    ]
    address_xpath = "/html/body/div[1]/div[4]/div/div[1]/div[1]/div/div[2]/div/div[1]/div[2]/div[1]/span[2]"
    sidebar_xpath = "/html/body/div[1]/div[4]/div/div[1]/div[1]/div/div[2]/div/div[2]"

    results = []

    # Initialize the Chrome driver.
    service = Service('/Applications/chromedriver-mac-arm64/chromedriver')
    driver = webdriver.Chrome(service=service)
    try:
        wait = WebDriverWait(driver, 10)

        for extension in url_extensions:
            full_url = base_url + extension
            print(f"Processing: {full_url}")
            driver.get(full_url)
            time.sleep(1)  # Allow the page to start loading

            # --- Extract the Data Center Name ---
            name = None
            for xpath in heading_xpaths:
                try:
                    name_element = wait.until(EC.presence_of_element_located((By.XPATH, xpath)))
                    name = name_element.text.strip()
                    if name:
                        break  # Use the first non-empty heading found.
                except Exception:
                    print(f"Heading not found using {xpath}.")
            if not name:
                print("Error extracting heading from:", full_url)

            # --- Extract the Address ---
            try:
                address_element = wait.until(EC.presence_of_element_located(
                    (By.XPATH, address_xpath)
                ))
                address = address_element.text.strip()
            except Exception as e:
                print(f"Error getting address from {full_url}: {e}")
                address = None

            # --- Extract the Sidebar Data Fields ---
            try:
                sidebar = wait.until(EC.presence_of_element_located(
                    (By.XPATH, sidebar_xpath)
                ))
                # Get all immediate child <div> elements
                field_elements = sidebar.find_elements(By.XPATH, "./div")
                fields = [elem.text.strip() for elem in field_elements if elem.text.strip()]
            except Exception as e:
                print(f"Error getting sidebar fields from {full_url}: {e}")
                fields = []

            results.append({
                "url": full_url,
                "name": name,
                "address": address,
                "fields": fields
            })
            time.sleep(1)  # Pause between pages
    finally:
        # Always release the Chrome session, even on an unexpected error.
        driver.quit()

    # Determine the maximum number of sidebar fields across all pages.
    max_fields = max((len(r["fields"]) for r in results), default=0)

    # Build one flat record per page, padding short field lists with None so
    # every row has the same field1..fieldN columns.
    data_for_df = []
    for r in results:
        row = {
            "url": r["url"],
            "name": r["name"],
            "address": r["address"]
        }
        for i in range(max_fields):
            key = f"field{i+1}"
            row[key] = r["fields"][i] if i < len(r["fields"]) else None
        data_for_df.append(row)

    return pd.DataFrame(data_for_df)


In [75]:
# Visit every collected profile path and build the details DataFrame.
datacenters = scrape_datacenter_details(all_urls)
# Long-running cell: the full scrape took about 140 minutes when this was recorded.

Processing: https://datacenters.com/microsoft-azure-east-us-virginia
Processing: https://datacenters.com/equinix-dc2-ashburn
Processing: https://datacenters.com/google-mayes-county-ok
Processing: https://datacenters.com/equinix-ny4-secaucus
Processing: https://datacenters.com/microsoft-azure-des-moines
Processing: https://datacenters.com/microsoft-azure-west-us-2-washington
Processing: https://datacenters.com/microsoft-azure-east-us-2-virginia
Processing: https://datacenters.com/qts-richmond
Processing: https://datacenters.com/amazon-aws-pdx4-boardman-or
Processing: https://datacenters.com/google-midlothian-tx
Processing: https://datacenters.com/microsoft-azure-south-central-us-texas
Processing: https://datacenters.com/equinix-mi1-miami
Processing: https://datacenters.com/microsoft-azure-north-central-us-illinois
Processing: https://datacenters.com/apple-inc-maiden
Processing: https://datacenters.com/switch-switch-core-campus
Heading not found using /html/body/div[1]/div[4]/div/div[1]/

In [77]:
# Persist the scraped table in the working directory. With to_csv's defaults
# the row index is also written, as an unnamed first column.
datacenters.to_csv('datacenter_map_scraped.csv')

In [3]:
from pathlib import Path
import pandas as pd


In [5]:
# Reload the previously scraped table from the working directory.
datacenters = pd.read_csv('datacenter_map_scraped.csv')

In [8]:
# Drop the leftover index column that came back from the CSV as 'Unnamed: 0'.
# errors='ignore' keeps this cell safe to re-run once the column is already gone.
datacenters = datacenters.drop(columns='Unnamed: 0', errors='ignore')

In [17]:
# Store the cleaned table under <parent of the working directory>/data/raw.
data_folder = Path.cwd().parent / 'data' / 'raw'
file_path = data_folder / 'datacenter_map_scraped.csv'

# index=False: do not write the row index, otherwise the file regains the
# same unnamed first column that was dropped after reloading.
datacenters.to_csv(file_path, index=False)