In [2]:
# Install Playwright (if not already installed)
!pip install playwright

# Install the browsers
!playwright install firefox




In [9]:
# Patch asyncio so an event loop that is already running can be re-entered.
# NOTE(review): presumably added because the notebook kernel already runs its own
# loop; the cells below only use top-level `await`, so confirm this is still needed.
import nest_asyncio
nest_asyncio.apply()


In [10]:
from playwright.async_api import async_playwright

async def open_site():
    """Launch a visible Firefox window and load the well-search page.

    The Playwright driver, browser, browser context and page are stored in the
    module-level names ``playwright``, ``browser``, ``context`` and ``page`` so
    that later cells can keep working with the same session.
    """
    global playwright, browser, context, page

    search_url = "https://www.dmr.nd.gov/oilgas/findwellsvw.asp"

    # Each handle is created from the previous one: driver -> browser -> context -> tab.
    playwright = await async_playwright().start()
    browser = await playwright.firefox.launch(headless=False)  # headed, so the window is visible
    context = await browser.new_context()
    page = await context.new_page()

    print("Opening page…")
    # Block until the network has gone quiet before reporting success.
    await page.goto(search_url, wait_until="networkidle")
    print("Page loaded. Browser should be visible.")

# Top-level await: this depends on the notebook kernel running cells inside an
# event loop. It populates the globals `playwright`, `browser`, `context`, `page`.
await open_site()


Opening page…
Page loaded. Browser should be visible.


In [15]:
import pandas as pd
from tqdm.notebook import tqdm

all_rows = []
townships = list(range(129, 165))  # 129 to 164 inclusive

for ts in tqdm(townships, desc="Scraping townships"):
    # Open a fresh page for each township
    page2 = await context.new_page()
    await page2.goto("https://www.dmr.nd.gov/oilgas/findwellsvw.asp", wait_until="networkidle")

    print(f"Selecting township {ts}…")
    await page2.select_option("#ddmTownship", str(ts))
    await page2.click("input[name='B1']")

    await page2.wait_for_load_state("networkidle")

    table = await page2.query_selector("table[summary='Well Log search results table']")
    if table is None:
        print(f"No table found for township {ts}, skipping.")
        await page2.close()
        continue

    # Get headers once
    if 'headers' not in locals():
        header_elements = await table.query_selector_all("th")
        headers = [await th.inner_text() for th in header_elements]
        print("Column headers:", headers)

    # Get rows
    row_elements = await table.query_selector_all("tr")
    for tr in row_elements[1:]:
        cell_elements = await tr.query_selector_all("td")
        row = [await td.inner_text() for td in cell_elements]
        all_rows.append(row)

    print(f"Township {ts} done. Collected {len(row_elements)-1} rows.")
    await page2.close()  # close page to avoid target closed issues

print("\nScraping complete. Total rows collected:", len(all_rows))

# Convert to DataFrame
df = pd.DataFrame(all_rows, columns=headers)
df.head()


Scraping townships:   0%|          | 0/36 [00:00<?, ?it/s]

Selecting township 129…
Column headers: ['File No', 'CTB No', 'API No', 'Well Type', 'Well Status', 'Status Date', 'DTD', 'Location', 'Operator', 'Well Name', 'Field']
Township 129 done. Collected 378 rows.
Selecting township 130…
Township 130 done. Collected 599 rows.
Selecting township 131…
Township 131 done. Collected 474 rows.
Selecting township 132…
Township 132 done. Collected 215 rows.
Selecting township 133…
Township 133 done. Collected 166 rows.
Selecting township 134…
Township 134 done. Collected 124 rows.
Selecting township 135…
Township 135 done. Collected 103 rows.
Selecting township 136…
Township 136 done. Collected 98 rows.
Selecting township 137…
Township 137 done. Collected 118 rows.
Selecting township 138…
Township 138 done. Collected 215 rows.
Selecting township 139…
Township 139 done. Collected 631 rows.
Selecting township 140…
Township 140 done. Collected 683 rows.
Selecting township 141…
Township 141 done. Collected 475 rows.
Selecting township 142…
Township 142 d

Unnamed: 0,File No,CTB No,API No,Well Type,Well Status,Status Date,DTD,Location,Operator,Well Name,Field
0,3065,,3308100001,OG,DRY,1/1/1962,1214,NWNW 9-129-58,SARGENT MINERAL CORP.,LAMPORT 1,WILDCAT
1,99025,,3302199025,ST,DRY,9/30/1963,1600,NENE 10-129-63,JACK ROUSE OIL PROPERTIES,HAROLD BILLIE 1,WILDCAT
2,515,,3302100001,OG,DRY,11/17/1954,1882,SENW 11-129-63,H. HANSON OIL SYNDICATE,HAROLD BILLEY 1,WILDCAT
3,572,,3302100002,OG,DRY,6/17/1954,1860,NENW 14-129-63,H. HANSON OIL SYNDICATE,JOHN BELL 1,WILDCAT
4,1394,,3302100005,OG,DRY,3/12/1957,3166,NWNW 22-129-66,"CALVERT DRILLING, INC.",MARVIN KAMM 1,WILDCAT


In [16]:
# Write the scraped table to disk. index=False keeps the pandas row index out of
# the file, so only the scraped columns are written.
csv_filename = "north_dakota_wells.csv"
df.to_csv(path_or_buf=csv_filename, index=False)
print(f"Data saved to {csv_filename}")


Data saved to north_dakota_wells.csv
