# Get study area counties in Texas
### 1. Download the list of all Texas counties from Wikipedia: [List of counties in Texas](https://en.wikipedia.org/wiki/List_of_counties_in_Texas).

In [2]:
import pandas as pd

# Read every table on the Wikipedia page, keep the first column of the second
# table, label it 'county', and save it as a CSV file.
wiki_tables = pd.read_html('https://en.wikipedia.org/wiki/List_of_counties_in_Texas')
df = wiki_tables[1].iloc[:, :1]
df.columns = ['county']
df.to_csv('data/texas-counties.csv', index=False)

### 2. Get the adjacent counties listed on each county's Wikipedia page.

In [3]:
def get_adj(county):
    """Return the Texas counties adjacent to ``county``, sorted by name.

    The county's Wikipedia article (``<county>,_Texas``) is downloaded and the
    links in the block that follows its "Adjacent counties" heading are read.
    Only links whose title ends with ", Texas" are kept, with that suffix
    stripped.

    Args:
        county: County name including the word "County",
            e.g. ``"Anderson County"``.

    Returns:
        A sorted list of names such as ``["Fannin County", "Hunt County"]``.

    Raises:
        requests.HTTPError: If Wikipedia answers with an error status.
        ValueError: If the article has no recognisable adjacent-counties
            section.
    """

    import requests
    from bs4 import BeautifulSoup

    suffix = ", Texas"
    page = county.replace(" ", "_")
    # A timeout keeps a stalled connection from hanging the notebook, and the
    # status check reports a bad page name as an HTTP error instead of a
    # confusing parse failure further down.
    response = requests.get(
        "https://en.wikipedia.org/wiki/" + page + ",_Texas", timeout=30
    )
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")

    # The section id differs between articles, so try every known variant.
    heading = soup.select_one(
        "#Adjacent_counties,#Adjacent_counties_and_municipalities,#Adjacent_counties_and_parish,#Adjacent_counties_and_parishes"
    )
    if heading is None:
        raise ValueError(f"No adjacent-counties section found for {county!r}")

    listing = heading.parent.find_next_sibling(["div", "ul"])
    if listing is None:
        raise ValueError(f"No adjacent-counties list found for {county!r}")

    adj = []
    for link in listing.find_all("a", href=True):
        # Some anchors carry no title attribute; skip them instead of failing.
        title = link.get("title", "")
        # Require the ", Texas" suffix so the slice below always removes
        # exactly that suffix and out-of-state neighbours are ignored.
        if title.endswith(suffix):
            adj.append(title[: -len(suffix)])

    return sorted(adj)

Test the function to get the adjacent counties

In [4]:
# Print the counties that directly border Delta County.
print(get_adj('Delta County'))

['Fannin County', 'Franklin County', 'Hopkins County', 'Hunt County', 'Lamar County', 'Red River County']


### 3. Call the get_adj() function for all adjacent counties

In [5]:
def get_adj2(county):
    """Return the study-area counties around ``county``, sorted by name.

    The result contains the counties adjacent to ``county`` plus every county
    adjacent to one of those, without ``county`` itself.

    Args:
        county: County name including the word "County",
            e.g. ``"Anderson County"``.

    Returns:
        A sorted list of county names.
    """

    # Fetch the direct neighbours once; each get_adj() call is a web request.
    adj_counties = get_adj(county)
    adj2 = list(adj_counties)

    # Names already collected, plus the centre county so it is never added.
    seen = set(adj_counties)
    seen.add(county)

    for adj in adj_counties:
        for adj_adj in get_adj(adj):
            if adj_adj not in seen:
                seen.add(adj_adj)
                adj2.append(adj_adj)

    return sorted(adj2)

Test the function to get the study area counties

In [6]:
# Print the first- and second-order neighbours of Delta County.
print(get_adj2('Delta County'))

['Bowie County', 'Camp County', 'Collin County', 'Fannin County', 'Franklin County', 'Grayson County', 'Hopkins County', 'Hunt County', 'Kaufman County', 'Lamar County', 'Morris County', 'Rains County', 'Red River County', 'Rockwall County', 'Titus County', 'Van Zandt County', 'Wood County']


### 4. Modify the map web page: set the county colors and update the map.

In [None]:
county = 'Delta County'

from bs4 import BeautifulSoup
import re

# Drop the trailing " County" and replace spaces with underscores, exactly as
# is done for the neighbouring counties below, so that multi-word names
# (e.g. "Red River County" -> "Red_River") are matched as well.
county_s = county[:-7].replace(" ", "_")
with open("data/texas_color_map.html") as fp:
    soup = BeautifulSoup(fp, 'html.parser')

old_text = soup.find('textarea')
# Rewrite the textarea as one "<name> " entry per line (with a leading
# newline) so each entry can be found as "\n<name> ".
new_text = "\n" + " \n".join(old_text.string.split()) + " "
# Append '1' to the selected county and '2' to every study-area county.
# re.escape() keeps any regex metacharacter in a name from being interpreted.
new_text = re.sub(f"\n{re.escape(county_s)} ", f"\n{county_s} " + '1', new_text)
for adj in get_adj2(county):
    adj_s = adj[:-7].replace(" ", "_")
    new_text = re.sub(f"\n{re.escape(adj_s)} ", f"\n{adj_s} " + '2', new_text)
# Assign the textarea content once instead of after every substitution.
old_text.string = new_text
with open("data/study_area.html", "wb") as f_output:
    f_output.write(soup.prettify("utf-8"))

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
import os
# Open the generated page in Chrome and press its "update" button.
# NOTE(review): the browser is left open here, presumably so the map can be
# inspected by hand - confirm.
driver = webdriver.Chrome()
filename = 'file:///'+os.getcwd()+'/' + 'data/study_area.html'
driver.get(filename)
driver.find_element(By.ID,"update").click()

### 5. Rewrite section 4 as a function that updates the map and downloads the PDF.

In [7]:
def plot_study_area(
    county,
    download_path="/Users/arthurli/Documents/Python code/download map",
    min_pdf_bytes=300 * 1024,
    max_attempts=None,
    download_timeout=60,
):
    """Mark a county's study area on the Texas map page and download it as PDF.

    The function reads ``data/texas_color_map.html``, appends ``1`` to the
    entry of ``county`` and ``2`` to the entry of every county returned by
    ``get_adj2(county)`` inside the page's ``<textarea>``, and writes the
    result to ``data/study_area.html``. That page is then opened in Chrome,
    its "update" button is pressed and the "capture_pdf" element is clicked.
    The downloaded file is renamed to ``"<county> study area.pdf"`` inside
    ``download_path``. The capture is repeated while the PDF is smaller than
    ``min_pdf_bytes``.

    Args:
        county: County name including the word "County",
            e.g. ``"Delta County"``.
        download_path: Directory Chrome downloads into and where the final
            PDF is stored.
        min_pdf_bytes: Smallest file size accepted as a finished map.
        max_attempts: Maximum number of captures, or ``None`` to keep trying
            until a large enough PDF is produced.
        download_timeout: Seconds to wait for a new PDF after each click.

    Returns:
        The path of the saved PDF, or ``None`` if an error occurred or every
        attempt produced a file that was too small.
    """

    import glob
    import os
    import time
    from pathlib import Path

    from bs4 import BeautifulSoup
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    def map_key(name):
        # "Red River County" -> "Red_River"
        if name.endswith(" County"):
            name = name[: -len(" County")]
        return name.replace(" ", "_")

    # Code appended to each map entry: '1' for the selected county and '2'
    # for the surrounding study-area counties.
    codes = {map_key(county): "1"}
    for adj in get_adj2(county):
        codes.setdefault(map_key(adj), "2")

    with open("data/texas_color_map.html") as fp:
        soup = BeautifulSoup(fp, "html.parser")

    # Rebuild the textarea as one "<name> <code>" entry per line. Entries
    # without a code keep the trailing space and nothing else.
    textarea = soup.find("textarea")
    entries = [f"{name} {codes.get(name, '')}" for name in textarea.string.split()]
    textarea.string = "\n" + "\n".join(entries)

    with open("data/study_area.html", "wb") as f_output:
        f_output.write(soup.prettify("utf-8"))

    options = webdriver.ChromeOptions()
    options.add_experimental_option(
        "prefs", {"download.default_directory": download_path}
    )
    options.add_argument("--disable-popup-blocking")
    driver = webdriver.Chrome(options=options)

    page_url = Path("data/study_area.html").resolve().as_uri()
    new_pdf = os.path.join(download_path, f"{county} study area.pdf")
    pdf_pattern = os.path.join(download_path, "*.pdf")

    try:
        attempt = 0
        while max_attempts is None or attempt < max_attempts:
            attempt += 1
            driver.get(page_url)
            driver.find_element(By.ID, "update").click()
            # Wait for the capture button before using it.
            download = WebDriverWait(driver, 60).until(
                EC.presence_of_element_located((By.ID, "capture_pdf"))
            )
            driver.execute_script("arguments[0].scrollIntoView();", download)

            if os.path.exists(new_pdf):
                os.remove(new_pdf)
            # Remember which PDFs already exist so that only the file created
            # by this click is renamed, never an unrelated older PDF.
            known = set(glob.glob(pdf_pattern))
            download.click()

            deadline = time.monotonic() + download_timeout
            fresh = set()
            while not fresh and time.monotonic() < deadline:
                time.sleep(0.5)
                fresh = set(glob.glob(pdf_pattern)) - known
            if not fresh:
                raise TimeoutError(
                    f"no PDF appeared in {download_path!r} "
                    f"within {download_timeout} s"
                )

            downloaded_pdf = max(fresh, key=os.path.getctime)
            os.replace(downloaded_pdf, new_pdf)
            if os.path.getsize(new_pdf) >= min_pdf_bytes:
                return new_pdf

        print(
            f"Map PDF is still smaller than {min_pdf_bytes} bytes "
            f"after {attempt} attempt(s)."
        )
        return None
    except Exception as exc:
        # Keep the original best-effort behaviour (report, do not raise), but
        # show what went wrong.
        print(f"Error occurs, please debug! ({exc!r})")
        return None
    finally:
        # Always shut the browser down, including after an error.
        driver.quit()


In [None]:
# Build the study-area map for Delta County and download it as a PDF.
plot_study_area(county='Delta County')