### Import required packages
This notebook requires pandas and selenium packages to run

In [15]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By

### Create webdriver

In [3]:
#Initialize the Chrome driver with ChromeDriverManager
driver_service = Service(ChromeDriverManager().install())
options = webdriver.ChromeOptions()
#Add any additional options you need, for example:
#options.add_argument('--headless')  # Run Chrome in headless mode
#options.add_argument('--disable-gpu')  # Disable GPU acceleration
driver = webdriver.Chrome(service=driver_service, options=options)

### Scrape book and character data from Tolkien wiki

In [4]:
#Go to wiki page for Tolkien universe
page_url = 'https://lotr.fandom.com/wiki/Category:Characters'
driver.get(page_url)

In [5]:
#Store each book wiki page to a unique variable
#Searching page_url by link text
hobbit_page = driver.find_element(By.LINK_TEXT, 'Category:The Hobbit characters')
lotr_page = driver.find_element(By.LINK_TEXT, 'Category:The Lord of the Rings characters')
silmarillion_page = driver.find_element(By.LINK_TEXT, 'Category:The Silmarillion characters')

In [6]:
#Store book character pages in a list for later looping
book_pages =[hobbit_page, lotr_page, silmarillion_page]

In [7]:
#Store meta data for the book character pages to a list of dictionaries
books=[]
for book in book_pages:
    book_url = book.get_attribute('href')
    book_name = book.text
    books.append({'book_name': book_name, 'url': book_url})

In [8]:
books

[{'book_name': 'Category:The Hobbit characters',
  'url': 'https://lotr.fandom.com/wiki/Category:The_Hobbit_characters'},
 {'book_name': 'Category:The Lord of the Rings characters',
  'url': 'https://lotr.fandom.com/wiki/Category:The_Lord_of_the_Rings_characters'},
 {'book_name': 'Category:The Silmarillion characters',
  'url': 'https://lotr.fandom.com/wiki/Category:The_Silmarillion_characters'}]

In [9]:
#Store characters from each book to a list of dictionaries
characters_list = []


for book in books:
    driver.get(book['url'])
    character_elems = driver.find_elements(By.CLASS_NAME, 'category-page__member-link')
    for elem in character_elems:
        characters_list.append({'book': book['book_name'], 'character': elem.text})

In [10]:
#Close driver
driver.close()

In [11]:
#Quit driver
driver.quit()

In [13]:
characters_list

[{'book': 'Category:The Hobbit characters', 'character': 'Bilbo Baggins'},
 {'book': 'Category:The Hobbit characters', 'character': 'Balin'},
 {'book': 'Category:The Hobbit characters', 'character': 'Bard'},
 {'book': 'Category:The Hobbit characters', 'character': 'Beorn'},
 {'book': 'Category:The Hobbit characters', 'character': 'Bifur'},
 {'book': 'Category:The Hobbit characters', 'character': 'Bladorthin'},
 {'book': 'Category:The Hobbit characters', 'character': 'Bofur'},
 {'book': 'Category:The Hobbit characters', 'character': 'Bolg'},
 {'book': 'Category:The Hobbit characters', 'character': 'Bombur'},
 {'book': 'Category:The Hobbit characters',
  'character': 'Captain of the guard'},
 {'book': 'Category:The Hobbit characters', 'character': 'Carc'},
 {'book': 'Category:The Hobbit characters',
  'character': 'Category:Characters that have appeared in the Hobbit and the Lord of the Rings'},
 {'book': 'Category:The Hobbit characters',
  'character': 'Chief of the guards (Woodland Rea

In [17]:
#Store character_list to a pandas dataframe
df = pd.DataFrame(characters_list)

In [18]:
#Save df to a pickle file for later use
df.to_pickle('characters.pkl')