In [1]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

#Set options to run Chrome in 'Headless' mode
chrome_options = Options()
#Actually enable headless mode; without this switch a visible browser window opens
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
#Chrome expects the window size as "width,height" (an "x" separator is not parsed)
chrome_options.add_argument("--window-size=1920,1080")


Python-dotenv could not parse statement starting at line 1


In [2]:
# Start a Chrome session; webdriver_manager supplies the chromedriver path and chrome_options configures the browser.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=chrome_options)




In [3]:
# Point the browser session at the DayZ Wiki Ammunition page.
url = "https://dayz.fandom.com/wiki/Ammunition"
driver.get(url)

This creates a dictionary of lists containing all the image names and image URLs found on the webpage.

If an image's `alt` text contains `.png`, that text is added to the dictionary as the image name.

If an image has a `data-src` attribute, that URL is added to the dictionary as the image link.

In [4]:
def get_image_info(driver):
    """
    Collect image names and image links from the page loaded in a webdriver.

    Names and links are gathered independently of each other, so the two
    lists in the returned dictionary are not guaranteed to have the same
    length or to line up index-by-index.

    Parameters
    ----------
    driver : WebDriver
        Selenium webdriver that has already navigated to the target URL.

    Returns
    -------
    image_dict : `dict`
        Dictionary with two lists. ``"name"`` holds the ``alt`` text of every
        ``<img>`` element whose ``alt`` contains ``.png``. ``"image_link"``
        holds the ``data-src`` value of every ``<img>`` element that has one.
    """

    image_dict = {"name":[],"image_link":[]}
    for image in driver.find_elements(By.TAG_NAME,"img"):
        #Get Image Name (the attribute is None when the <img> has no alt text)
        alt_text = image.get_attribute("alt")
        if alt_text is not None and '.png' in alt_text:
            image_dict['name'].append(alt_text)
        #Get Image Link (skipped when the <img> has no data-src attribute)
        data_src = image.get_attribute("data-src")
        if data_src is not None:
            image_dict['image_link'].append(data_src)

    return image_dict

In [5]:
# Scrape image names and links from the page the driver currently has open.
image_dict = get_image_info(driver)

Now that we have the image names and image links, we must match the image names to the image links.

To do this, we need to search each link for the given image name.

However, there are a few things we need to address and fix:

    1. The image link contains the image name, except that spaces in the name are replaced with underscores.
    2. The image sizes are not consistent (the preferred size is 250 pixels).

In [6]:
# Build URL-style names by swapping spaces for underscores, stored alongside the originals.
image_dict['adjusted_name'] = [name.replace(' ','_') for name in image_dict['name']]
# Preview one original/adjusted pair (index 2 requires at least three scraped names).
print(f'    Original Name: {image_dict["name"][2]}\n    Adjusted Name: {image_dict["adjusted_name"][2]}')

    Original Name: AmmoBox 22 50Rnd.png
    Adjusted Name: AmmoBox_22_50Rnd.png


In [7]:
def link_resize(image_link_list, size = '250'):
    """
    Function to resize an image given its URL from the DayZ Wiki.

    The size is the part of the URL between the ``down/`` marker and the
    query string (``?...``). Only that part is replaced. Links that do not
    contain ``down/`` carry no size to rewrite and are returned unchanged.

    Parameters
    ----------
    image_link_list : `list`
        A list containing url links to an image on the DayZ Wiki.

    size: `string`, optional
        Value of the size of the new image.
        Default resize value is 250 pixels.
        Non-string values (e.g. an ``int``) are converted with ``str()``.

    Returns
    -------
    new_link_list : `list`
        A list containing url links to an image on the DayZ Wiki
        adjusted by the `size` parameter. It has the same length and
        order as `image_link_list`.
    """

    marker = 'down/'
    new_size = str(size)
    new_link_list = []

    for link in image_link_list:
        marker_index = link.find(marker)
        #No size segment in this link: keep it as-is instead of corrupting it
        if marker_index == -1:
            new_link_list.append(link)
            continue
        #Everything up to and including the marker stays untouched
        size_start = marker_index + len(marker)
        link_start = link[:size_start]
        #Split only on the first '?' so the whole query string is preserved;
        #separator and query are empty strings when the link has no query
        _old_size, separator, query = link[size_start:].partition('?')
        new_link_list.append(link_start + new_size + separator + query)

    return new_link_list

In [8]:
# Rewrite every scraped link to the default size and store the results next to the originals.
image_dict['adjusted_links'] = link_resize(image_dict['image_link'])

Now we will match on the adjusted name and append the actual item name and image link to the dictionary.

In [9]:
def match_name_to_link(image_dict):
    """
    Pair each image name with every resized link that contains it.

    A name is paired with a link when its underscore form
    (``adjusted_name``) appears as a substring of that link. A name can
    therefore be paired with several links, and names with no matching
    link are left out.

    Parameters
    ----------
    image_dict : `dict`
        Dictionary providing the lists ``"name"``, ``"adjusted_name"``
        and ``"adjusted_links"``.

    Returns
    -------
    final_pair : `dict`
        Dictionary with two parallel lists: ``"item_Name"`` (original image
        names) and ``"image_link"`` (the matching resized links).
    """
    # Collect (name, link) pairs name-by-name, keeping link order within each name.
    matched = [
        (original, url)
        for original, underscored in zip(image_dict['name'], image_dict['adjusted_name'])
        for url in image_dict['adjusted_links']
        if underscored in url
    ]

    # Split the pairs back into the two parallel columns.
    return {
        "item_Name": [original for original, _ in matched],
        "image_link": [url for _, url in matched],
    }

In [10]:
# Pair names with resized links, then tabulate the Ammunition page results.
final_pair = match_name_to_link(image_dict)
ammunition_page = pd.DataFrame(final_pair)

Use this space to grab additional images from other pages on the DayZ Wiki.

In [11]:
def get_stuff(driver):
    """
    Run the full scrape-and-match pipeline on the driver's current page.

    Parameters
    ----------
    driver : WebDriver
        Webdriver that has already navigated to the page to scrape.

    Returns
    -------
    final_pair : `dict`
        Dictionary of matched image names and resized links, as returned
        by `match_name_to_link`.
    """
    scraped = get_image_info(driver)
    # Underscore form of each name, as it appears inside the links
    scraped['adjusted_name'] = [raw.replace(' ','_') for raw in scraped['name']]
    # Links rewritten to the default size
    scraped['adjusted_links'] = link_resize(scraped['image_link'])
    # Pair each name with the links that contain it
    return match_name_to_link(scraped)

In [12]:

# Scrape the Attachments page into its own table.
driver.get("https://dayz.fandom.com/wiki/Attachments")
attachment_page = pd.DataFrame(get_stuff(driver))

In [13]:
# Scrape the Magazines page into its own table.
driver.get('https://dayz.fandom.com/wiki/Magazines')
magazine_page = pd.DataFrame(get_stuff(driver))

In [14]:
# Scrape the Weapons page into its own table.
driver.get('https://dayz.fandom.com/wiki/Weapons')
weapons_page = pd.DataFrame(get_stuff(driver))

In [15]:
# Scrape the Resources page into its own table.
driver.get('https://dayz.fandom.com/wiki/Resources')
resource_page = pd.DataFrame(get_stuff(driver))

In [16]:
# Scrape the Equipment page into its own table.
driver.get('https://dayz.fandom.com/wiki/Equipment')
equipment_page = pd.DataFrame(get_stuff(driver))

In [17]:
# Scrape the Clothing page into its own table.
driver.get('https://dayz.fandom.com/wiki/Clothing')
clothing_info = pd.DataFrame(get_stuff(driver))

In [18]:
# Scrape the Medical Supplies page into its own table.
driver.get('https://dayz.fandom.com/wiki/Medical_Supplies')
medical_stuff = pd.DataFrame(get_stuff(driver))

In [19]:
# Scrape the Backpack page into its own table.
driver.get('https://dayz.fandom.com/wiki/Backpack')
backpack_info = pd.DataFrame(get_stuff(driver))

In [20]:
# Scrape the Food and Drink page into its own table.
driver.get('https://dayz.fandom.com/wiki/Food_and_Drink')
food_and_drink = pd.DataFrame(get_stuff(driver))

In [21]:
# Stack every per-page table, drop exact duplicate rows, and renumber the index from 0.
all_items_and_links = (
    pd.concat(
        [
            ammunition_page,
            attachment_page,
            backpack_info,
            clothing_info,
            equipment_page,
            food_and_drink,
            magazine_page,
            medical_stuff,
            resource_page,
            weapons_page,
        ]
    )
    .drop_duplicates()
    .reset_index(drop=True)
)

In [22]:
# Display the combined table of item names and image links.
all_items_and_links

Unnamed: 0,item_Name,image_link
0,Sporter 22 Wood.png,https://static.wikia.nocookie.net/dayz_gameped...
1,15rd Sporter 22 Mag.png,https://static.wikia.nocookie.net/dayz_gameped...
2,30 rnd 22 mag.png,https://static.wikia.nocookie.net/dayz_gameped...
3,Ammo 380.png,https://static.wikia.nocookie.net/dayz_gameped...
4,AmmoBox 380 35Rnd.png,https://static.wikia.nocookie.net/dayz_gameped...
...,...,...
9120,Plastic Explosive.png,https://static.wikia.nocookie.net/dayz_gameped...
9121,Improvised Explosive.png,https://static.wikia.nocookie.net/dayz_gameped...
9122,LandMine.png,https://static.wikia.nocookie.net/dayz_gameped...
9123,RGD-5 Grenade.png,https://static.wikia.nocookie.net/dayz_gameped...
