In [23]:
# import stuff
from bs4 import BeautifulSoup
import requests
import re
import pandas as pd

# Downloading website data
# Use user agent to be able to access data
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}
# url of website
url = 'https://genshin-impact.fandom.com/wiki/Character/List'
# get html data using beautifulsoup
response = requests.get(url,headers=headers)
soup = BeautifulSoup(response.text, "html.parser")

# the character list is in <tbody>
character_list = soup.find('tbody')

# each character is in <tr>
characters = character_list.findAll('tr')

# the first <tr> is just the columns names from the original website
characters.pop(0)

# create empty lists for the columns
names = []
qualities = []
elements = []
weapons = []
regions = []
model_types = []
release_dates = []
versions = []

# loop through each character
for character in characters:
  # find all character details and put it into a list
  details = character.findAll('td')
  # label each detail (just so it's easier to read later)
  # for context, details[0] is for the character icon, which isn't used here
  name = details[1].text
  quality = details[2].find('img').get('title')
  element_with_icon = details[3].findAll('a')
  # the protagonist doesn't have an element
  if len(element_with_icon) != 0:
    element = element_with_icon[1].text
  else:
    element = details[3].text
  weapon_with_icon = details[4].findAll('a')
  weapon = weapon_with_icon[1].text
  region_with_icon = details[5].findAll('a')
  # a collab character and the protagonist has no region
  # the if statement is for these special cases
  if len(region_with_icon) != 0:
    region = region_with_icon[1].text
  else:
    region = details[5].text

  # The protagonist has two model types
  character_model_type = details[6].findAll('a')
  if len(character_model_type) != 1:
    model_type = character_model_type[0].text + " (Aether) / " + character_model_type[1].text + " (Lumine)"
  else:
    model_type = character_model_type[0].text
  release_date = details[7].text
  version = details[8].text

  # put the details into their respective lists
  names.append(name)
  qualities.append(quality)
  elements.append(element)
  weapons.append(weapon)
  regions.append(region)
  model_types.append(model_type)
  release_dates.append(release_date)
  versions.append(version)

# create dataframe using the columns and their respective lists
dataframe = pd.DataFrame(
    {
        'Name': names,
        'Quality': qualities,
        'Element': elements,
        'Weapon': weapons,
        'Region': regions,
        'Model type': model_types,
        'Release date': release_dates,
        'Version': versions
    }
)

# convert to csv
dataframe.to_csv('genshin_characters.csv', index=False)

# show dataframe
print(dataframe)




              Name  Quality Element    Weapon     Region     Model type  \
0         Albedo\n  5 Stars     Geo     Sword  Mondstadt    Medium Male   
1      Alhaitham\n  5 Stars  Dendro     Sword     Sumeru      Tall Male   
2           Aloy\n  5 Stars    Cryo       Bow     None\n  Medium Female   
3          Amber\n  4 Stars    Pyro       Bow  Mondstadt  Medium Female   
4   Arataki Itto\n  5 Stars     Geo  Claymore    Inazuma      Tall Male   
..             ...      ...     ...       ...        ...            ...   
66        Yaoyao\n  4 Stars  Dendro   Polearm      Liyue   Short Female   
67         Yelan\n  5 Stars   Hydro       Bow      Liyue    Tall Female   
68       Yoimiya\n  5 Stars    Pyro       Bow    Inazuma  Medium Female   
69       Yun Jin\n  4 Stars     Geo   Polearm      Liyue  Medium Female   
70       Zhongli\n  5 Stars     Geo   Polearm      Liyue      Tall Male   

            Release date Version  
0    December 23, 2020\n   1.2\n  
1     January 18, 2023\n   3.