# Listing Available Game Reviews

### Library Management

In [30]:
%%capture
!pip install selenium
!pip install selenium webdriver-manager 

In [31]:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
from selenium import webdriver
import time
import pandas as pd

### Process

In [32]:
def remove_element(element):
    """Detach ``element`` from the page currently loaded in ``driver``.

    The removal is done in the browser: the element is handed to a small
    JavaScript snippet that asks its parent node to drop it.

    Args:
        element: the element to delete; it is passed to the script as
            ``arguments[0]``.
    """
    detach_script = """
        var element = arguments[0];
        element.parentNode.removeChild(element);
        """
    driver.execute_script(detach_script, element)

In [41]:
def extract_active_sections():
    """Collect the reviews currently on the page, then drop them from the DOM.

    Looks up every ``content-feed-grid-wrapper`` section through the
    module-level ``driver``. For each ``content-item`` inside a section it
    records the text of the first ``item-title`` element and the ``href``
    attribute of the first ``item-body`` element. Each section is removed
    from the page once processed, so a later call does not see it again.

    Items that lack a title or a body element are skipped. Indexing them
    blindly would raise ``IndexError`` and throw away every row gathered in
    this call, including rows from sections already removed from the page.

    Returns:
        pandas.DataFrame with the columns ``title`` and ``url``, one row per
        extracted review (no rows when no section is present).
    """
    game_titles = []
    game_review_urls = []

    for section in driver.find_elements(By.CLASS_NAME, "content-feed-grid-wrapper"):
        for review in section.find_elements(By.CLASS_NAME, "content-item"):
            titles = review.find_elements(By.CLASS_NAME, "item-title")
            bodies = review.find_elements(By.CLASS_NAME, "item-body")

            # find_elements returns an empty list when nothing matches;
            # skip incomplete items so both columns stay aligned.
            if not titles or not bodies:
                continue

            game_titles.append(titles[0].text)
            game_review_urls.append(bodies[0].get_attribute("href"))

        # Remove the processed section so it is not extracted twice.
        remove_element(section)

    return pd.DataFrame(data={
        'title' : game_titles,
        'url'   : game_review_urls
    })

In [None]:
from tqdm import tqdm

# Seconds to sleep between two checks for newly loaded sections.
TIME_STEP = 0.5
# Longest time (seconds) to keep polling after a scroll. When it elapses
# without any new section, the feed is treated as exhausted and the crawl ends.
MAX_WAIT = 10

# Scrape the IGN game-review feed: scroll to the bottom repeatedly, extract
# each newly loaded batch of sections, and checkpoint the result to
# "review_list.csv" after every batch.
all_reviews = pd.DataFrame()
driver = webdriver.Firefox()
try:
    with tqdm() as pbar:
        driver.get("https://www.ign.com/reviews/games")

        # Pick the second entry of the "sortBy" drop-down.
        sort_options = driver.find_elements(By.ID, 'sortBy')[0]
        select = Select(sort_options)
        select.select_by_index(1)

        while True:
            driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);"
            )

            # Wait until new sections show up, polling every TIME_STEP seconds
            # for at most MAX_WAIT seconds.
            waited = 0
            has_new_sections = False
            while waited <= MAX_WAIT:
                if driver.find_elements(By.CLASS_NAME, "content-feed-grid-wrapper"):
                    has_new_sections = True
                    break
                time.sleep(TIME_STEP)
                waited += TIME_STEP

            # Nothing was loaded within the time budget: stop instead of
            # scrolling forever. Page height is not used as the stop signal
            # because extracted sections are deleted from the DOM, so the
            # height presumably does not grow monotonically.
            if not has_new_sections:
                break

            reviews = extract_active_sections()
            # ignore_index avoids duplicate row labels across batches.
            all_reviews = pd.concat([all_reviews, reviews], ignore_index=True)
            # Rewrite the CSV after every batch so progress survives an
            # interruption.
            all_reviews.to_csv("review_list.csv", header=True, index=False)

            pbar.update(reviews.shape[0])
finally:
    # Always close the browser, including on errors or a manual interrupt.
    driver.quit()