# Projet Web Scrapping

In [None]:
!pip install bs4
!pip install selenium

In [None]:
# Other libs possible to use
#pip install requests
#pip install jyquickhelper
#pip install urllib3
#pip install selenium
#pip install scrapy

In [1]:
# Import packages for Beautiful Soup
import urllib
import bs4
import requests
from urllib import request
from bs4 import BeautifulSoup
import json

In [2]:
# importing packages for selenium
from selenium import webdriver
from selenium.webdriver.common.by import By

In [3]:
# Global var
Steam = "https://store.steampowered.com/search/?term="
IG = "https://www.instant-gaming.com/fr/rechercher/?gametype=games&query="
G2A = "https://www.g2a.com/fr/category/gaming-c1?f[product-kind][0]=10&query="
EpicGames = "https://store.epicgames.com/en-US/browse?q="
GOG = "https://www.gog.com/fr/games?query="

In [4]:
#Function which return a Json object
def ReturnJsonElem(title =  None, picture_url = None, price = None, opinion = None ):
    # Create a dictionary with the information
    game_info = {
        'title': title,
        'picture_url': picture_url,
        'price': price,
        'opinion' : opinion
    }

    # Convert the dictionary to a JSON object
    game_info_json = json.dumps(game_info)

    return game_info_json

Now we scrap the search pages of all platforms

In [25]:
# Steam
def ScrappingSteam(userSearch):
    # Send a GET request to the Steam search page for FIFA 22
    url = str(Steam + userSearch)
    response = requests.get(url)

    # Parse the HTML content of the page
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the first result on the page
    result = soup.find('a', class_='search_result_row')
    
    # Find the opinion of the game
    opinion_element = soup.find("span", {"class": "search_review_summary positive"})

    # Extract the title, picture, and price from the result
    try:
        title = result.find('span', class_='title').text
        picture_url = result.img['src']
        price = result.find('div', class_='search_price').text.replace(' ','')
        opinion = opinion_element["data-tooltip-html"]
    except (IndexError, AttributeError):
        title='0';
        picture_url='0';
        price='0';
        opinion='0';
    except ConnexionError:
        title='???'
        picture_url='0'
        price='???'
        opinion='???'
    # Return the informations
    return ReturnJsonElem(title, picture_url, price, opinion)

In [None]:
# Instant Gaming
def ScrappingIG(userSearch):
    IG_url = str(IG + userSearch.replace(" ","+"))
    request_text = requests.get(IG_url).content
    htmlpage = bs4.BeautifulSoup(request_text, "html")
    price = htmlpage.find_all(class_ = "price")[1].text
    title = htmlpage.find('span', class_='title').text
    
    # Return the informations
    return ReturnJsonElem(title, picture_url, price, opinion)

In [26]:
# Instant Gaming
def ScrappingIG(userSearch):
    # scraping
    driver = webdriver.Chrome()
    IG_url = str(IG + userSearch.replace(" ","%20"))
    driver.get(IG_url)
    # we select the correct data 
    try:
        title = driver.find_elements(By.CLASS_NAME, "title")[4].get_attribute('innerHTML');
        picture_url = driver.find_elements(By.CLASS_NAME, "picture")[0].get_attribute('src');
        price = driver.find_elements(By.CLASS_NAME, "price")[1].get_attribute('innerHTML');
        opinion='0';
    except IndexError:
        title='0';
        picture_url='0';
        price='0';
        opinion='0';
    except ConnexionError:
        title='???'
        picture_url='0'
        price='???'
        opinion='???'
    # Closes the current window
    # driver.close()
    
    # Return the informations
    return ReturnJsonElem(title, picture_url, price, opinion)

In [None]:
# G2A
def ScrappingG2A(userSearch):
    # scraping
    driver = webdriver.Chrome()
    driver.get(G2A + userSearch.replace(" ","+"))

    # we select the correct data
    content = driver.find_element(By.CLASS_NAME, "hMxmQl").get_attribute('innerHTML');
    price=str(content.split(">")[3] + content.split(">")[1][0])
    print(price)

    # Closes the current window
    driver.close()
    
    # Return the informations
    return ReturnJsonElem(title, picture_url, price, opinion)

'''
from selenium import webdriver
from selenium.webdriver.common.by import By

# Set the URL you want to scrape
url = "https://www.g2a.com/fr/search?query="

# Create a new Chrome browser and navigate to the URL
driver = webdriver.Chrome()
driver.get(url)

# Find the title of the game
title = driver.find_element(By.XPATH, '//h3[@class="card-title"]').text

# Find the price of the game
price = driver.find_element(By.XPATH, '//span[@class="price"]').text

# Find the picture of the game
picture = driver.find_element(By.XPATH, '//img[@class="card-img-top"]').get_attribute('src')

# Find the opinion of the game
opinion = driver.find_element(By.XPATH, '//div[@class="card-text"]').text

# Print the results
print(title)
print(price)
print(picture)
print(opinion)

# Close the browser
driver.quit()
'''

In [28]:
#Epicgames
def ScrappingEpics(userSearch):
    # scraping
    driver = webdriver.Chrome()
    EP_url = str(EpicGames + userSearch.replace(" ","%20")+'&sortBy=relevancy')
    driver.get(EP_url)
    #xpath='/html/body/div[1]/div/div[4]/main/div[2]/div/div/div/div/section/div/section/div/section/section/ul/li/div/div/a/div/div/div[2]/div[3]/div/div/div/div/span'

    # we select the correct data 
    try:
        title = driver.find_elements(By.CLASS_NAME, "css-rgqwpc")[0].get_attribute('innerHTML');
        picture_url = driver.find_elements(By.CLASS_NAME, "css-174g26k")[0].get_attribute('src');
        price = driver.find_elements(By.CLASS_NAME, "css-119zqif")[5].get_attribute('innerHTML');
        opinion='0';
    except IndexError:
        title='0';
        picture_url='0';
        price='0';
        opinion='0';
    except ConnexionError:
        title='???'
        picture_url='0'
        price='???'
        opinion='???'
    # Closes the current window
    # driver.close()
    
    # Return the informations
    return ReturnJsonElem(title, picture_url, price, opinion)

In [27]:
# GOG
def ScrappingGOG(userSearch):
    GOG_url = str(GOG + userSearch.replace(" ","%20"))
    request_text = request.urlopen(GOG_url).read()
    htmlpage = bs4.BeautifulSoup(request_text, "html")
    try:
        title = htmlpage.find_all(class_="product-tile__title")[0].get('title')
        picture_url='0'
        price = htmlpage.find_all(class_ = "final-value")[0].text
        opinion='0'
    except IndexError:
        title='0';
        picture_url='0';
        price='0';
        opinion='0';
    except ConnexionError:
        title='???'
        picture_url='0'
        price='???'
        opinion='???'
        
    # Return the informations
    return ReturnJsonElem(title, picture_url, price, opinion)

# Result 

In [17]:

# We define the game at find in the page
userSearch = "ixion"

# Steam
# print(ScrappingSteam(userSearch))

# IG
print(ScrappingIG(userSearch))

# G2A
# print(ScrappingG2A(userSearch))

# Epic Game
# print(ScrappingEpics(userSearch))

# GOG
# print(ScrappingGOG(userSearch))

{"title": "Ixion", "picture_url": "https://s3.gaming-cdn.com/images/products/9086/380x218/ixion-pc-jeu-steam-europe-cover.jpg?v=1670438249", "price": "22.99\u20ac", "opinion": "0"}


In [18]:
def APIgames(userSearch):
    table=[]
    # Steam
    table.append(ScrappingSteam(userSearch))

    #IG
    table.append(ScrappingIG(userSearch))

    # G2A
#     table.append(ScrappingG2A(userSearch))

    # Epic Game
    table.append(ScrappingEpics(userSearch))

    #GOG
    table.append(ScrappingGOG(userSearch))
    
    return table    

In [29]:
APIgames('fifa 23')

ConnectionError: ('Connection aborted.', TimeoutError(10060, 'Une tentative de connexion a échoué car le parti connecté n’a pas répondu convenablement au-delà d’une certaine durée ou une connexion établie a échoué car l’hôte de connexion n’a pas répondu', None, 10060, None))