In [49]:
# importing Packages

from bs4 import BeautifulSoup
from selenium import webdriver
import time
import pandas as pd
import requests

Using Selenium and Beautiful Soup to extract the links to the individual game pages from the website

In [50]:
# Initialize WebDriver
driver = webdriver.Chrome()  # Swap in webdriver.Firefox(), etc. if you use a different browser

try:
    # Open the URL of the webpage
    url = "https://rawg.io/games"
    driver.get(url)

    # Automatically scroll the page
    scroll_pause_time = 2  # Seconds to pause between each scroll
    # Note: window.screen.height is the height of the screen, not of the browser window
    screen_height = driver.execute_script("return window.screen.height;")

    for i in range(350):
        # Scroll to i screen-heights from the top (the for loop advances i itself)
        driver.execute_script(f"window.scrollTo(0, {screen_height * i});")
        time.sleep(scroll_pause_time)

    # Fetch the data using BeautifulSoup after all data is loaded
    soup = BeautifulSoup(driver.page_source, "html.parser")
    # Process and save the data as needed
finally:
    # Close the WebDriver session even if loading, scrolling or parsing raised,
    # so a failed run does not leave a browser process behind
    driver.quit()

In [51]:
# Collect every game-name anchor (<a class="game-card-medium__info__name">) from the parsed listing page
links = soup.find_all("a", class_="game-card-medium__info__name")

In [52]:
links

[<a class="game-card-medium__info__name" href="/games/grand-theft-auto-v">Grand Theft Auto V<div class="rating rating_emoji rating_exceptional game-card-medium__info__rating" role="button" tabindex="0" title="exceptional"></div></a>,
 <a class="game-card-medium__info__name" href="/games/tomb-raider">Tomb Raider (2013)<div class="rating rating_emoji rating_recommended game-card-medium__info__rating" role="button" tabindex="0" title="recommended"></div></a>,
 <a class="game-card-medium__info__name" href="/games/bioshock-infinite">BioShock Infinite<div class="rating rating_emoji rating_exceptional game-card-medium__info__rating" role="button" tabindex="0" title="exceptional"></div></a>,
 <a class="game-card-medium__info__name" href="/games/half-life-2">Half-Life 2<div class="rating rating_emoji rating_exceptional game-card-medium__info__rating" role="button" tabindex="0" title="exceptional"></div></a>,
 <a class="game-card-medium__info__name" href="/games/god-of-war-2">God of War (2018)<d

In [53]:
# Testing links list
links[0]

<a class="game-card-medium__info__name" href="/games/grand-theft-auto-v">Grand Theft Auto V<div class="rating rating_emoji rating_exceptional game-card-medium__info__rating" role="button" tabindex="0" title="exceptional"></div></a>

In [54]:
link_1 = links[0].get('href')

In [56]:
# Creating a test link to test out extraction methods before making a function for them
game_link = "https://rawg.io"+link_1

In [57]:
game_link

'https://rawg.io/games/grand-theft-auto-v'

In [58]:
# Browser-like HTTP headers passed to the requests calls in this notebook
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
    'Accept-Language': 'en-US, en;q=0.5',
}

In [59]:
# Fetch the test game page; the timeout stops the cell from hanging forever
# if the server never responds (requests applies no timeout by default)
game_page = requests.get(game_link, headers = headers, timeout = 30)

In [60]:
game_page

<Response [200]>

In [61]:
game_soup = BeautifulSoup(game_page.content, 'html.parser')

In [62]:
# Testing Getting Title
game_soup.find("h1", attrs = {'class': 'heading heading_1 game__title'}).text.strip()

'Grand Theft Auto V'

In [63]:
# Function to return title

def get_title(soup):
    """Return the game title from a parsed game page.

    Looks up the <h1> whose class attribute is
    'heading heading_1 game__title' and returns its text with surrounding
    whitespace removed.

    Parameters
    ----------
    soup : parsed page object exposing ``find`` (e.g. a BeautifulSoup instance).

    Returns
    -------
    str
        The stripped title, or '' when the lookup raises AttributeError
        (for example because ``find`` returned None).
    """
    heading_selector = {'class': 'heading heading_1 game__title'}
    try:
        return soup.find("h1", attrs=heading_selector).text.strip()
    except AttributeError:
        return ''

In [64]:
# Testing getting platforms
game_soup.find("div", attrs = {'class': 'game__meta-text'}).text.strip()

'PlayStation 5, Xbox Series S/X, PC, PlayStation 4, PlayStation 3, Xbox 360, Xbox One'

In [65]:
# Function to get platforms

def get_platform(soup):
    """Return the text of the first 'game__meta-text' <div> on the page.

    The notebook uses this value as the platform list.
    NOTE(review): this relies on the platforms block being the first
    'game__meta-text' div on every game page - confirm for pages that
    have no platform information.

    Parameters
    ----------
    soup : parsed page object exposing ``find`` (e.g. a BeautifulSoup instance).

    Returns
    -------
    str
        The stripped text, or '' when the lookup raises AttributeError.
    """
    try:
        platform_text = soup.find("div", attrs={'class': 'game__meta-text'}).text.strip()
    except AttributeError:
        return ''
    else:
        return platform_text

In [66]:
# Testing getting release date
game_soup.find("div", attrs = {'class': 'game__meta-text', 'itemprop' : 'datePublished'}).text.strip()

'Sep 17, 2013'

In [67]:
# Function to get release date

def get_release_date(soup):
    """Return the release date string from a parsed game page.

    Finds the <div> with class 'game__meta-text' and
    itemprop='datePublished' and returns its text, stripped.

    Parameters
    ----------
    soup : parsed page object exposing ``find`` (e.g. a BeautifulSoup instance).

    Returns
    -------
    str
        The date text as shown on the page (not parsed into a date object),
        or '' when the lookup raises AttributeError.
    """
    date_selector = {'class': 'game__meta-text', 'itemprop': 'datePublished'}
    try:
        date_div = soup.find("div", attrs=date_selector)
        return date_div.text.strip()
    except AttributeError:
        return ''

In [68]:
# Testing getting game genre
game_genre = game_soup.find_all("meta", attrs = {'itemprop' : 'genre'})

In [69]:
for i in game_genre:
    print(i.get('content'))

Action
Adventure


In [81]:
# Function to get genres

def get_genres(soup):
    """Return the list of genre names from a parsed game page.

    Reads the ``content`` attribute of every <meta itemprop="genre"> tag.

    Parameters
    ----------
    soup : parsed page object exposing ``find_all`` (e.g. a BeautifulSoup instance).

    Returns
    -------
    list of str
        One entry per genre tag that has a ``content`` attribute, in page
        order. Returns [] when there are no such tags or when the lookup
        raises AttributeError.
    """
    try:
        genre_tags = soup.find_all("meta", attrs={'itemprop': 'genre'})
        contents = [tag.get('content') for tag in genre_tags]
    except AttributeError:
        return []
    # Skip tags without a content attribute: str(None) would otherwise put
    # the literal text 'None' into the genre list.
    return [str(content) for content in contents if content is not None]

In [85]:
# Testing getting publisher name
game_soup.find("meta", attrs = {'itemprop': 'publisher'}).get('content')

'Rockstar Games'

In [86]:
# Function to get publisher name

def get_publisher(soup):
    """Return the publisher name from a parsed game page.

    Reads the ``content`` attribute of the first <meta itemprop="publisher"> tag.

    Parameters
    ----------
    soup : parsed page object exposing ``find`` (e.g. a BeautifulSoup instance).

    Returns
    -------
    The value of the ``content`` attribute (whatever the tag's ``get``
    returns), or '' when the lookup raises AttributeError.
    """
    publisher_selector = {'itemprop': 'publisher'}
    try:
        publisher_tag = soup.find("meta", attrs=publisher_selector)
        return publisher_tag.get('content')
    except AttributeError:
        return ''

In [87]:
# Testing getting game description
game_soup.find_all("p")[1].text

'Rockstar Games went bigger, since their previous installment of the series. You get the complicated and realistic world-building from Liberty City of GTA4 in the setting of lively and diverse Los Santos, from an old fan favorite GTA San Andreas. 561 different vehicles (including every transport you can operate) and the amount is rising with every update. \nSimultaneous storytelling from three unique perspectives: \nFollow Michael, ex-criminal living his life of leisure away from the past, Franklin, a kid that seeks the better future, and Trevor, the exact past Michael is trying to run away from. \nGTA Online will provide a lot of additional challenge even for the experienced players, coming fresh from the story mode. Now you will have other players around that can help you just as likely as ruin your mission. Every GTA mechanic up to date can be experienced by players through the unique customizable character, and community content paired with the leveling system tends to keep everyon

In [88]:
game_soup.find("div", attrs = {'itemprop': 'description'}).find("p").text

'Rockstar Games went bigger, since their previous installment of the series. You get the complicated and realistic world-building from Liberty City of GTA4 in the setting of lively and diverse Los Santos, from an old fan favorite GTA San Andreas. 561 different vehicles (including every transport you can operate) and the amount is rising with every update. \nSimultaneous storytelling from three unique perspectives: \nFollow Michael, ex-criminal living his life of leisure away from the past, Franklin, a kid that seeks the better future, and Trevor, the exact past Michael is trying to run away from. \nGTA Online will provide a lot of additional challenge even for the experienced players, coming fresh from the story mode. Now you will have other players around that can help you just as likely as ruin your mission. Every GTA mechanic up to date can be experienced by players through the unique customizable character, and community content paired with the leveling system tends to keep everyon

In [89]:
# Function to get Description

def get_description(soup):
    """Return the game description from a parsed game page.

    Looks up the <div itemprop="description"> container and returns the
    text of its first <p>. When the container has no <p> child, the
    container's own stripped text is returned instead of silently yielding
    an empty description.

    Parameters
    ----------
    soup : parsed page object exposing ``find`` (e.g. a BeautifulSoup instance).

    Returns
    -------
    str
        The description text, or '' when the container is missing or the
        lookup raises AttributeError.
    """
    try:
        container = soup.find("div", attrs={'itemprop': 'description'})
        first_paragraph = container.find("p")
        if first_paragraph is not None:
            # Unchanged behaviour: text of the first paragraph, not stripped
            return first_paragraph.text
        # Fallback for descriptions stored as plain text without <p> markup
        return container.text.strip()
    except AttributeError:
        return ''

In [90]:
# Testing getting age rating
test_rating = game_soup.find_all("div", attrs = {'class': 'game__meta-text'})

In [91]:
test_rating[6]

<div class="game__meta-text">17+<!-- --> <!-- -->Mature</div>

In [92]:
# Function to get age rating

def get_age_rating(soup):
    """Return the age rating text from a parsed game page.

    Finds the <div class="game__meta-title"> whose string is 'Age rating'
    and returns the text of the next sibling <div class="game__meta-text">.

    Parameters
    ----------
    soup : parsed page object exposing ``find`` (e.g. a BeautifulSoup instance).

    Returns
    -------
    str
        The rating text (not stripped), or '' when either element is
        missing and the lookup raises AttributeError.
    """
    try:
        # `string=` is the current name of the filter; the older `text=`
        # keyword is deprecated in Beautiful Soup.
        title_div = soup.find('div', {'class': 'game__meta-title'}, string='Age rating')
        return title_div.find_next_sibling('div', {'class': 'game__meta-text'}).text
    except AttributeError:
        return ''

In [93]:
# Testing getting metascore
game_soup.find("div", attrs = {'class': 'metascore-label metascore-label_green'}).text

'92'

In [94]:
# Function to get metascore

def get_metascore(soup):
    """Return the metascore text from a parsed game page.

    First looks for the <div> whose class attribute is exactly
    'metascore-label metascore-label_green' (the original lookup). If none
    is found, falls back to the first <div> carrying the 'metascore-label'
    class, so a score rendered with a different colour modifier is not
    reported as missing.
    NOTE(review): the other modifier class names are assumed, not visible
    in this notebook - confirm against the site's markup.

    Parameters
    ----------
    soup : parsed page object exposing ``find`` (e.g. a BeautifulSoup instance).

    Returns
    -------
    str
        The score as text (e.g. '92'), or '' when no label is found and the
        lookup raises AttributeError.
    """
    try:
        label = soup.find("div", attrs={'class': 'metascore-label metascore-label_green'})
        if label is None:
            # Broader match: any div that has the base class among its classes
            label = soup.find("div", attrs={'class': 'metascore-label'})
        return label.text
    except AttributeError:
        return ''

In [95]:
# Column name -> list of scraped values (one entry per game); converted to a DataFrame later
d = {
    column: []
    for column in (
        'Title',
        'Platform',
        'Genres',
        'Release_date',
        'Publisher Name',
        'Age Rating',
        'Metascore',
        'Description',
    )
}

In [96]:
# Pull the href (a site-relative path such as '/games/...') out of every collected anchor
links_list = [anchor.get('href') for anchor in links]

In [97]:
# Extraction of information from all links
# NOTE: this cell appends to `d`; re-create `d` before re-running it,
# otherwise every game is recorded twice.

# One Session reuses the underlying connection across the many page requests
with requests.Session() as session:
    session.headers.update(headers)

    for link in links_list:
        try:
            # The timeout keeps a single stalled request from hanging the whole crawl
            webpage = session.get("https://rawg.io" + link, timeout=30)
        except requests.RequestException as err:
            # Report and skip the page rather than losing everything scraped so far
            print(f"Skipping {link}: {err}")
            continue
        new_soup = BeautifulSoup(webpage.content, "html.parser")

        # Applying functions to get values
        d['Title'].append(get_title(new_soup))
        d['Platform'].append(get_platform(new_soup))
        d['Genres'].append(get_genres(new_soup))
        d['Release_date'].append(get_release_date(new_soup))
        d['Publisher Name'].append(get_publisher(new_soup))
        d['Age Rating'].append(get_age_rating(new_soup))
        d['Metascore'].append(get_metascore(new_soup))
        d['Description'].append(get_description(new_soup))

# Build the DataFrame once, after the loop: rebuilding it on every iteration
# was quadratic work and left `games_df` undefined when there were no links.
games_df = pd.DataFrame.from_dict(d)
    

In [98]:
games_df

Unnamed: 0,Title,Platform,Genres,Release_date,Publisher Name,Age Rating,Metascore,Description
0,Grand Theft Auto V,"PlayStation 5, Xbox Series S/X, PC, PlayStatio...","[Action, Adventure]","Sep 17, 2013",Rockstar Games,17+ Mature,92,"Rockstar Games went bigger, since their previo..."
1,Tomb Raider (2013),"PlayStation 4, macOS, PC, Xbox One, Xbox 360, ...","[Action, Adventure]","Mar 5, 2013",Square Enix,17+ Mature,86,A cinematic revival of the series in its actio...
2,BioShock Infinite,"PlayStation 4, Xbox 360, Nintendo Switch, Linu...","[Action, Shooter]","Mar 26, 2013",2K Games,17+ Mature,94,"The third game in the series, Bioshock takes t..."
3,Half-Life 2,"PC, macOS, Xbox 360, Linux, Xbox, Android","[Action, Shooter]","Nov 16, 2004",Valve,17+ Mature,96,Gordon Freeman became the most popular nameles...
4,God of War (2018),"PC, PlayStation 4","[Action, Adventure, RPG]","Apr 20, 2018",Sony Interactive Entertainment,17+ Mature,94,It is a new beginning for Kratos. Living as a ...
...,...,...,...,...,...,...,...,...
3475,Motorsport Manager,"iOS, PC, Android","[Strategy, Simulation, Sports, Racing]","Aug 21, 2014",SEGA,10+ Everyone 10+,81,
3476,Freedom Force vs. the Third Reich,PC,"[Action, RPG, Strategy]","May 29, 2005",2K Games,13+ Teen,86,Freedom is threatened yet again and only Minut...
3477,Army of Two: The 40th Day,"PlayStation 3, Xbox 360","[Action, Shooter, Adventure]","Jan 8, 2010",Electronic Arts,17+ Mature,84,A carefully orchestrated series of mysterious ...
3478,"Kivi, Toilet and Shotgun",PC,"[Action, Adventure, Casual, Indie]","Jan 15, 2016",Back To Basics Gaming,Not rated,82,


In [99]:
# Dropping rows without a title.
# get_title returns '' (never NaN) when no title is found, so dropna() alone
# removes nothing; treat missing and blank/whitespace-only titles alike.
has_title = games_df['Title'].fillna('').str.strip().ne('')
# .copy() so later column assignments do not act on a slice of the old frame
games_df = games_df[has_title].copy()

In [100]:
games_df

Unnamed: 0,Title,Platform,Genres,Release_date,Publisher Name,Age Rating,Metascore,Description
0,Grand Theft Auto V,"PlayStation 5, Xbox Series S/X, PC, PlayStatio...","[Action, Adventure]","Sep 17, 2013",Rockstar Games,17+ Mature,92,"Rockstar Games went bigger, since their previo..."
1,Tomb Raider (2013),"PlayStation 4, macOS, PC, Xbox One, Xbox 360, ...","[Action, Adventure]","Mar 5, 2013",Square Enix,17+ Mature,86,A cinematic revival of the series in its actio...
2,BioShock Infinite,"PlayStation 4, Xbox 360, Nintendo Switch, Linu...","[Action, Shooter]","Mar 26, 2013",2K Games,17+ Mature,94,"The third game in the series, Bioshock takes t..."
3,Half-Life 2,"PC, macOS, Xbox 360, Linux, Xbox, Android","[Action, Shooter]","Nov 16, 2004",Valve,17+ Mature,96,Gordon Freeman became the most popular nameles...
4,God of War (2018),"PC, PlayStation 4","[Action, Adventure, RPG]","Apr 20, 2018",Sony Interactive Entertainment,17+ Mature,94,It is a new beginning for Kratos. Living as a ...
...,...,...,...,...,...,...,...,...
3475,Motorsport Manager,"iOS, PC, Android","[Strategy, Simulation, Sports, Racing]","Aug 21, 2014",SEGA,10+ Everyone 10+,81,
3476,Freedom Force vs. the Third Reich,PC,"[Action, RPG, Strategy]","May 29, 2005",2K Games,13+ Teen,86,Freedom is threatened yet again and only Minut...
3477,Army of Two: The 40th Day,"PlayStation 3, Xbox 360","[Action, Shooter, Adventure]","Jan 8, 2010",Electronic Arts,17+ Mature,84,A carefully orchestrated series of mysterious ...
3478,"Kivi, Toilet and Shotgun",PC,"[Action, Adventure, Casual, Indie]","Jan 15, 2016",Back To Basics Gaming,Not rated,82,


In [103]:
games_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3480 entries, 0 to 3479
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Title           3480 non-null   object
 1   Platform        3480 non-null   object
 2   Genres          3480 non-null   object
 3   Release_date    3480 non-null   object
 4   Publisher Name  3480 non-null   object
 5   Age Rating      3480 non-null   object
 6   Metascore       3480 non-null   object
 7   Description     3480 non-null   object
dtypes: object(8)
memory usage: 217.6+ KB


In [115]:
# Converting the Genres column to a column containing string values rather than lists

def joining_genres(lis):
    """Join a collection of genre names into one comma-separated string.

    Parameters
    ----------
    lis : iterable of genre names, or an already-joined string.

    Returns
    -------
    str
        The items joined with ', '. A string input is returned unchanged,
        so applying this function a second time to the same column (e.g.
        when the cell is re-run) does not split the text into single
        characters.
    """
    if isinstance(lis, str):
        return lis
    return ", ".join(str(item) for item in lis)

In [117]:
# Replace each row's genre list with its comma-separated string form
games_df['Genres'] = games_df['Genres'].map(joining_genres)

In [118]:
games_df

Unnamed: 0,Title,Platform,Genres,Release_date,Publisher Name,Age Rating,Metascore,Description
0,Grand Theft Auto V,"PlayStation 5, Xbox Series S/X, PC, PlayStatio...","Action, Adventure","Sep 17, 2013",Rockstar Games,17+ Mature,92,"Rockstar Games went bigger, since their previo..."
1,Tomb Raider (2013),"PlayStation 4, macOS, PC, Xbox One, Xbox 360, ...","Action, Adventure","Mar 5, 2013",Square Enix,17+ Mature,86,A cinematic revival of the series in its actio...
2,BioShock Infinite,"PlayStation 4, Xbox 360, Nintendo Switch, Linu...","Action, Shooter","Mar 26, 2013",2K Games,17+ Mature,94,"The third game in the series, Bioshock takes t..."
3,Half-Life 2,"PC, macOS, Xbox 360, Linux, Xbox, Android","Action, Shooter","Nov 16, 2004",Valve,17+ Mature,96,Gordon Freeman became the most popular nameles...
4,God of War (2018),"PC, PlayStation 4","Action, Adventure, RPG","Apr 20, 2018",Sony Interactive Entertainment,17+ Mature,94,It is a new beginning for Kratos. Living as a ...
...,...,...,...,...,...,...,...,...
3475,Motorsport Manager,"iOS, PC, Android","Strategy, Simulation, Sports, Racing","Aug 21, 2014",SEGA,10+ Everyone 10+,81,
3476,Freedom Force vs. the Third Reich,PC,"Action, RPG, Strategy","May 29, 2005",2K Games,13+ Teen,86,Freedom is threatened yet again and only Minut...
3477,Army of Two: The 40th Day,"PlayStation 3, Xbox 360","Action, Shooter, Adventure","Jan 8, 2010",Electronic Arts,17+ Mature,84,A carefully orchestrated series of mysterious ...
3478,"Kivi, Toilet and Shotgun",PC,"Action, Adventure, Casual, Indie","Jan 15, 2016",Back To Basics Gaming,Not rated,82,


In [119]:
# Write games_df to games_data.csv for further use: header row kept (the default), row index left out
games_df.to_csv("games_data.csv", index=False)