In [1]:
import dataprocessing.datacolect as dc
import pandas as pd

# 1. Sobre a coleta dos jogos

O objetivo deste *notebook* é obter os nomes dos jogos presentes no serviço ***Xbox Game Pass***. Para isso, é necessário extrair essas informações de uma página que as mantenha constantemente atualizadas. Isso se deve à natureza do serviço.

O ***Xbox Game Pass*** recebe novos jogos e tem outros retirados periodicamente. Os dados serão coletados do portal ***Windows Central***.

![Alt text](image.png)

Ademais, é importante obter os jogos para os diferentes segmentos do serviço que hoje se divide em:

> *Game Pass Core.*
>
> *Game Pass Standard.*
>
> *Game Pass Ultimate.*
>
> *PC Game Pass.*

Os três primeiros são destinados aos consoles e o último, apenas aos computadores.

# 2. Rotina de *scraping*

In [2]:
# URL of the Windows Central page that lists the Game Pass games:

url = 'https://www.windowscentral.com/xbox-game-pass-list'

In [3]:
# Build the project's Soup helper for `url` and keep only the object returned
# by get_soup() (presumably the parsed page - confirm in dataprocessing.datacolect).
soup = dc.Soup(url).get_soup()

# Helper whose find_elements() is used by the following cells.
scrape = dc.Scrape()

In [4]:
# Sanity check on the kind of object returned by get_soup().
type(soup)

bs4.BeautifulSoup

In [5]:
# Look up the page's <ul> elements (assuming find_elements returns every match
# for the tag - confirm); get_games_names indexes this result by position.
games_soup_list = scrape.find_elements(tag='ul', soup=soup)

In [6]:
def get_games_names(soup, index: int=2):
    """Return the upper-cased game titles found in one of the scraped lists.

    Parameters
    ----------
    soup : sequence
        Indexable collection of scraped list elements; ``soup[index]`` is
        searched for ``<li>`` entries through the notebook-level ``scrape``
        helper.
    index : int, default 2
        Position of the wanted list inside ``soup``. Only 2 (console),
        3 (PC) and 4 (Cloud) are accepted.

    Returns
    -------
    list of str
        One entry per ``<li>`` element: its text with the '(👇🏻)' marker
        removed, surrounding whitespace stripped, and converted to upper case.

    Raises
    ------
    ValueError
        If ``index`` is not 2, 3 or 4.
    """
    # Guard clause: reject unsupported positions before touching the soup.
    if index not in (2, 3, 4):
        raise ValueError('Parâmetro index fora do intervalo. As opções são 2 (console), 3 (PC) e 4 (Cloud).')

    games_soup = scrape.find_elements(tag='li', soup=soup[index])

    games_list = []
    for game in games_soup:
        title = game.get_text()
        # Remove the marker together with its leading space when present, and
        # also when it is glued to the title (the original left that case in).
        title = title.replace(' (👇🏻)', '').replace('(👇🏻)', '')
        # Strip stray whitespace so that e.g. 'BESIEGE ' and 'BESIEGE' compare
        # equal when the lists are de-duplicated later.
        games_list.append(title.strip().upper())

    return games_list


def games_dataframe(games: list):
    """Wrap a list of game titles in a single-column DataFrame.

    Parameters
    ----------
    games : list
        Game titles, one per row of the result.

    Returns
    -------
    pandas.DataFrame
        Frame with one column, ``'name'``, holding ``games`` in order.
    """
    columns = {'name': games}
    return pd.DataFrame(data=columns)

In [7]:
# One list of titles per platform. Positions 2, 3 and 4 are the ones
# get_games_names accepts for console, PC and Cloud respectively.
console_games, pc_games, cloud_games = (
    get_games_names(games_soup_list, index=position) for position in (2, 3, 4)
)

In [8]:
# Wrap the console titles in a single-column ('name') DataFrame and display it.
# NOTE(review): this rebinds `console_games` from a list to a DataFrame, so
# re-running only this cell would feed the DataFrame back into games_dataframe.
console_games = games_dataframe(console_games)
console_games

Unnamed: 0,name
0,7 DAYS TO DIE
1,A PLAGUE TALE: REQUIEM
2,A WAY OUT
3,AGE OF EMPIRES II: DEFINITIVE EDITION
4,AGE OF EMPIRE IV
...,...
460,YOU SUCK AT PARKING
461,ZOMBIE ARMY 4: DEAD WAR
462,ZOO TYCOON: ULTIMATE ANIMAL COLLECTION
463,ZUMA


In [9]:
# Wrap the PC titles in a single-column ('name') DataFrame and display it.
# NOTE(review): this rebinds `pc_games` from a list to a DataFrame, so
# re-running only this cell would feed the DataFrame back into games_dataframe.
pc_games = games_dataframe(pc_games)
pc_games

Unnamed: 0,name
0,7 DAYS TO DIE
1,A PLAGUE TALE: REQUIEM
2,A WAY OUT
3,AGE OF EMPIRES: DEFINITIVE EDITION
4,AGE OF EMPIRES II: DEFINITIVE EDITION
...,...
441,YAKUZA KIWAMI 2
442,YAKUZA: LIKE A DRAGON
443,YOU SUCK AT PARKING
444,ZOMBIE ARMY 4: DEAD WAR


In [10]:
# Wrap the cloud titles in a single-column ('name') DataFrame (not displayed).
# NOTE(review): this rebinds `cloud_games` from a list to a DataFrame, so
# re-running only this cell would feed the DataFrame back into games_dataframe.
cloud_games = games_dataframe(cloud_games)

In [11]:
# Start from an empty frame; the combined game list is assigned to `df` further down.
df = pd.DataFrame()

In [12]:
# Per-platform frames, in the order they are combined: console, PC, cloud.
list_dataframes = [console_games, pc_games, cloud_games]

In [13]:
# Combine the per-platform frames with a single concat call instead of growing
# `df` inside a loop, then keep the first occurrence of each title. Rows, order
# and index labels match the incremental version, and the cell no longer
# depends on whatever `df` held before it ran.
# NOTE: the original per-frame index labels are kept, so labels can repeat.
df = pd.concat(list_dataframes).drop_duplicates(subset='name')

In [19]:
# Print every title in the combined frame, one per line, so the whole list can
# be read without pandas' display truncation.
for name in df['name']:
    print(name)

7 DAYS TO DIE
A PLAGUE TALE: REQUIEM
A WAY OUT
AGE OF EMPIRES II: DEFINITIVE EDITION
AGE OF EMPIRE IV
AIRBORNE KINGDOM
ALAN WAKE'S AMERICAN NIGHTMARE
ALICE: MADNESS RETURNS
AMONG US
AMNESIA: COLLECTION
AMNESIA: REBIRTH
AMNESIA: THE BUNKER
ANTHEM
ANVIL
ARCADE PARADISE
ARK: SURVIVAL EVOLVED EXPLORER'S EDITION
ARK: ULTIMATE SURVIVOR EDITION
AS DUSK FALLS
ASSASSIN’S CREED ORIGINS
ASSASSIN’S CREED ODYSSEY
ASTRONEER
ATOMIC HEART
BACK 4 BLOOD
BANJO-KAZOOIE: NUTS & BOLTS
BANJO-KAZOOIE
BANJO-TOOIE
BATMAN: ARKHAM KNIGHT
BATTLEFIELD 1943
BATTLEFIELD 1
BATTLEFIELD 3
BATTLEFIELD 4
BATTLEFIELD: BAD COMPANY
BATTLEFIELD: BAD COMPANY 2
BATTLEFIELD HARDLINE
BATTLEFIELD V
BATTLETOADS
BEACON PINES
BEJEWELED 2
BEJEWELED 3
BEN 10: POWER TRIP
BESIEGE 
BLACK
BLAZBLUE: CROSS TAG BATTLE SPECIAL EDITION
BLEEDING EDGE
BLINX: THE TIME SWEEPER
BRAMBLE: THE MOUNTAIN KING
BROKEN AGE
BRÜTAL LEGEND
BURNOUT PARADISE REMASTERED
CAR MECHANIC SIMULATOR 2021
CASSETTE BEASTS
CELESTE
CITIES: SKYLINES – REMASTERED
CHAINED ECHO