In [1]:
import requests
import json
import pandas as pd
import numpy as np

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('max_colwidth',None)

## API: Informações das cartas

In [3]:
def basic_cards_information():
    
    base_url = 'https://api.godsunchained.com'
    version = 'v0'
    proto = 'proto?page=0'
    url = "/".join(i for i in [base_url, version, proto])
    
    try:
        req = requests.get(url)
    except Exception as erro:
        print('DEU ERRO:\t', erro)
    
    cards = json.loads(req.text)['records']
    informacoes = ['id','name','effect','god','rarity',
                   ('tribe','String'), 'mana',('attack','Int64'),
                   ('health','Int64'), 'type', 'set']
    
    resposta = []
    for card in cards:
        atributos = []
        for inf in informacoes:
            if not isinstance(inf,str):
                atributos.append(card[inf[0]][inf[1]])
                continue
            atributos.append(card[inf])
        resposta.append(atributos)
        
            
    df = pd.DataFrame(data=resposta, columns=['id','name','effect','god',
                                                'rarity','tribe', 'mana',
                                                'attack','health', 'type', 'set'])
    return df

## Web scrapping: Preços das cartas

In [4]:
# Coloque aqui a cotacao atual do ETH e GODS, em dolares.

def transformPrices(df):
    cotacao_ETH = 1472.73
    cotacao_GODS = 0.330516
    df['price_ETH_$'] = df['price_ETH_$'].apply(lambda x: float(x)*cotacao_ETH)
    df['price_GODS_$'] = df['price_GODS_$'].apply(lambda x: float(x)*cotacao_GODS)

    return df

In [5]:
def diferenca_entre_ETH_e_GODS(df):
    lista = []
    for e, g in zip(df['price_ETH_$'], df['price_GODS_$']):
        lista.append(abs(e-g)/min(e,g))

    df['difference_%'] = lista
    return df

In [6]:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from time import sleep


def takePrices():
    
    for i in ['ETH', 'GODS']:
        try:
            options = Options()
            options.page_load_strategy = 'normal' # normal, eager, none # VELOCIDADE DE CARREGAMENTO
            #options.add_argument('--headless')
            driver = webdriver.Chrome(options=options)
            #driver.set_window_size(800,1024) #TAMANHO DA TELA
            driver.get(f"https://cardsunchained.com/?days=14&il=1&is=0&curr={i}")
            driver.execute_script("window.scrollTo(0, 100000)")
        except Exception as erro:
            print(f"DEU ERRO: {erro}")

        sleep(5)
        site = BeautifulSoup(driver.page_source, 'html.parser')
        cartas = site.findAll('tr', attrs={'class': 'row'})
        #print(cartas[0].prettify())



        if i == 'ETH':
            names = []
            Trends = []
            Sold_per_weeks = []
            Average_variations = []
            Lowest_prices = []
            for carta in cartas:
                nome = carta.find('th', attrs={'class': 'name'}).text
                tendencia = carta.find('span', attrs={'class': 'js-sorter-rawdata'}).text.strip('│')
                tendencia = int(tendencia) if tendencia != None else None
                vendidos = carta.find('td', attrs={'class': 'sold'}).text
                try:
                    vendidos = int(vendidos)
                except:
                    vendidos = None
                var_preco = carta.find('td', attrs={'class': 'stat'}).find('span', attrs={'class': 'js-sorter-rawdata'})
                var_preco = float(var_preco.text.strip('│'))/100 if var_preco != None else None
                preco_baixo = carta.find('td', attrs={'class': 'price'}).text
                preco_baixo = preco_baixo if preco_baixo != None else None

                names.append(nome)
                Trends.append(tendencia)
                Sold_per_weeks.append(vendidos)
                Average_variations.append(var_preco)
                Lowest_prices.append(preco_baixo)
            
            df_ETH = pd.DataFrame(data={'name': names, 'Tendencia': Trends, 'Vendidos por semana': Sold_per_weeks,
                                           'Variação do preço %': Average_variations, 'price_ETH_$': Lowest_prices})
        else:
            names = []
            Lowest_prices = []
            for carta in cartas:
                nome = carta.find('th', attrs={'class': 'name'}).text
                preco_baixo = carta.find('td', attrs={'class': 'price'}).text
                preco_baixo = preco_baixo if preco_baixo != None else None

                names.append(nome)
                Lowest_prices.append(preco_baixo)
            
            df_GODS= pd.DataFrame(data={'name': names, 'price_GODS_$': Lowest_prices})

    df_geral = pd.merge(df_ETH, df_GODS.loc[:, ['name', 'price_GODS_$']], on='name').dropna(axis=0)
    return diferenca_entre_ETH_e_GODS(transformPrices(df_geral))

## Web scrapping:

In [7]:
def web_scrapping():

    import requests
    from bs4 import BeautifulSoup
    import pandas as pd
    import numpy as np
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.by import By
    from time import sleep

    options = Options()
    options.add_argument('window-size=800,800')

    navegador = webdriver.Chrome()
    navegador.get('https://gudecks.com/meta/card-rankings?timeFrame=30&userRank=2')

    sleep(10)

    but = navegador.find_element(By.XPATH, '//*[@id="root"]/div[3]/button')
    but.click()

    sleep(0.5)

    # Alterando o numero de matchs necessario para selecionar as cartas 
    #but_2 = navegador.find_element(By.XPATH, '//*[@id="root"]/div[1]/div/div[2]/div[1]/div[1]/div[7]/div[2]/div/div/div')
    #but_2.click()
    #navegador.find_element(By.XPATH, '//*[@id="rrs-decksWithCard-menu"]/li[3]').click() # li[1] ou li[2]...

    informations = [] # Todas as paginas
    for i in range(69):
        informations.append(BeautifulSoup(navegador.page_source, 'html.parser'))
        but = navegador.find_element(By.XPATH, '//*[@id="root"]/div[1]/div/div[2]/div[1]/div[2]/div[2]/div/div[3]/button')
        but.click()
        sleep(0.025)

    all_cards = [] 
    for site in informations:
        cards = site.findAll('div', attrs={'class':'rt-tr-group'})
        for card in cards:
            little_list = []
            try:
                little_list.append(card.find('div', attrs={'class':'card-ranking-name'}).text) # Nome da carta
            except:
                continue
            little_list.extend([atr.text for atr in card.findAll('div', attrs={'class':'rt-td react-table-cell'})]) #Info das cartas

            all_cards.append(little_list)

    colunas = ['Name', 'God', 'Set', 'Matches', 'In % of Decks', 'Copies', 'Deck win rate', 'Unique Deck WR','Est. Price']
    df_web_scrap = pd.DataFrame(all_cards, columns=colunas)
    return df_web_scrap




## Coletando os dados

In [10]:
taked = False
def data_colect():
    global taked
    # Informações das cartas
    if not taked:
        df = basic_cards_information()
        print("Informações basicas foram pegas")
    
    # Web scrapping: Mais informações das cartas
    if not taked:
        df_web_scrap = web_scrapping()
        print("Dados estatisticos das cartas foram pegos")
    
    # Informações economicas das cartas
    if not taked:
        df_precos = takePrices()
        print("Preços das cartas foram pegos")
    
    taked = True
    # Taking info about the data_sets
    info = {}
    dados = [df, df_web_scrap, df_precos]
    nomes = ["basic", "web scrapping", "pricing"]
    for i, j in zip(dados, nomes):
        col_names = list(i.columns)
        info[j] = [i.shape] + col_names
    
    size_max = 0
    for key in info:
        if len(info[key]) > size_max:
            size_max = len(info[key])
    for key in info:
        while len(info[key]) < size_max:
            info[key].append(None)
    
    # Saving and seeing
    df_info = pd.DataFrame(info, index=["Shape"] + ["column" for i in range(size_max-1)])
    for n, i in enumerate([df_info, df, df_web_scrap, df_precos]):
        if n == 0:
            print("Info")
            display(i)
        else:
            print(nomes[n-1], i.shape, f"\nDados nulos \n{i.isnull().sum()}")
            display(i.head(3))
        
    return df, df_web_scrap, df_precos, df_info

In [11]:
df, df_web_scrap, df_precos, df_info = data_colect()

Informações basicas foram pegas
Dados estatisticos das cartas foram pegos
Preços das cartas foram pegos
Info


Unnamed: 0,basic,web scrapping,pricing
Shape,"(1498, 11)","(1340, 9)","(1263, 7)"
column,id,Name,name
column,name,God,Tendencia
column,effect,Set,Vendidos por semana
column,god,Matches,Variação do preço %
column,rarity,In % of Decks,price_ETH_$
column,tribe,Copies,price_GODS_$
column,mana,Deck win rate,difference_%
column,attack,Unique Deck WR,
column,health,Est. Price,


basic (1498, 11) 
Dados nulos 
id        0
name      0
effect    0
god       0
rarity    0
tribe     0
mana      0
attack    0
health    0
type      0
set       0
dtype: int64


Unnamed: 0,id,name,effect,god,rarity,tribe,mana,attack,health,type,set
0,12,Sanctify,Give ward to all creatures with strength 2 or less.<br>Draw a card.,light,common,,2,0,0,spell,genesis
1,13,Charm,"Gain control of target enemy creature for one turn. Give it godblitz. Look at your opponent's hand, select any card and pull it into your hand.",deception,rare,,8,0,0,spell,genesis
2,46,Untold Greed,Destroy target friendly creature.<br>Draw two cards.,death,rare,,1,0,0,spell,genesis


web scrapping (1340, 9) 
Dados nulos 
Name              0
God               0
Set               0
Matches           0
In % of Decks     0
Copies            0
Deck win rate     0
Unique Deck WR    0
Est. Price        0
dtype: int64


Unnamed: 0,Name,God,Set,Matches,In % of Decks,Copies,Deck win rate,Unique Deck WR,Est. Price
0,Highborn Knight,Light,Genesis,16.971,6.8%,1.82,56.1%,56.2%,0.07083
1,Reflection Elementalist,Neutral,Genesis,40.397,2.0%,1.65,55.6%,55.9%,0.01566
2,Holy Writ,Light,Genesis,49.728,20.0%,1.4,55.3%,55.5%,0.00134


pricing (1263, 7) 
Dados nulos 
name                   0
Tendencia              0
Vendidos por semana    0
Variação do preço %    0
price_ETH_$            0
price_GODS_$           0
difference_%           0
dtype: int64


Unnamed: 0,name,Tendencia,Vendidos por semana,Variação do preço %,price_ETH_$,price_GODS_$,difference_%
0,Guild Enforcer,0,961.0,-0.002084,0.59351,0.614198,0.034857
3,Marsh Walker,0,570.0,0.142148,0.594983,0.61585,0.035073
4,Black Jaguar,0,800.0,-0.088675,0.156109,0.161027,0.031504


## Organizando os dados

In [12]:
def organizando_web(df_web):
    
    df = df_web.drop(labels=['God','Set','Est. Price', 'Matches', 'Copies'], axis=1)
    
    for column in df:
        df[column] = df[column].apply(lambda x: ''.join([i for i in x if i != '%']))

    for i in ['In % of Decks', 'Deck win rate', 'Unique Deck WR']:
        df[i] = df[i].apply(lambda x: float(x)/100)
    
    df.rename({'Name': 'name'}, axis=1, inplace=True)
    return df

In [13]:
from sklearn.preprocessing import StandardScaler

def organizando_precos(df):
    df['Tendencia'] = df['Tendencia'].apply(lambda x: None if x <= -1000 else x)
    df = df.dropna(axis=0)
    
    df['Vendidos por semana'] = df['Vendidos por semana'].apply(lambda x: int(x))
    df['Tendencia'] = df['Tendencia'].apply(lambda x: int(x))

    return df

In [14]:
def small_cleaning():
    global df, df_web_scrap, df_precos

    # Organizando os dados do web scrapping
    df_web_scrap_2 = organizando_web(df_web_scrap)
    
    
    # Organizando os preços
    df_precos_2 = organizando_precos(df_precos)
    
    
    # Taking info about the data_sets
    info = {}
    dados = [df, df_web_scrap_2, df_precos_2]
    nomes = ["basic", "web scrapping", "pricing"]
    for i, j in zip(dados, nomes):
        col_names = list(i.columns)
        info[j] = [i.shape] + col_names
    
    size_max = 0
    for key in info:
        if len(info[key]) > size_max:
            size_max = len(info[key])
    for key in info:
        while len(info[key]) < size_max:
            info[key].append(None)
    
    # Saving and seeing
    df_info_2 = pd.DataFrame(info, index=["Shape"] + ["column" for i in range(size_max-1)])
    for n, i in enumerate([df_info_2, df, df_web_scrap_2, df_precos_2]):
        if n == 0:
            print("Info")
            display(i)
        else:
            print(nomes[n-1], i.shape, f"\nDados nulos \n{i.isnull().sum()}")
            display(i.head(3))
    
        
    return df, df_web_scrap_2, df_precos_2, df_info_2

In [15]:
df, df_web_scrap_2, df_precos_2, df_info_2 = small_cleaning() 

Info


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Vendidos por semana'] = df['Vendidos por semana'].apply(lambda x: int(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Tendencia'] = df['Tendencia'].apply(lambda x: int(x))


Unnamed: 0,basic,web scrapping,pricing
Shape,"(1498, 11)","(1340, 4)","(1257, 7)"
column,id,name,name
column,name,In % of Decks,Tendencia
column,effect,Deck win rate,Vendidos por semana
column,god,Unique Deck WR,Variação do preço %
column,rarity,,price_ETH_$
column,tribe,,price_GODS_$
column,mana,,difference_%
column,attack,,
column,health,,


basic (1498, 11) 
Dados nulos 
id        0
name      0
effect    0
god       0
rarity    0
tribe     0
mana      0
attack    0
health    0
type      0
set       0
dtype: int64


Unnamed: 0,id,name,effect,god,rarity,tribe,mana,attack,health,type,set
0,12,Sanctify,Give ward to all creatures with strength 2 or less.<br>Draw a card.,light,common,,2,0,0,spell,genesis
1,13,Charm,"Gain control of target enemy creature for one turn. Give it godblitz. Look at your opponent's hand, select any card and pull it into your hand.",deception,rare,,8,0,0,spell,genesis
2,46,Untold Greed,Destroy target friendly creature.<br>Draw two cards.,death,rare,,1,0,0,spell,genesis


web scrapping (1340, 4) 
Dados nulos 
name              0
In % of Decks     0
Deck win rate     0
Unique Deck WR    0
dtype: int64


Unnamed: 0,name,In % of Decks,Deck win rate,Unique Deck WR
0,Highborn Knight,0.068,0.561,0.562
1,Reflection Elementalist,0.02,0.556,0.559
2,Holy Writ,0.2,0.553,0.555


pricing (1257, 7) 
Dados nulos 
name                   0
Tendencia              0
Vendidos por semana    0
Variação do preço %    0
price_ETH_$            0
price_GODS_$           0
difference_%           0
dtype: int64


Unnamed: 0,name,Tendencia,Vendidos por semana,Variação do preço %,price_ETH_$,price_GODS_$,difference_%
0,Guild Enforcer,0,961,-0.002084,0.59351,0.614198,0.034857
3,Marsh Walker,0,570,0.142148,0.594983,0.61585,0.035073
4,Black Jaguar,0,800,-0.088675,0.156109,0.161027,0.031504


## Juntando tudo

In [16]:
def juntando_banco_de_dados(*df):
    lista = list(df)
    df_all = pd.merge(lista[0], lista[1], on='name')
    df_all = pd.merge(df_all, lista[2], on="name")
        
    print("Todos os dados juntos", df_all.shape, f"\nDados nulos \n{df_all.isnull().sum()}")
    #display(df_all.head(1))
    return df_all

In [17]:
df_all = juntando_banco_de_dados(df, df_web_scrap_2, df_precos_2)

Todos os dados juntos (1227, 20) 
Dados nulos 
id                     0
name                   0
effect                 0
god                    0
rarity                 0
tribe                  0
mana                   0
attack                 0
health                 0
type                   0
set                    0
In % of Decks          0
Deck win rate          0
Unique Deck WR         0
Tendencia              0
Vendidos por semana    0
Variação do preço %    0
price_ETH_$            0
price_GODS_$           0
difference_%           0
dtype: int64


## Salvando os dados

In [18]:
import datetime
data = datetime.date.today()
df_all.to_csv(f'dados/dataset_{data}.csv', index=True)
print(f"Salvamento concluído na data {data}")

Salvamento concluído na data 2022-08-28
