In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import lightgbm as lgb
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [2]:
# Download the collectible-card dump (build 44582, enUS) from HearthstoneJSON.
# A timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(
    'https://api.hearthstonejson.com/v1/44582/enUS/cards.collectible.json',
    timeout=30,
)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
r.raise_for_status()

# One row per JSON record, indexed by the record's "name" field.
data = pd.DataFrame(r.json()).set_index("name")

### Columns to drop

- id:
- armor:
- flavor:
- collectible
- faction
- artist
- collectionText
- battlegroundsPremiumDbfId
- elite (if true, only one copy is allowed in a deck; same as legendary)

In [3]:
# Columns that are not used in the rest of the notebook.
col_drop = [
    'id', 'armor', 'flavor', 'collectible',
    'faction', 'artist', 'dbfId', 'collectionText',
    'battlegroundsPremiumDbfId', 'howToEarn', 'questReward',
    'howToEarnGolden', 'hideStats', 'targetingArrowText', "elite",
]
# errors="ignore" keeps this cell re-runnable: on a second execution the
# columns are already gone and a plain drop would raise KeyError.
data = data.drop(columns=col_drop, errors="ignore")



In [7]:
# Clean card text of markup artifacts.
# Each replacement states its regex mode explicitly: the default of
# `Series.str.replace` changed to regex=False in pandas 2.0, which turned the
# old "\[x\]" pattern into a literal that never matched.
data["text"] = (
    data["text"]
    # opening and closing bold/italic tags
    .str.replace(r"</?[bi]>", "", regex=True)
    # literal "[x]" marker
    .str.replace("[x]", "", regex=False)
    # line breaks become single spaces
    .str.replace("\n", " ", regex=False)
)


In [30]:
# True for every card that has no mechanic at list position 2 (i.e. no third mechanic).
data["mechanics"].str.get(2).isna()

2497

In [58]:
# Collect every distinct mechanic tag that appears on any card.
unique_mechanics = set(data.mechanics.explode().dropna())

# Add one 0/1 indicator column per mechanic. Membership is tested against the
# card's whole mechanics list; the previous per-position loop overwrote the
# column on every pass, so only list position 4 was ever recorded.
# Sorting makes the resulting column order deterministic.
for mechanic in sorted(unique_mechanics):
    data[f"mechanic_{mechanic}"] = data.mechanics.apply(
        # Cards without mechanics hold NaN rather than a list -> 0.
        lambda tags, mechanic=mechanic: int(
            isinstance(tags, (list, tuple, set)) and mechanic in tags
        )
    )

In [69]:
# 1 where the card's mechanic at list position 1 is TAUNT, otherwise 0.
second_mechanic = data["mechanics"].str.get(1)
second_mechanic.isin(["TAUNT"]).astype(int)

name
Flame Lance           0
Effigy                0
Fallen Hero           0
Arcane Blast          0
Polymorph: Boar       0
                     ..
Sky Gen'ral Kragg     1
Steel Beetle          0
Eye of the Storm      0
The Fist of Ra-den    0
Scalelord             0
Name: mechanics, Length: 2537, dtype: int64

In [103]:
# Long format: one row per (card, mechanic) entry of the mechanics lists.
test = data["mechanics"].explode()
# Move the card name out of the index so it can serve as the pivot index.
test1 = test.reset_index()
# Wide format: one row per card, one column per mechanic; a cell holds the
# mechanic's name when the card has it and NaN otherwise.
test1.pivot(index="name", columns="mechanics", values="mechanics")


mechanics,NaN,ADJACENT_BUFF,AFFECTED_BY_SPELL_POWER,AURA,BATTLECRY,CANT_ATTACK,CANT_BE_TARGETED_BY_HERO_POWERS,CANT_BE_TARGETED_BY_SPELLS,CHARGE,CHOOSE_ONE,...,SIDEQUEST,SILENCE,SPELLPOWER,START_OF_GAME,STEALTH,TAUNT,TOPDECK,TRIGGER_VISUAL,TWINSPELL,WINDFURY
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A Light in the Darkness,,,,,,,,,,,...,,,,,,,,,,
A New Challenger...,,,,,,,,,,,...,,,,,,,,,,
Aberrant Berserker,,,,,,,,,,,...,,,,,,,,,,
Abominable Bowman,,,,,,,,,,,...,,,,,,,,,,
Abomination,,,,,,,,,,,...,,,,,,TAUNT,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zombie Chow,,,,,,,,,,,...,,,,,,,,,,
Zoobot,,,,,BATTLECRY,,,,,,...,,,,,,,,,,
Zul'Drak Ritualist,,,,,BATTLECRY,,,,,,...,,,,,,TAUNT,,,,
Zul'jin,,,,,BATTLECRY,,,,,,...,,,,,,,,,,


# Hs_Replay Data

In [None]:
# Load the previously scraped table from disk.
hs_replay = pd.read_csv(filepath_or_buffer="hs_replay.csv")

In [None]:
# Label every scraped row with the statistic it holds. The rows are expected
# to come in consecutive groups, one group per card, in this fixed order.
b = ["in decks", "copies", "deck wr", "times played", "per of played cards", "played wr"]

# Derive the number of groups from the data instead of hard-coding it, and
# refuse to label a table whose length is not a whole number of groups.
n_groups, leftover_rows = divmod(len(hs_replay), len(b))
if leftover_rows:
    raise ValueError(
        f"hs_replay has {len(hs_replay)} rows, which is not a multiple of {len(b)}"
    )
hs_replay["type"] = b * n_groups

In [None]:
# Add the card names to the scraped data.
# The file is opened in binary mode so BeautifulSoup detects the document
# encoding itself instead of relying on the platform's default text encoding.
# The parser is named explicitly so results do not depend on which optional
# parser libraries happen to be installed.
with open("test.html", "rb") as fp:
    soup = BeautifulSoup(fp, "html.parser")

# Query the document once; every <figcaption> contributes one name.
names = [caption.string for caption in soup.find_all("figcaption")]

# Each name is repeated 6 times so that it lines up with 6 consecutive rows.
hs_replay["names"] = [name for name in names for _ in range(6)]


In [None]:
# Reshape to the website's layout: one row per card name, one column per
# statistic type, cells taken from "propertyName1".
hs_replay_data = hs_replay.pivot(
    values="propertyName1",
    columns="type",
    index="names",
)

In [None]:
# Inner join on the index: keep only cards present in both tables.
test = data.join(hs_replay_data["in decks"], how="inner")

# "in decks" is a percentage string; strip the sign and parse it as a float.
test["in decks"] = test["in decks"].str.replace("%", '').astype("float")
# Binary target: 1 when "in decks" is above 1, otherwise 0.
test["y"] = np.where(test["in decks"].gt(1), 1, 0)

# Dummy flag for legendary cards.
test["leg_dum"] = np.where(test["rarity"].eq("LEGENDARY"), 1, 0)

# Feature engineering from card text

In [None]:
# 0/1 flags for keywords in the card text; cards without text get 0.
test["cond"] = test["text"].str.contains('If', na=False).astype(int)
test["summon"] = test["text"].str.contains('Summon', na=False).astype(int)

In [None]:
# Show the text of cards that mention a single-digit cost reduction,
# e.g. "Costs (1) less". The pattern is a raw string so that \( and \d reach
# the regex engine untouched instead of being invalid string escapes.
# na=False treats cards without text as non-matching.
test.text[test.text.str.contains(r"Costs \(\d\) less", regex=True, na=False)]

In [None]:
# Columns handed to the model as pandas categoricals.
categorical_features = [
    'cardClass', "cost", "rarity", "type", "attack", "health",
    "set", "race", "durability", "overload", "spellDamage", "leg_dum",
]
test = test.astype({column: "category" for column in categorical_features})

# Full feature list: the categoricals plus the two 0/1 text flags.
feat = categorical_features + ["cond", "summon"]



# Lightgbm

In [None]:
# Baseline: fit on every row and predict those same rows (no hold-out here).
hallo = lgb.LGBMClassifier(n_estimators=200).fit(test[feat], test.y)
hallo.predict(test[feat])

In [None]:
# Accuracy on the same rows the model was fitted on (in-sample).
accuracy_score(y_true=test.y, y_pred=hallo.predict(test[feat]))


In [None]:
# Confusion matrix on the same rows the model was fitted on (in-sample).
confusion_matrix(y_true=test.y, y_pred=hallo.predict(test[feat]))


In [None]:
# Hold out part of the data for evaluation. A fixed random_state makes the
# split, and every metric computed from it, reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    test[feat], test.y, random_state=42
)

In [None]:
# Refit on the training split only, then score on the held-out split.
hallo = lgb.LGBMClassifier(
    num_leaves=15,
    learning_rate=0.1,
    n_estimators=1000,
)
hallo.fit(X_train, y_train)

accuracy_score(y_true=y_test, y_pred=hallo.predict(X_test))


In [None]:
# Confusion matrix on the held-out split.
confusion_matrix(y_true=y_test, y_pred=hallo.predict(X_test))

In [None]:
# Plot feature importances of the fitted model; None places no cap on the
# number of features shown.
lgb.plot_importance(booster=hallo, max_num_features=None)

In [None]:
# Inspect the "type" column of the training features.
X_train["type"]

In [None]:
# Replace Abomination's mechanics list with its entry at position 1.
# The type guard makes the cell safe to re-run: once the value is a plain
# string, indexing it again would silently store a single character.
abomination_mechanics = test.loc["Abomination", "mechanics"]
if isinstance(abomination_mechanics, (list, tuple)):
    test.loc["Abomination", "mechanics"] = abomination_mechanics[1]

In [None]:
# Look up the current "mechanics" value stored for Abomination.
card_name, column_name = "Abomination", "mechanics"
test.loc[card_name, column_name]

In [None]:
# List all column names of `data`.
data.columns.tolist()

In [None]:
# Preview the first five rows of `data`.
data.head(n=5)

In [None]:
# Dummy-encode a subset of card attributes; drop_first omits the first level
# of each encoded column.
dummy_source_columns = ['cardClass', 'rarity', 'set', 'type', 'cost', 'attack', 'health']
pd.get_dummies(data.loc[:, dummy_source_columns], drop_first=True)

In [None]:
# Scratch cell: 1-D float array of five ones.
a = np.ones(shape=(5,), dtype=float)
a

In [None]:
# Scratch cell: 5x3 float array of ones, displayed transposed (3x5).
b = np.ones(shape=(5, 3), dtype=float)
b.transpose()

In [None]:
# Display the current `test` frame as the cell output.
test

In [None]:
# Display the current `data` frame as the cell output.
data

In [None]:
# Replace every categorical column of `test` with its integer category codes.
cat_columns = test.select_dtypes(include=['category']).columns
category_codes = test[cat_columns].apply(lambda column: column.cat.codes)
test[cat_columns] = category_codes

In [None]:
# Display the current `test` frame as the cell output.
test

In [None]:
# Show the dtype of every column in `test`.
test.dtypes

# UMAP

In [None]:
import umap

In [None]:
# Show only the columns listed in `cat_columns`.
test.loc[:, cat_columns]

In [None]:
# Fit a UMAP embedding on the columns listed in `cat_columns`.
# UMAP is stochastic; a fixed random_state makes the embedding, and the plots
# built from it, reproducible across runs.
mapper = umap.UMAP(n_neighbors=30, min_dist=0.1, random_state=42).fit(test[cat_columns])

In [None]:
import umap.plot

In [None]:
# Static scatter plot of the embedding, with the target `y` passed as values.
umap.plot.points(
    mapper,
    theme='darkblue',
    values=test.y,
)

In [None]:
# Per-point table for the interactive plot: the index of `test`, the target,
# and a readable yes/no version of the target.
hover_data = pd.DataFrame({'index': test.index, 'label': test.y})
hover_data['item'] = np.where(hover_data['label'].ne(0), "yes", "no")

In [None]:
# NOTE(review): presumably directs interactive plot output to render inline in
# the notebook — confirm against the umap.plot documentation.
umap.plot.output_notebook()

In [None]:
# Interactive version of the embedding plot, labelled by the target and
# carrying the hover table built earlier.
p = umap.plot.interactive(
    mapper,
    labels=test.y.values,
    hover_data=hover_data[["index", "label", "item"]],
    point_size=2,
)
umap.plot.show(p)