In [1]:
import numpy as np
import pandas as pd
import bs4
import requests
import json

# Download the Smogon dex page. The timeout stops the cell from hanging forever
# on a stalled connection, and raise_for_status() fails loudly on an HTTP error
# instead of trying to parse an error page.
page = requests.get('https://www.smogon.com/dex/sm/pokemon/', timeout=30)
page.raise_for_status()

# Name the parser explicitly so the parse tree does not depend on which optional
# parsers happen to be installed in the environment.
soup = bs4.BeautifulSoup(page.text, 'html.parser')

# NOTE(review): the first <script> tag presumably holds a JavaScript assignment
# whose 14-character prefix precedes the JSON payload — confirm against the page.
script = soup.script.string.strip()[14:]
j = json.loads(script)

# NOTE(review): this path into the payload mirrors the page layout at the time
# of writing; verify it if the site changes.
pokemon = pd.DataFrame(j['injectRpcs'][1][1]['pokemon'])

In [2]:
#Split lists in a column into separate rows.
def split_df(df, column):
    """Explode a column of lists into one row per element, then widen it.

    Each row of ``df`` is repeated once for every element of the list stored
    in ``column``. The elements themselves (dicts in this notebook) are then
    expanded into their own columns, which replace ``column`` in the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; ``df[column]`` must hold an iterable in every row.
    column : label
        Name of the column to split.

    Returns
    -------
    pandas.DataFrame
        The remaining columns of ``df`` followed by the columns produced from
        the split elements, with a fresh 0..n-1 index. Rows whose list is
        empty contribute no output rows. ``df`` itself is left untouched.
    """
    # Build the records with a plain loop instead of a side-effecting callback
    # inside DataFrame.apply: apply is meant for computing results, not for
    # appending to an outer list.
    records = []
    for _, row in df.iterrows():
        base = row.to_dict()
        for item in row[column]:
            record = dict(base)
            record[column] = item
            records.append(record)

    if not records:
        # Nothing to split: return an empty frame that still carries the other
        # columns rather than failing on a missing column.
        return pd.DataFrame(columns=[c for c in df.columns if c != column])

    df_alt = pd.DataFrame(records)
    # One DataFrame construction replaces a per-row Series construction.
    expanded = pd.DataFrame(df_alt[column].tolist(), index=df_alt.index)
    return pd.concat([df_alt.drop(columns=[column]), expanded], axis=1)

#Remove elements from list in each row.
def remove_all(to_exclude, column, df):
    """Strip unwanted values from a list-valued column and drop emptied rows.

    Parameters
    ----------
    to_exclude : iterable
        Values to remove from every list in ``df[column]``.
    column : label
        Name of the list-valued column to filter.
    df : pandas.DataFrame
        Input frame. Neither the frame nor the lists stored in it are
        modified.

    Returns
    -------
    pandas.DataFrame
        A new frame in which ``column`` holds the filtered lists (original
        element order kept, every occurrence of an excluded value removed).
        Rows whose list ends up empty are dropped and the index is reset.
    """
    excluded = list(to_exclude)

    # Build fresh lists rather than calling list.remove on the stored ones:
    # that would mutate the caller's data and only delete the first match.
    filtered = df[column].apply(
        lambda items: [item for item in items if item not in excluded]
    )

    result = df.copy()
    result[column] = filtered

    # Comparing lengths always yields a boolean mask, even for an empty frame.
    non_empty = filtered.map(len) > 0
    return result[non_empty].reset_index(drop=True)

#Extend lists in each row.
def extend_all(order, column, df):
    """Expand every list in a column to include all later entries of ``order``.

    For each row, every element that appears in ``order`` contributes itself
    and everything after it in ``order``. Elements that do not appear in
    ``order`` are dropped. A row with no recognised element becomes ``[]``.

    Parameters
    ----------
    order : list
        Ordered reference list; position decides what counts as "later".
    column : label
        Name of the list-valued column to expand.
    df : pandas.DataFrame
        Input frame; it is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` whose ``column`` holds the expanded lists, without
        duplicates and always sorted the way ``order`` is sorted.
    """
    def extend(items):
        positions = [order.index(item) for item in items if item in order]
        if not positions:
            return []
        # The union of order[i:] over all matches is the tail starting at the
        # smallest match. dict.fromkeys removes duplicates while keeping the
        # sequence stable, unlike set(), whose iteration order for strings can
        # change from one interpreter run to the next.
        return list(dict.fromkeys(order[min(positions):]))

    # Work on a copy so a frame that is a slice of another one is never
    # assigned into.
    result = df.copy()
    result[column] = df[column].apply(extend)
    return result

In [3]:
# Columns kept for modelling, in the order they should appear.
col_order = ['name', 'suffix', 'types', 'hp', 'atk', 'def', 'spa', 'spd', 'spe', 'abilities', 'formats']
# Reference sequence handed to extend_all: a tier found in a row is expanded to
# itself plus every entry after it in this list.
# NOTE(review): presumably ordered from lowest to highest tier — confirm.
order = ['Untiered', 'PU', 'PUBL', 'NU', 'NUBL', 'RU', 'RUBL', 'UU', 'UUBL', 'OU', 'Uber', 'AG']
# Format labels removed from every row after the expansion step.
tiers_to_exclude = ['CAP', 'LC', 'LGPE OU', 'Limbo', 'Untiered', 'PUBL', 'NUBL', 'RUBL', 'UUBL']

df = split_df(pokemon, 'alts')
# Take an explicit copy of the column subset so that later column assignments
# act on an independent frame and do not trigger pandas' SettingWithCopyWarning.
df = df[col_order].copy()
df = extend_all(order, 'formats', df)
df = remove_all(tiers_to_exclude, 'formats', df)

In [4]:
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

# Multi-hot encode the categorical inputs. Adding the two columns joins each
# row's types and abilities into a single label collection
# (assumes both columns hold Python lists — TODO confirm).
mlb_x = MultiLabelBinarizer()
labels = mlb_x.fit_transform(df.types + df.abilities)
stats = df[['hp','atk','def','spa','spd','spe']].to_numpy()
# Feature matrix: the six numeric stats followed by the multi-hot label columns.
X = np.hstack([stats, labels])

# Multi-hot encode the targets: one column per distinct entry in df.formats.
mlb_y = MultiLabelBinarizer()
y = mlb_y.fit_transform(df.formats)

model = Sequential()
# Normalise the raw inputs inside the network.
model.add(BatchNormalization())
model.add(Dense(14, activation='relu'))
# One sigmoid unit per target column. The width comes from y instead of a
# literal so it stays correct when the set of fitted tiers changes.
model.add(Dense(y.shape[1], activation='sigmoid'))

# Sigmoid outputs with binary cross-entropy score every tier independently.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
history = model.fit(X, y, epochs=50, batch_size=8, validation_split=0.2, verbose=0)

#Predict the probability of being playable in a given tier.
def predict_tier(stats, types, abilities):
    """Score one pokemon against every tier the model was trained on.

    Parameters
    ----------
    stats : sequence of numbers
        Numeric stats, laid out like the stat columns used for training.
    types : list
        Type labels of the pokemon.
    abilities : list
        Ability labels of the pokemon.

    Returns
    -------
    pandas.DataFrame
        One row per class of ``mlb_y`` with the columns ``tier`` and
        ``probability`` (the model output for that class).
    """
    # Encode the combined labels with the binarizer fitted on the training data.
    encoded = mlb_x.transform([types + abilities])
    # A single-row feature matrix: stats first, then the encoded labels.
    features = np.hstack([np.array([stats]), encoded])
    probabilities = model.predict(features)[0]
    return pd.DataFrame(
        zip(mlb_y.classes_, probabilities),
        columns=['tier', 'probability'],
    )

#Compare prediction after a pokemon is changed.
def compare(old_stat, old_type, old_ability, new_stat=None, new_type=None, new_ability=None):
    """Predict tiers for a pokemon before and after a change, side by side.

    Parameters
    ----------
    old_stat, old_type, old_ability
        Stats, types and abilities of the original pokemon, in the form
        ``predict_tier`` expects.
    new_stat, new_type, new_ability : optional
        Replacement values. An argument left as ``None`` falls back to the
        matching ``old_*`` value.

    Returns
    -------
    pandas.DataFrame
        The two predictions merged on ``tier``, with the columns
        ``probability_old`` and ``probability_new``.
    """
    # Test against None explicitly. A truthiness check raises for NumPy arrays
    # and would silently discard an intentionally empty list.
    if new_stat is None:
        new_stat = old_stat
    if new_type is None:
        new_type = old_type
    if new_ability is None:
        new_ability = old_ability

    df_1 = predict_tier(old_stat, old_type, old_ability)
    df_2 = predict_tier(new_stat, new_type, new_ability)
    return pd.merge(df_1, df_2, on='tier', suffixes=('_old', '_new'))

In [5]:
# Made-up pokemon used to try out the model: six stats in the order
# hp, atk, def, spa, spd, spe, plus one type and one ability.
stat_test = [60, 60, 60, 105, 105, 105]
type_test = ['Ghost']
ability_test = ['Levitate']

# Bare last expression so the notebook renders the resulting table.
predict_tier(stats=stat_test, types=type_test, abilities=ability_test)

Unnamed: 0,tier,probability
0,AG,0.999998
1,NU,0.541708
2,OU,0.995791
3,PU,0.28898
4,RU,0.710819
5,UU,0.93896
6,Uber,0.998997
