# 03 - Recommender System

In [1]:
# imports
import pandas as pd
import numpy as np
import time

from scipy import sparse # cut down on memory size
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import StandardScaler

pd.options.display.max_columns = 35

___

In [2]:
# read in the data
df = pd.read_csv('../Data/cards_cleaned.csv')
df.head()

Unnamed: 0,name,layout,colors,color_identity,mana_cost,cmc,type_line,card_type,super_type,sub_type,oracle_text,oracle_text_token,legalities,rarity,power,toughness,loyalty,card_faces,activated_ability,triggered_ability,oracle_text_back,oracle_text_back_token,colors_back,power_back,toughness_back,loyalty_back,card_type_back,super_type_back,sub_type_back,mana_cost_back
0,Static Orb,normal,[],[],{3},3.0,Artifact,Artifact,NONE,NONE,"As long as Static Orb is untapped, players can...",as long as static orb is untapped players can'...,legacy vintage commander duel,rare,NONE,NONE,NONE,NONE,0.0,0.0,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE
1,Sensory Deprivation,normal,['U'],['U'],{U},1.0,Enchantment — Aura,Enchantment,NONE,Aura,Enchant creature Enchanted creature gets -3/-0.,enchant creature enchanted creature gets -3/-0,pioneer modern legacy pauper vintage penny com...,common,NONE,NONE,NONE,NONE,0.0,0.0,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE
2,Road of Return,normal,['G'],['G'],{G}{G},2.0,Sorcery,Sorcery,NONE,NONE,Choose one — • Return target permanent card fr...,choose one return target permanent card from y...,legacy vintage commander duel,rare,NONE,NONE,NONE,NONE,0.0,0.0,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE
3,Storm Crow,normal,['U'],['U'],{1}{U},2.0,Creature — Bird,Creature,NONE,Bird,Flying (This creature can't be blocked except ...,flying this creature can't be blocked except b...,modern legacy pauper vintage penny commander duel,common,1,2,NONE,NONE,0.0,0.0,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE
4,Walking Sponge,normal,['U'],['U'],{1}{U},2.0,Creature — Sponge,Creature,NONE,Sponge,{T}: Target creature loses your choice of flyi...,{t} target creature loses your choice of flyin...,legacy vintage commander duel,uncommon,1,1,NONE,NONE,1.0,0.0,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE


In [3]:
df.shape

(18108, 30)

___

## Oracle text recommender system
To start I want to build a recommender system that just looks at oracle text. In order to do that I need to combine both oracle_text and oracle_text_back into one single column

In [4]:
df['oracle_combined'] = df['oracle_text_token'] + " " + df['oracle_text_back_token']

In [5]:
# start by isolating the name of the card and it's oracle_text
oracle = df['oracle_combined']

# vectorize all our words
cvec = CountVectorizer(stop_words=['none'],
                      min_df=2,
                      max_df=.99,
                      ngram_range=(1,6),
                      token_pattern="[a-zA-Z{}+'0-9-/−]+") # we should use the same RegEx to keep certain characters together 

oracle_vec = cvec.fit_transform(oracle)

# convert to a dataframe so we can use this later on as well
converted_df = pd.DataFrame(oracle_vec.toarray(), columns=cvec.get_feature_names(), index=df['name'])

In [6]:
converted_df.shape

(18108, 130070)

This is a whole lot of features to examine so I'm going to use Variance Threshold to drop some n_gram columns that only exist in a small number of cards

In [7]:
selector = VarianceThreshold(.001)
new_array = selector.fit_transform(converted_df)

new_array.shape

(18108, 15256)

In [8]:
# get the list of columns kept
vt_list = converted_df.columns[selector.get_support()]

# update our converted_df with just the important columns
converted_df = converted_df[vt_list]

For the recommender system to work efficiently we need to convert the data back into a sparse matrix with the new names as the index

In [9]:
sparse_df = sparse.csr_matrix(converted_df)

In [10]:
# build the recommender system using cosine similarity
rec = pairwise_distances(sparse_df, metric='cosine')

In [11]:
rec.shape

(18108, 18108)

In [12]:
rec_df = pd.DataFrame(rec, index=converted_df.index, columns=converted_df.index)
rec_df.head()

name,Static Orb,Sensory Deprivation,Road of Return,Storm Crow,Walking Sponge,Ravnica at War,Torrent of Fire,Pteramander,Nantuko Elder,Vedalken Heretic,Waterknot,Ruthless Knave,"Hua Tuo, Honored Physician",Veil of Summer,Disposal Mummy,Marang River Prowler,Aura Graft,...,Trinket Mage,Skyshroud Blessing,"Omnath, Locus of the Roil",Harvest Hand // Scrounged Scythe,Polis Crusher,Test of Endurance,Venom Sliver,Borderland Ranger,Curse of Thirst,Temporary Truce,Clearwater Goblet,Quarry Beetle,Devoted Hero,Without Weakness,Firesong and Sunspeaker,"Samut, the Tested",Sinew Sliver
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
Static Orb,0.0,1.0,1.0,0.976967,1.0,0.916955,0.976224,1.0,1.0,1.0,0.897467,0.979617,0.982608,0.962356,1.0,0.771425,1.0,...,1.0,0.977805,0.987648,0.800691,0.983457,0.975825,0.980091,1.0,1.0,0.922746,1.0,1.0,1.0,0.985455,1.0,0.944326,1.0
Sensory Deprivation,1.0,0.0,1.0,0.939834,0.921311,1.0,1.0,0.952886,1.0,1.0,0.62503,0.946757,0.954569,1.0,1.0,1.0,0.962576,...,1.0,1.0,1.0,0.869842,0.956786,1.0,0.89599,1.0,0.943989,1.0,1.0,1.0,1.0,0.962006,0.947074,0.927285,1.0
Road of Return,1.0,1.0,0.0,1.0,0.925013,1.0,0.917141,0.910205,1.0,0.952435,0.979581,0.979705,0.79219,0.900045,0.869256,0.852062,0.928673,...,0.790645,0.96685,0.858557,0.909041,0.942347,0.855568,0.970265,0.800785,0.925274,0.974359,0.911157,0.692745,1.0,0.927588,0.949564,0.854482,1.0
Storm Crow,0.976967,0.939834,1.0,0.0,0.919516,1.0,1.0,0.927716,1.0,1.0,0.958908,0.959156,0.976766,0.907804,1.0,0.877859,1.0,...,0.980629,0.9407,0.991749,0.789214,0.96685,0.983852,0.946808,1.0,1.0,0.982799,0.981662,1.0,1.0,0.970855,0.9594,0.97211,0.962602
Walking Sponge,1.0,0.921311,0.925013,0.919516,0.0,1.0,0.937689,0.952731,0.906341,1.0,0.946257,0.928776,0.848066,0.791724,0.971323,0.960064,0.924906,...,0.949331,0.728552,0.913674,0.941962,0.956645,0.915522,0.930432,0.96116,0.88761,1.0,0.916058,0.960064,1.0,0.809408,0.911501,0.768979,1.0


In [13]:
# now to test the recommender system
rec_df['Shock'].sort_values()[0:11]

name
Shock                0.000000
Unyaro Bee Sting     0.000000
Tarfire              0.000000
Magma Jet            0.058487
Shock Troops         0.133975
Deadapult            0.133975
Moonglove Extract    0.133975
Blood Rites          0.149037
Goblin Test Pilot    0.149037
Seismic Assault      0.149037
Crackling Triton     0.149037
Name: Shock, dtype: float64

In [14]:
rec_df['Static Orb'].sort_values()[0:11]

name
Static Orb        0.000000
Winter Orb        0.099500
Damping Field     0.360979
Imi Statue        0.360979
Stoic Angel       0.442914
Castle Raptors    0.543250
Storage Matrix    0.544510
Kill Switch       0.578152
Giant Tortoise    0.594660
Watchdog          0.627065
Eyekite           0.628609
Name: Static Orb, dtype: float64

In [15]:
rec_df['Prized Amalgam'].sort_values()[0:11]

name
Prized Amalgam            0.000000
Bone Dragon               0.367289
Reassembling Skeleton     0.374905
Footsteps of the Goryo    0.378368
Chronosavant              0.408758
Scrapheap Scrounger       0.409055
Apprentice Necromancer    0.414345
Wake the Dead             0.418516
Despoiler of Souls        0.418516
Ghoulsteed                0.424205
Cauldron Dance            0.424953
Name: Prized Amalgam, dtype: float64

In [16]:
rec_df['Wrath of God'].sort_values()[0:11]

name
Wrath of God               0.000000
Damnation                  0.000000
Perish                     0.117647
Winds of Rath              0.153190
Shatterstorm               0.185908
Plague Wind                0.207882
Abu Ja'far                 0.223886
Retribution of the Meek    0.344064
Do or Die                  0.348305
Obliterate                 0.356079
Child of Alara             0.356404
Name: Wrath of God, dtype: float64

In [17]:
rec_df['Grizzly Bears'].sort_values()[0:11] # vanilla creature

name
Grizzly Bears                   0.0
Static Orb                      1.0
Doomgape                        1.0
Ghitu Chronicler                1.0
Aerial Volley                   1.0
Explosive Apparatus             1.0
Spontaneous Mutation            1.0
Rakdos Locket                   1.0
Voltaic Brawler                 1.0
Jousting Lance                  1.0
Tatsumasa, the Dragon's Fang    1.0
Name: Grizzly Bears, dtype: float64

This is a great start! Now I want to add the numerical features and see the results
___

## Adding numerical data to our features

In [18]:
df.dtypes

name                       object
layout                     object
colors                     object
color_identity             object
mana_cost                  object
cmc                       float64
type_line                  object
card_type                  object
super_type                 object
sub_type                   object
oracle_text                object
oracle_text_token          object
legalities                 object
rarity                     object
power                      object
toughness                  object
loyalty                    object
card_faces                 object
activated_ability         float64
triggered_ability         float64
oracle_text_back           object
oracle_text_back_token     object
colors_back                object
power_back                 object
toughness_back             object
loyalty_back               object
card_type_back             object
super_type_back            object
sub_type_back              object
mana_cost_back

In [19]:
# we already have a df for the oracle text so we can just use that one and add to it.
converted_df['cmc'] = df['cmc'].values
converted_df['activated_ability'] = df['activated_ability'].values
converted_df['triggered_ability'] = df['triggered_ability'].values

# convert to a sparse matrix
sparse_df = sparse.csr_matrix(converted_df)

# build the recommender system using cosine similarity
rec = pairwise_distances(sparse_df, metric='cosine')

# turn into a dataframe for interpretability
rec_df = pd.DataFrame(rec, index=converted_df.index, columns=converted_df.index)
rec_df.head()

name,Static Orb,Sensory Deprivation,Road of Return,Storm Crow,Walking Sponge,Ravnica at War,Torrent of Fire,Pteramander,Nantuko Elder,Vedalken Heretic,Waterknot,Ruthless Knave,"Hua Tuo, Honored Physician",Veil of Summer,Disposal Mummy,Marang River Prowler,Aura Graft,...,Trinket Mage,Skyshroud Blessing,"Omnath, Locus of the Roil",Harvest Hand // Scrounged Scythe,Polis Crusher,Test of Endurance,Venom Sliver,Borderland Ranger,Curse of Thirst,Temporary Truce,Clearwater Goblet,Quarry Beetle,Devoted Hero,Without Weakness,Firesong and Sunspeaker,"Samut, the Tested",Sinew Sliver
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
Static Orb,0.0,0.885292,0.911516,0.863296,0.851569,0.539805,0.720115,0.953171,0.595071,0.844143,0.763221,0.831784,0.854321,0.934378,0.774718,0.644097,0.856491,...,0.889318,0.867995,0.864715,0.706802,0.82426,0.758095,0.880962,0.887022,0.757875,0.8049,0.831684,0.746309,0.513336,0.91239,0.734547,0.814129,0.748688
Sensory Deprivation,0.885292,0.0,0.957145,0.886499,0.856223,0.794262,0.872918,0.931959,0.803884,0.924515,0.582248,0.877794,0.894167,0.984109,0.890891,0.91835,0.895743,...,0.946394,0.9452,0.939519,0.817426,0.882149,0.891852,0.85175,0.945282,0.835827,0.937006,0.918481,0.877132,0.764298,0.927261,0.82858,0.864968,0.878284
Road of Return,0.911516,0.957145,0.0,0.956223,0.875228,0.841296,0.833349,0.895027,0.848718,0.8981,0.924177,0.924585,0.755084,0.889676,0.803614,0.800552,0.879366,...,0.758787,0.926024,0.819214,0.866988,0.886364,0.79144,0.933291,0.767853,0.846222,0.927111,0.855369,0.649317,0.818182,0.901807,0.859504,0.811206,0.906109
Storm Crow,0.863296,0.886499,0.956223,0.0,0.853131,0.789837,0.870185,0.907327,0.799666,0.922891,0.88704,0.887649,0.913512,0.894487,0.888544,0.805387,0.929,...,0.926989,0.888043,0.930495,0.740971,0.889646,0.875717,0.899041,0.944106,0.880212,0.919564,0.900073,0.874489,0.759228,0.934984,0.835837,0.905169,0.844583
Walking Sponge,0.851569,0.856223,0.875228,0.853131,0.0,0.733777,0.786224,0.911955,0.661636,0.902323,0.856908,0.826053,0.767189,0.78409,0.835283,0.859128,0.842607,...,0.884391,0.680903,0.843475,0.86875,0.847501,0.790087,0.87211,0.893794,0.757213,0.918486,0.820675,0.809211,0.695003,0.76469,0.76432,0.694222,0.8425


In [20]:
# now to test the recommender system
rec_df['Shock'].sort_values()[0:11]

name
Shock                0.000000e+00
Tarfire              4.440892e-16
Magma Jet            6.580127e-02
Unyaro Bee Sting     1.237505e-01
Arc Trail            1.691324e-01
Moonglove Extract    1.699426e-01
Deadapult            1.699426e-01
Seal of Fire         1.708438e-01
Ember Hauler         1.711375e-01
Orcish Vandal        1.711375e-01
Crackling Triton     1.806535e-01
Name: Shock, dtype: float64

In [21]:
rec_df['Static Orb'].sort_values()[0:11]

name
Static Orb           0.000000
Winter Orb           0.095126
Imi Statue           0.264233
Damping Field        0.264233
Stoic Angel          0.315962
Castle Raptors       0.371254
Wardscale Dragon     0.423140
Storage Matrix       0.453183
Mungha Wurm          0.454499
Scoria Cat           0.454827
Rampaging Cyclops    0.455752
Name: Static Orb, dtype: float64

In [22]:
rec_df['Prized Amalgam'].sort_values()[0:11]

name
Prized Amalgam            0.000000
Footsteps of the Goryo    0.352702
Reassembling Skeleton     0.362452
Bone Dragon               0.368833
Scrapheap Scrounger       0.392484
Cauldron Dance            0.397659
Apprentice Necromancer    0.397741
Wake the Dead             0.398827
Despoiler of Souls        0.401777
Stitchwing Skaab          0.424360
Skyfire Phoenix           0.425799
Name: Prized Amalgam, dtype: float64

In [23]:
rec_df['Wrath of God'].sort_values()[0:11]

name
Damnation          0.000000
Wrath of God       0.000000
Winds of Rath      0.076011
Perish             0.078235
Shatterstorm       0.093307
Plague Wind        0.116612
Obliterate         0.167576
Catastrophe        0.182008
Day of Judgment    0.183503
Child of Alara     0.187596
Mageta the Lion    0.225403
Name: Wrath of God, dtype: float64

In [24]:
rec_df['Jace, the Mind Sculptor'].sort_values()[0:11]

name
Jace, the Mind Sculptor    0.000000
Coral Fighters             0.374088
Voyage's End               0.389159
Brainstorm                 0.413580
Riverwise Augur            0.415442
Select for Inspection      0.432234
Eye Spy                    0.434878
Anchor to the Aether       0.438873
Precognition               0.443417
Dream Cache                0.450290
Cavalier of Gales          0.453835
Name: Jace, the Mind Sculptor, dtype: float64

In [25]:
rec_df['Grizzly Bears'].sort_values()[0:11] # vanilla creature

name
Ancient Carp                          0.0
Falkenrath Reaver                     0.0
Field Creeper                         0.0
Norwood Ranger                        0.0
Hollowhenge Beast                     0.0
Rotted Hystrix                        0.0
Tolarian Scholar                      0.0
Pearled Unicorn                       0.0
Fusion Elemental                      0.0
Garenbrig Carver // Shield's Might    0.0
Incurable Ogre                        0.0
Name: Grizzly Bears, dtype: float64

___

## Non-numerical data
Now I have to convert the non-numerical data into numerical data

### Power / Toughness / Loyalty

Through some outside reasearch on ScryFall and how other sources interpret * s in power and toughness, I'm going to impute the * s as 0 and * +1s as 1 for both power and toughness. Same is true for loyalty

In [26]:
# power
zero_power_index = df.loc[(df['power'] == '*')].index
zero_power_back_index = df.loc[(df['power_back'] == '*')].index
df.loc[zero_power_index, 'power'] = 0
df.loc[zero_power_back_index, 'power_back'] = 0

one_power_index = df.loc[(df['power'] == '1+*')].index
df.loc[one_power_index, 'power'] = 1

# toughness
zero_toughness_index = df.loc[(df['toughness'] == '*')].index
zero_toughness_back_index = df.loc[(df['toughness_back'] == '*')].index
df.loc[zero_toughness_index, 'toughness'] = 0
df.loc[zero_toughness_back_index, 'toughness_back'] = 0

one_toughness_index = df.loc[(df['toughness'] == '1+*')].index
df.loc[one_toughness_index, 'toughness'] = 1

# loyalty
zero_loyalty_index = df.loc[(df['loyalty'] == 'X')].index
zero_loyalty_back_index = df.loc[(df['loyalty_back'] == 'X')].index
df.loc[zero_loyalty_index, 'loyalty'] = 0
df.loc[zero_loyalty_back_index, 'loyalty_back'] = 0

In [27]:
df.dtypes

name                       object
layout                     object
colors                     object
color_identity             object
mana_cost                  object
cmc                       float64
type_line                  object
card_type                  object
super_type                 object
sub_type                   object
oracle_text                object
oracle_text_token          object
legalities                 object
rarity                     object
power                      object
toughness                  object
loyalty                    object
card_faces                 object
activated_ability         float64
triggered_ability         float64
oracle_text_back           object
oracle_text_back_token     object
colors_back                object
power_back                 object
toughness_back             object
loyalty_back               object
card_type_back             object
super_type_back            object
sub_type_back              object
mana_cost_back

In [28]:
# add the newly made numerical columns to the converted df
converted_df['power'] = df['power'].values
converted_df['power_back'] = df['power_back'].values
converted_df['toughness'] = df['toughness'].values
converted_df['toughness_back'] = df['toughness_back'].values
converted_df['loyalty'] = df['loyalty'].values
converted_df['loyalty_back'] = df['loyalty_back'].values

In [29]:
converted_df.head()

Unnamed: 0_level_0,+0/+1,+0/+1 until,+0/+1 until end,+0/+1 until end of,+0/+1 until end of turn,+0/+2,+0/+3,+1,+1/+0,+1/+0 and,+1/+0 and gains,+1/+0 and has,+1/+0 for,+1/+0 for each,+1/+0 until,+1/+0 until end,+1/+0 until end of,...,{w} {t},{w}{u}{b}{r}{g},{w}{w},{x},{x} {t},−1,−2,−3,−6,−7,−8,cmc,activated_ability,triggered_ability,power_back,toughness_back,loyalty_back
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
Static Orb,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,3.0,0.0,0.0,NONE,NONE,NONE
Sensory Deprivation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,1.0,0.0,0.0,NONE,NONE,NONE
Road of Return,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,2.0,0.0,0.0,NONE,NONE,NONE
Storm Crow,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,2.0,0.0,0.0,NONE,NONE,NONE
Walking Sponge,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,2.0,1.0,0.0,NONE,NONE,NONE


___

In [30]:
# set the index of df to be the same as the converted_df; the name of the card
df = df.set_index(df['name']).drop(columns='name')

In [31]:
# essentially makeing dummy variables for non-numerical data

# initialize some lists to check
wburg = ['B', 'G', 'R', 'U', 'W']

card_types = ['Creature', 'Instant', 'Enchantment', 'Sorcery', 'Artifact', 'Land', 'Planeswalker', 'Tribal']

subtypes = list(set(" ".join(df['sub_type'].value_counts().index).split()))
subtypes.remove('NONE')

super_types = list(set(" ".join(df['super_type'].value_counts().index).split()))
super_types.remove('NONE')

formats = list(set(" ".join(df['legalities'].value_counts().index).split()))
formats.remove('NONE')

rarities = ['common', 'uncommon', 'rare', 'mythic']


# timer and progress checker
t0 = time.time()
counter = 0

for counter, index in enumerate(converted_df.index):
    # colors
    for color in wburg:
        converted_df.loc[index, 'colors_' + color] = (color in df.loc[index, 'colors']) * 1
        converted_df.loc[index, 'color_identity_' + color] = (color in df.loc[index, 'color_identity']) * 1
    
    # card_types
    for ctype in card_types:
        converted_df.loc[index, 'card_type_' + ctype] = (ctype in df.loc[index, 'card_type']) * 1
        
    # sub_types
    for stype in subtypes:
        converted_df.loc[index, 'sub_type_' + stype] = (stype in df.loc[index, 'sub_type']) * 1
        
    # super_type
    for st in super_types:
        converted_df.loc[index, 'super_type_' + st] = (st in df.loc[index, 'super_type']) * 1
    
    # legalities
    for form in formats:
        converted_df.loc[index, 'legalities_' + form] = (form in df.loc[index, 'legalities']) * 1
    
    # rarity
    for r in rarities:
        converted_df.loc[index, 'rarity_' + r] = (r in df.loc[index, 'rarity']) * 1
    
    # progress checker
    if counter % 1000 == 0:
        print(f'Converted {counter} cards out of a total of {len(converted_df.index)}')
        print(f'mins: {(time.time() - t0)/60}')
        print('-------------------------------')

Converted 0 cards out of a total of 18108
mins: 0.28106763362884524
-------------------------------
Converted 1000 cards out of a total of 18108
mins: 3.5997273008028667
-------------------------------
Converted 2000 cards out of a total of 18108
mins: 6.9177250345548
-------------------------------
Converted 3000 cards out of a total of 18108
mins: 10.226051199436188
-------------------------------
Converted 4000 cards out of a total of 18108
mins: 13.571101482709249
-------------------------------
Converted 5000 cards out of a total of 18108
mins: 17.051464637120564
-------------------------------
Converted 6000 cards out of a total of 18108
mins: 20.522286117076874
-------------------------------
Converted 7000 cards out of a total of 18108
mins: 23.983444583415984
-------------------------------
Converted 8000 cards out of a total of 18108
mins: 27.45131818453471
-------------------------------
Converted 9000 cards out of a total of 18108
mins: 30.91543374856313
-------------------

In [32]:
# convert 'NONE's to np.NaN's so we can scale our data then impute all the NaNs
converted_df = converted_df.replace('NONE', np.NaN)

# convert those columns to numerical data
converted_df['power_back'] = converted_df['power_back'].astype(float)
converted_df['power'] = converted_df['power'].astype(float)
converted_df['toughness'] = converted_df['toughness'].astype(float)
converted_df['toughness_back'] = converted_df['toughness_back'].astype(float)
converted_df['loyalty'] = converted_df['loyalty'].astype(float)
converted_df['loyalty_back'] = converted_df['loyalty_back'].astype(float)

Now that we have everything coverted to numerical data, we should scale the non-binarized features so everything will be weighted equally. This means we should just scale the columns that have a range of values, e.g. power, toughness, loyalty, and cmc.

In [33]:
range_col_list = ['power', 'power_back', 'toughness', 'toughness_back', 'loyalty', 'loyalty_back', 'cmc']
converted_df[range_col_list]

Unnamed: 0_level_0,power,power_back,toughness,toughness_back,loyalty,loyalty_back,cmc
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Static Orb,,,,,,,3.0
Sensory Deprivation,,,,,,,1.0
Road of Return,,,,,,,2.0
Storm Crow,1.0,,2.0,,,,2.0
Walking Sponge,1.0,,1.0,,,,2.0
...,...,...,...,...,...,...,...
Devoted Hero,1.0,,2.0,,,,1.0
Without Weakness,,,,,,,2.0
Firesong and Sunspeaker,4.0,,6.0,,,,6.0
"Samut, the Tested",,,,,4.0,,4.0


In [34]:
ss = StandardScaler()
range_col_sc = ss.fit_transform(converted_df[range_col_list])

In [35]:
sc_df = pd.DataFrame(range_col_sc, index = df.index.values, columns=range_col_list)

In [36]:
sc_df.head()

Unnamed: 0,power,power_back,toughness,toughness_back,loyalty,loyalty_back,cmc
Static Orb,,,,,,,-0.171137
Sensory Deprivation,,,,,,,-1.299672
Road of Return,,,,,,,-0.735404
Storm Crow,-0.901288,,-0.414735,,,,-0.735404
Walking Sponge,-0.901288,,-0.999139,,,,-0.735404


In [37]:
sc_df = sc_df.fillna(0)

In [38]:
sc_df.head()

Unnamed: 0,power,power_back,toughness,toughness_back,loyalty,loyalty_back,cmc
Static Orb,0.0,0.0,0.0,0.0,0.0,0.0,-0.171137
Sensory Deprivation,0.0,0.0,0.0,0.0,0.0,0.0,-1.299672
Road of Return,0.0,0.0,0.0,0.0,0.0,0.0,-0.735404
Storm Crow,-0.901288,0.0,-0.414735,0.0,0.0,0.0,-0.735404
Walking Sponge,-0.901288,0.0,-0.999139,0.0,0.0,0.0,-0.735404


In [39]:
converted_df = pd.concat([converted_df.fillna(0), sc_df], axis=1)

In [40]:
# convert to a sparse matrix
sparse_df = sparse.csr_matrix(converted_df)

In [41]:
converted_df.isnull().sum().sum()

0

In [42]:
t0 = time.time()
# build the recommender system using cosine similarity
rec = pairwise_distances(sparse_df, metric='cosine')
print((time.time() - t0)/60)

0.6951145847638448


In [43]:
# turn into a dataframe for interpretability
rec_df = pd.DataFrame(rec, index=converted_df.index, columns=converted_df.index)
rec_df.head()

name,Static Orb,Sensory Deprivation,Road of Return,Storm Crow,Walking Sponge,Ravnica at War,Torrent of Fire,Pteramander,Nantuko Elder,Vedalken Heretic,Waterknot,Ruthless Knave,"Hua Tuo, Honored Physician",Veil of Summer,Disposal Mummy,Marang River Prowler,Aura Graft,...,Trinket Mage,Skyshroud Blessing,"Omnath, Locus of the Roil",Harvest Hand // Scrounged Scythe,Polis Crusher,Test of Endurance,Venom Sliver,Borderland Ranger,Curse of Thirst,Temporary Truce,Clearwater Goblet,Quarry Beetle,Devoted Hero,Without Weakness,Firesong and Sunspeaker,"Samut, the Tested",Sinew Sliver
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
Static Orb,0.0,0.809625,0.852681,0.820776,0.798494,0.550537,0.694761,0.904719,0.647407,0.778593,0.736127,0.807216,0.808582,0.899651,0.763877,0.644153,0.797054,...,0.859131,0.81764,0.84784,0.685219,0.804693,0.705397,0.838919,0.856904,0.735193,0.735012,0.787283,0.767803,0.754602,0.874864,0.75507,0.799829,0.723005
Sensory Deprivation,0.809625,0.0,0.893109,0.720398,0.723876,0.694892,0.825775,0.79041,0.710354,0.746971,0.486649,0.805078,0.86368,0.901007,0.742935,0.762883,0.721904,...,0.846637,0.82964,0.88018,0.758282,0.842141,0.838148,0.748701,0.856959,0.75925,0.848565,0.884412,0.831817,0.657363,0.817833,0.849714,0.842788,0.683929
Road of Return,0.852681,0.893109,0.0,0.91979,0.842902,0.799654,0.80895,0.86959,0.808692,0.831301,0.896534,0.902856,0.724331,0.857177,0.808494,0.791672,0.84178,...,0.753381,0.870608,0.807674,0.851054,0.862147,0.763328,0.885706,0.749483,0.833653,0.862847,0.837539,0.69603,0.862215,0.878425,0.868877,0.79678,0.863791
Storm Crow,0.820776,0.720398,0.91979,0.0,0.70688,0.756265,0.833059,0.792442,0.633206,0.749223,0.801463,0.775377,0.827371,0.861058,0.700048,0.684139,0.82173,...,0.811524,0.83096,0.838178,0.660523,0.778955,0.851895,0.786744,0.831913,0.837512,0.873059,0.874763,0.738069,0.615865,0.876027,0.737957,0.888098,0.655277
Walking Sponge,0.798494,0.723876,0.842902,0.70688,0.0,0.74282,0.766802,0.771519,0.54113,0.732327,0.793206,0.719591,0.70592,0.759547,0.710418,0.710203,0.727089,...,0.783814,0.647561,0.777252,0.768783,0.778848,0.773853,0.754516,0.809265,0.737679,0.859618,0.818452,0.728072,0.60755,0.748301,0.734075,0.720976,0.65602


In [44]:
# now to test the recommender system
rec_df['Shock'].sort_values()[1:11]

name
Magma Jet              0.100619
Tarfire                0.104870
Seal of Fire           0.200068
Explosive Apparatus    0.217389
Ember Hauler           0.227399
Arc Trail              0.239780
Molten Vortex          0.240619
Moonglove Extract      0.254173
Parch                  0.261419
Orcish Vandal          0.262087
Name: Shock, dtype: float64

In [45]:
rec_df['Lightning Bolt'].sort_values()[1:11]

name
Searing Spear            0.085010
Lightning Strike         0.091295
Volcanic Hammer          0.111956
Open Fire                0.150193
Precision Bolt           0.163383
Ghostfire                0.165268
Lightning Helix          0.282013
Mudbutton Torchrunner    0.282211
Valakut Invoker          0.290374
Incinerate               0.307168
Name: Lightning Bolt, dtype: float64

In [46]:
rec_df['Static Orb'].sort_values()[1:11]

name
Winter Orb        0.085138
Imi Statue        0.246043
Damping Field     0.315013
Storage Matrix    0.415676
Eon Hub           0.424165
Kill Switch       0.431249
Mindlock Orb      0.434810
Stabilizer        0.439506
Stoic Angel       0.467908
Mirror Gallery    0.468589
Name: Static Orb, dtype: float64

In [47]:
rec_df['Prized Amalgam'].sort_values()[1:11]

name
Bone Dragon               0.341582
Scrapheap Scrounger       0.351119
Despoiler of Souls        0.365528
Skyfire Phoenix           0.383465
Ghoulsteed                0.385769
Footsteps of the Goryo    0.386484
Advanced Stitchwing       0.387474
Reassembling Skeleton     0.392616
Stitchwing Skaab          0.392876
Apprentice Necromancer    0.395457
Name: Prized Amalgam, dtype: float64

In [48]:
rec_df['Wrath of God'].sort_values()[1:11]

name
Damnation                  0.047444
Winds of Rath              0.085973
Day of Judgment            0.140317
Shatterstorm               0.155985
Perish                     0.169006
Plague Wind                0.170911
Catastrophe                0.174133
Obliterate                 0.201387
Mass Calcify               0.220907
Retribution of the Meek    0.225256
Name: Wrath of God, dtype: float64

In [49]:
rec_df['Jace, the Mind Sculptor'].sort_values()[1:11]

name
Voyage's End             0.402974
Coral Fighters           0.405433
Brainstorm               0.441652
Select for Inspection    0.444665
Anchor to the Aether     0.446959
Precognition             0.450361
Riverwise Augur          0.458374
Eye Spy                  0.459236
Dream Cache              0.462377
Dissolve                 0.475177
Name: Jace, the Mind Sculptor, dtype: float64

In [50]:
rec_df['Delver of Secrets // Insectile Aberration'].sort_values()[1:11]

name
Think Tank                               0.316347
Aberrant Researcher // Perfected Form    0.322247
Puresight Merrow                         0.351741
Geist of the Archives                    0.367037
Etherwrought Page                        0.394177
Precognition Field                       0.396804
Galvanoth                                0.416097
Rummaging Wizard                         0.420285
Mudbutton Clanger                        0.424091
Nightveil Sprite                         0.430172
Name: Delver of Secrets // Insectile Aberration, dtype: float64

In [51]:
rec_df['Grizzly Bears'].sort_values()[1:11] # vanilla creature

name
Runeclaw Bear        0.019564
Balduvian Bears      0.041153
Forest Bear          0.041153
Bear Cub             0.041153
Cylian Elf           0.059082
Elvish Warrior       0.073079
Alpine Grizzly       0.080186
Swordwise Centaur    0.087519
Gnarled Mass         0.090722
Pouncing Cheetah     0.092996
Name: Grizzly Bears, dtype: float64

In [52]:
rec_df['Oko, Thief of Crowns'].sort_values()[1:11]

name
Bake into a Pie          0.378067
Fell the Pheasant        0.416079
Wolf's Quarry            0.443908
Fortifying Provisions    0.449466
Bartered Cow             0.455600
Savvy Hunter             0.462641
Tempting Witch           0.467555
Shrewd Negotiation       0.472206
Foreboding Fruit         0.485801
Fierce Witchstalker      0.499607
Name: Oko, Thief of Crowns, dtype: float64

In [53]:
rec_df['Gaze of Granite'].sort_values()[1:11]

name
Pernicious Deed      0.172204
Forced March         0.181180
Meltdown             0.242874
Displacement Wave    0.272041
Dominate             0.342684
Granulate            0.352364
Hammer Mage          0.360637
Culling Sun          0.375249
Villainous Wealth    0.409279
Ritual of Soot       0.414993
Name: Gaze of Granite, dtype: float64

In [54]:
rec_df['Tarmogoyf'].sort_values()[1:11]

name
Lhurgoyf                0.153502
Swarm of Rats           0.333565
Coiling Woodworm        0.341797
Wilderness Elemental    0.365974
Yavimaya Kavu           0.368930
Shambling Suit          0.390276
People of the Woods     0.395471
Spellheart Chimera      0.403124
Sylvan Yeti             0.413779
Treefolk Seedlings      0.416566
Name: Tarmogoyf, dtype: float64

In [55]:
rec_df['Jace, Vryn\'s Prodigy // Jace, Telepath Unbound'].sort_values()[1:11]

name
Sins of the Past        0.492850
Kess, Dissident Mage    0.507893
Bösium Strip            0.508458
Finale of Promise       0.508812
Dreadhorde Arcanist     0.516600
Sphinx's Tutelage       0.529604
Bag of Holding          0.546170
Dire Fleet Daredevil    0.551543
Mission Briefing        0.556551
Jaya Ballard            0.558404
Name: Jace, Vryn's Prodigy // Jace, Telepath Unbound, dtype: float64

In [57]:
rec_df['Fatal Push'].sort_values()[1:11]

name
Fragmentize              0.501629
Renegade Rallier         0.534575
Thoughtbind              0.551672
Wretched Banquet         0.556169
Overload                 0.559364
Threads of Disloyalty    0.562616
Despark                  0.566174
Granulate                0.567496
Smother                  0.577638
Disembowel               0.588417
Name: Fatal Push, dtype: float64

In [58]:
rec_df['Veil of Summer'].sort_values()[1:11]

name
Autumn's Veil           0.309686
Display of Dominance    0.357039
Lazotep Plating         0.432822
Veilstone Amulet        0.469778
Blinding Fog            0.487491
Join Shields            0.519558
Skyshroud Blessing      0.525665
Glaring Spotlight       0.534698
Spellbane Centaur       0.539845
Tortoise Formation      0.540786
Name: Veil of Summer, dtype: float64

In [59]:
rec_df['Urza, Lord High Artificer'].sort_values()[1:11]

name
Aerial Caravan        0.457736
Djinn of Wishes       0.474863
Mind's Desire         0.480227
Knacksaw Clique       0.488480
Lodestone Myr         0.493280
Magus of the Mind     0.494340
Oracle's Vault        0.496914
Intet, the Dreamer    0.501782
Vital Splicer         0.512462
Leaf-Crowned Elder    0.515929
Name: Urza, Lord High Artificer, dtype: float64