# 03 - Recommender System

In [1]:
# imports
import pandas as pd
import numpy as np
import time
import pickle
import sqlite3

from sqlalchemy import create_engine
from scipy import sparse # cut down on memory size
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import StandardScaler

pd.options.display.max_columns = 35

___

In [2]:
# read in the data
df = pd.read_csv('../Data/cards_cleaned.csv')
df.head()

Unnamed: 0,name,layout,colors,color_identity,mana_cost,cmc,type_line,card_type,super_type,sub_type,oracle_text,oracle_text_token,legalities,rarity,power,toughness,loyalty,activated_ability,triggered_ability,oracle_text_back,oracle_text_back_token,colors_back,power_back,toughness_back,loyalty_back,card_type_back,super_type_back,sub_type_back,mana_cost_back,scryfall_uri
0,Static Orb,normal,[],[],{3},3.0,Artifact,Artifact,NONE,NONE,"As long as Static Orb is untapped, players can...",as long as static orb is untapped players can'...,legacy vintage commander duel,rare,NONE,NONE,NONE,0.0,0.0,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,https://scryfall.com/card/7ed/319/static-orb?u...
1,Sensory Deprivation,normal,['U'],['U'],{U},1.0,Enchantment — Aura,Enchantment,NONE,Aura,Enchant creature Enchanted creature gets -3/-0.,enchant creature enchanted creature gets -3/-0,pioneer modern legacy pauper vintage penny com...,common,NONE,NONE,NONE,0.0,0.0,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,https://scryfall.com/card/m14/71/sensory-depri...
2,Road of Return,normal,['G'],['G'],{G}{G},2.0,Sorcery,Sorcery,NONE,NONE,Choose one — • Return target permanent card fr...,choose one return target permanent card from y...,legacy vintage commander duel,rare,NONE,NONE,NONE,0.0,0.0,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,https://scryfall.com/card/c19/34/road-of-retur...
3,Storm Crow,normal,['U'],['U'],{1}{U},2.0,Creature — Bird,Creature,NONE,Bird,Flying (This creature can't be blocked except ...,flying this creature can't be blocked except b...,modern legacy pauper vintage penny commander duel,common,1,2,NONE,0.0,0.0,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,https://scryfall.com/card/9ed/100/storm-crow?u...
4,Walking Sponge,normal,['U'],['U'],{1}{U},2.0,Creature — Sponge,Creature,NONE,Sponge,{T}: Target creature loses your choice of flyi...,{t} target creature loses your choice of flyin...,legacy vintage commander duel,uncommon,1,1,NONE,1.0,0.0,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,https://scryfall.com/card/ulg/47/walking-spong...


In [3]:
df.shape

(19521, 30)

___

## Oracle text recommender system
To start I want to build a recommender system that just looks at oracle text. In order to do that I need to combine both oracle_text and oracle_text_back into one single column

In [4]:
df['oracle_combined'] = df['oracle_text_token'] + " " + df['oracle_text_back_token']

In [5]:
# start by isolating the name of the card and it's oracle_text
oracle = df['oracle_combined']

# vectorize all our words
cvec = CountVectorizer(stop_words=['none'],
                      min_df=2,
                      max_df=.99,
                      ngram_range=(1,6),
                      token_pattern="[a-zA-Z{}+'0-9-/−]+") # we should use the same RegEx to keep certain characters together 

oracle_vec = cvec.fit_transform(oracle)

# convert to a dataframe so we can use this later on as well
converted_df = pd.DataFrame(oracle_vec.toarray(), columns=cvec.get_feature_names(), index=df['name'])

In [6]:
converted_df.shape

(19521, 138293)

This is a whole lot of features to examine so I'm going to use Variance Threshold to drop some n_gram columns that only exist in a small number of cards

In [7]:
selector = VarianceThreshold(.001)
new_array = selector.fit_transform(converted_df)

new_array.shape

(19521, 15096)

In [8]:
# get the list of columns kept
vt_list = converted_df.columns[selector.get_support()]

# update our converted_df with just the important columns
converted_df = converted_df[vt_list]

For the recommender system to work efficiently we need to convert the data back into a sparse matrix with the new names as the index

In [9]:
sparse_df = sparse.csr_matrix(converted_df)

In [10]:
# build the recommender system using cosine similarity
rec = pairwise_distances(sparse_df, metric='cosine')

In [11]:
rec.shape

(19521, 19521)

In [12]:
rec_df = pd.DataFrame(rec, index=converted_df.index, columns=converted_df.index)
rec_df.head()

name,Static Orb,Sensory Deprivation,Road of Return,Storm Crow,Walking Sponge,Ravnica at War,Torrent of Fire,Wyluli Wolf,Pteramander,Nantuko Elder,Vedalken Heretic,Waterknot,Ruthless Knave,Palinchron,"Hua Tuo, Honored Physician",Veil of Summer,Disposal Mummy,...,"Omnath, Locus of the Roil",Harvest Hand // Scrounged Scythe,Stinging Lionfish,Polis Crusher,Test of Endurance,Venom Sliver,Borderland Ranger,Curse of Thirst,Temporary Truce,Freyalise's Winds,Clearwater Goblet,Quarry Beetle,Devoted Hero,Without Weakness,Firesong and Sunspeaker,"Samut, the Tested",Sinew Sliver
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
Static Orb,0.0,1.0,1.0,0.976967,1.0,0.916955,0.976224,1.0,1.0,1.0,1.0,0.897467,0.979617,0.933296,0.982608,0.962356,1.0,...,0.987648,0.800691,0.920416,0.983457,0.975825,0.980091,1.0,1.0,0.922746,0.91304,1.0,1.0,1.0,0.9855,1.0,0.944326,1.0
Sensory Deprivation,1.0,0.0,1.0,0.939834,0.922326,1.0,1.0,0.84051,0.952886,1.0,1.0,0.62503,0.946757,1.0,0.954569,1.0,1.0,...,1.0,0.869842,1.0,0.956786,1.0,0.89599,1.0,0.943989,1.0,1.0,1.0,1.0,1.0,0.962122,0.947387,0.927285,1.0
Road of Return,1.0,1.0,0.0,1.0,0.926605,1.0,0.91784,0.98493,0.910962,1.0,0.952836,0.979754,0.979876,0.884749,0.793944,0.900888,0.870359,...,0.859751,0.909809,0.895234,0.942834,0.856787,0.970516,0.801835,0.925904,0.974575,0.948486,0.911906,0.695338,1.0,0.928418,0.950285,0.85571,1.0
Storm Crow,0.976967,0.939834,1.0,0.0,0.920554,1.0,1.0,0.979609,0.927716,1.0,1.0,0.958908,0.959156,0.955445,0.976766,0.907804,1.0,...,0.991749,0.789214,0.982281,0.96685,0.983852,0.946808,1.0,1.0,0.982799,0.988383,0.981662,1.0,1.0,0.970944,0.95964,0.97211,0.962602
Walking Sponge,1.0,0.922326,0.926605,0.920554,0.0,1.0,0.938493,0.552477,0.953341,0.90755,1.0,0.94695,0.929695,0.97124,0.850026,0.794411,0.971693,...,0.914787,0.942711,0.885623,0.957204,0.916612,0.93133,0.961538,0.88906,1.0,1.0,0.917141,0.960579,1.0,0.812441,0.913158,0.77196,1.0


In [13]:
# now to test the recommender system
rec_df['Shock'].sort_values()[0:11]

name
Shock                0.000000
Tarfire              0.000000
Bee Sting            0.000000
Unyaro Bee Sting     0.000000
Magma Jet            0.058487
Deadapult            0.133975
Shock Troops         0.133975
Moonglove Extract    0.133975
Goblin Test Pilot    0.149037
Crackling Triton     0.149037
Seismic Assault      0.149037
Name: Shock, dtype: float64

In [14]:
rec_df['Static Orb'].sort_values()[0:11]

name
Static Orb                0.000000
Winter Orb                0.099500
Damping Field             0.360979
Imi Statue                0.360979
Smoke                     0.406144
Stoic Angel               0.442914
Storage Matrix            0.540666
Castle Raptors            0.543250
Kill Switch               0.578152
Giant Tortoise            0.594660
Juniper Order Advocate    0.620690
Name: Static Orb, dtype: float64

In [15]:
rec_df['Prized Amalgam'].sort_values()[0:11]

name
Prized Amalgam            0.000000
Reassembling Skeleton     0.374905
Footsteps of the Goryo    0.378368
Bone Dragon               0.383870
Chronosavant              0.408758
Scrapheap Scrounger       0.409055
Apprentice Necromancer    0.414345
Despoiler of Souls        0.418516
Wake the Dead             0.423640
Ghoulsteed                0.424205
Cauldron Dance            0.425656
Name: Prized Amalgam, dtype: float64

In [16]:
rec_df['Wrath of God'].sort_values()[0:11]

name
Wrath of God               0.000000
Damnation                  0.000000
Perish                     0.117647
Winds of Rath              0.172660
Shatterstorm               0.185908
Plague Wind                0.207882
Abu Ja'far                 0.238958
Jokulhaups                 0.240743
Retribution of the Meek    0.344064
Do or Die                  0.348305
Obliterate                 0.356079
Name: Wrath of God, dtype: float64

In [17]:
rec_df['Grizzly Bears'].sort_values()[0:11] # vanilla creature

name
Grizzly Bears                   0.0
Static Orb                      1.0
Tatsumasa, the Dragon's Fang    1.0
Deadeye Tormentor               1.0
Fiery Hellhound                 1.0
Mask of Avacyn                  1.0
Saheeli, Sublime Artificer      1.0
Saheeli's Silverwing            1.0
Dungeon Geists                  1.0
Grind // Dust                   1.0
Rage Nimbus                     1.0
Name: Grizzly Bears, dtype: float64

This is a great start! Now I want to add the numerical features and see the results
___

## Adding numerical data to our features

In [18]:
df.dtypes

name                       object
layout                     object
colors                     object
color_identity             object
mana_cost                  object
cmc                       float64
type_line                  object
card_type                  object
super_type                 object
sub_type                   object
oracle_text                object
oracle_text_token          object
legalities                 object
rarity                     object
power                      object
toughness                  object
loyalty                    object
activated_ability         float64
triggered_ability         float64
oracle_text_back           object
oracle_text_back_token     object
colors_back                object
power_back                 object
toughness_back             object
loyalty_back               object
card_type_back             object
super_type_back            object
sub_type_back              object
mana_cost_back             object
scryfall_uri  

In [19]:
# we already have a df for the oracle text so we can just use that one and add to it.
converted_df['cmc'] = df['cmc'].values
converted_df['activated_ability'] = df['activated_ability'].values
converted_df['triggered_ability'] = df['triggered_ability'].values

# convert to a sparse matrix
sparse_df = sparse.csr_matrix(converted_df)

# build the recommender system using cosine similarity
rec = pairwise_distances(sparse_df, metric='cosine')

# turn into a dataframe for interpretability
rec_df = pd.DataFrame(rec, index=converted_df.index, columns=converted_df.index)
rec_df.head()

name,Static Orb,Sensory Deprivation,Road of Return,Storm Crow,Walking Sponge,Ravnica at War,Torrent of Fire,Wyluli Wolf,Pteramander,Nantuko Elder,Vedalken Heretic,Waterknot,Ruthless Knave,Palinchron,"Hua Tuo, Honored Physician",Veil of Summer,Disposal Mummy,...,"Omnath, Locus of the Roil",Harvest Hand // Scrounged Scythe,Stinging Lionfish,Polis Crusher,Test of Endurance,Venom Sliver,Borderland Ranger,Curse of Thirst,Temporary Truce,Freyalise's Winds,Clearwater Goblet,Quarry Beetle,Devoted Hero,Without Weakness,Firesong and Sunspeaker,"Samut, the Tested",Sinew Sliver
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
Static Orb,0.0,0.885292,0.912238,0.863296,0.853265,0.539805,0.720115,0.849812,0.953171,0.595071,0.844143,0.763221,0.831784,0.58797,0.854321,0.934378,0.774718,...,0.864715,0.706802,0.80132,0.82426,0.758095,0.880962,0.886683,0.757875,0.8049,0.759053,0.831684,0.746309,0.513336,0.91265,0.735637,0.814129,0.748688
Sensory Deprivation,0.885292,0.0,0.957495,0.886499,0.857866,0.794262,0.872918,0.781782,0.931959,0.803884,0.924515,0.582248,0.877794,0.817797,0.894167,0.984109,0.890891,...,0.939519,0.817426,0.93585,0.882149,0.891852,0.85175,0.945118,0.835827,0.937006,0.917626,0.918481,0.877132,0.764298,0.927476,0.829284,0.864968,0.878284
Road of Return,0.912238,0.957495,0.0,0.956581,0.877661,0.842592,0.83471,0.930435,0.895884,0.849953,0.898932,0.924796,0.925201,0.790897,0.757083,0.890577,0.805217,...,0.820689,0.868074,0.852758,0.887291,0.793143,0.933836,0.769056,0.847477,0.927706,0.889709,0.856549,0.652179,0.819666,0.902897,0.861223,0.812747,0.906876
Storm Crow,0.863296,0.886499,0.956581,0.0,0.854809,0.789837,0.870185,0.90712,0.907327,0.799666,0.922891,0.88704,0.887649,0.78729,0.913512,0.894487,0.888544,...,0.930495,0.740971,0.918088,0.889646,0.875717,0.899041,0.943937,0.880212,0.919564,0.905337,0.900073,0.874489,0.759228,0.935177,0.836512,0.905169,0.844583
Walking Sponge,0.853265,0.857866,0.877661,0.854809,0.0,0.736819,0.788667,0.488234,0.912961,0.665503,0.903439,0.858544,0.828041,0.733629,0.76985,0.786557,0.837165,...,0.845264,0.87025,0.815363,0.849244,0.792486,0.873572,0.894692,0.759988,0.919418,0.894627,0.822724,0.811392,0.698489,0.768068,0.767971,0.697717,0.8443


In [20]:
# now to test the recommender system
rec_df['Shock'].sort_values()[0:11]

name
Shock                0.000000e+00
Tarfire              4.440892e-16
Magma Jet            6.580127e-02
Bee Sting            1.237505e-01
Unyaro Bee Sting     1.237505e-01
Moonglove Extract    1.699426e-01
Deadapult            1.699426e-01
Seal of Fire         1.708438e-01
Ember Hauler         1.711375e-01
Orcish Vandal        1.711375e-01
Arc Trail            1.776544e-01
Name: Shock, dtype: float64

In [21]:
rec_df['Static Orb'].sort_values()[0:11]

name
Static Orb          0.000000
Winter Orb          0.095126
Damping Field       0.264233
Imi Statue          0.264233
Stoic Angel         0.315962
Smoke               0.331901
Castle Raptors      0.371254
Wardscale Dragon    0.423140
Skyrider Trainee    0.437517
Storage Matrix      0.449177
Mungha Wurm         0.454499
Name: Static Orb, dtype: float64

In [22]:
rec_df['Prized Amalgam'].sort_values()[0:11]

name
Prized Amalgam            0.000000
Footsteps of the Goryo    0.352702
Reassembling Skeleton     0.362452
Bone Dragon               0.380206
Scrapheap Scrounger       0.392484
Apprentice Necromancer    0.397741
Cauldron Dance            0.398335
Despoiler of Souls        0.401777
Wake the Dead             0.403900
Stitchwing Skaab          0.424360
Skyfire Phoenix           0.425799
Name: Prized Amalgam, dtype: float64

In [23]:
rec_df['Wrath of God'].sort_values()[0:11]

name
Wrath of God       0.000000
Damnation          0.000000
Perish             0.078235
Winds of Rath      0.085894
Shatterstorm       0.093307
Jokulhaups         0.116040
Plague Wind        0.116612
Obliterate         0.167576
Catastrophe        0.182008
Day of Judgment    0.183503
Child of Alara     0.187596
Name: Wrath of God, dtype: float64

In [24]:
rec_df['Jace, the Mind Sculptor'].sort_values()[0:11]

name
Jace, the Mind Sculptor    0.000000
Coral Fighters             0.374088
Voyage's End               0.389159
Brainstorm                 0.413580
Riverwise Augur            0.415442
Select for Inspection      0.432234
Eye Spy                    0.434878
Anchor to the Aether       0.438873
Precognition               0.443417
Dream Cache                0.450290
Cavalier of Gales          0.453835
Name: Jace, the Mind Sculptor, dtype: float64

In [25]:
rec_df['Grizzly Bears'].sort_values()[0:11] # vanilla creature

name
Moriok Reaver         0.0
Broodhunter Wurm      0.0
Scoria Elemental      0.0
Bogstomper            0.0
Headless Horseman     0.0
Stone Golem           0.0
Great-Horn Krushok    0.0
Dromoka Warrior       0.0
Ruination Wurm        0.0
Jwari Scuttler        0.0
Vizzerdrix            0.0
Name: Grizzly Bears, dtype: float64

___

## Non-numerical data
Now I have to convert the non-numerical data into numerical data

### Power / Toughness / Loyalty

Through some outside reasearch on ScryFall and how other sources interpret * s in power and toughness, I'm going to impute the * s as 0 and * +1s as 1 for both power and toughness. Same is true for loyalty

In [26]:
# power
zero_power_index = df.loc[(df['power'] == '*')].index
zero_power_back_index = df.loc[(df['power_back'] == '*')].index
df.loc[zero_power_index, 'power'] = 0
df.loc[zero_power_back_index, 'power_back'] = 0

one_power_index = df.loc[(df['power'] == '1+*')].index
df.loc[one_power_index, 'power'] = 1

two_power_index = df.loc[(df['power'] == '2+*')].index
df.loc[two_power_index, 'power'] = 2

# toughness
zero_toughness_index = df.loc[(df['toughness'] == '*')].index
zero_toughness_back_index = df.loc[(df['toughness_back'] == '*')].index
df.loc[zero_toughness_index, 'toughness'] = 0
df.loc[zero_toughness_back_index, 'toughness_back'] = 0

one_toughness_index = df.loc[(df['toughness'] == '1+*')].index
df.loc[one_toughness_index, 'toughness'] = 1

two_toughness_index = df.loc[(df['toughness'] == '2+*')].index
df.loc[two_toughness_index, 'toughness'] = 2

seven_toughness_index = df.loc[(df['toughness'] == '7-*')].index
df.loc[seven_toughness_index, 'toughness'] = 7

# loyalty
zero_loyalty_index = df.loc[(df['loyalty'] == 'X')].index
zero_loyalty_back_index = df.loc[(df['loyalty_back'] == 'X')].index
df.loc[zero_loyalty_index, 'loyalty'] = 0
df.loc[zero_loyalty_back_index, 'loyalty_back'] = 0

In [27]:
df.dtypes

name                       object
layout                     object
colors                     object
color_identity             object
mana_cost                  object
cmc                       float64
type_line                  object
card_type                  object
super_type                 object
sub_type                   object
oracle_text                object
oracle_text_token          object
legalities                 object
rarity                     object
power                      object
toughness                  object
loyalty                    object
activated_ability         float64
triggered_ability         float64
oracle_text_back           object
oracle_text_back_token     object
colors_back                object
power_back                 object
toughness_back             object
loyalty_back               object
card_type_back             object
super_type_back            object
sub_type_back              object
mana_cost_back             object
scryfall_uri  

In [28]:
# add the newly made numerical columns to the converted df
converted_df['power'] = df['power'].values
converted_df['power_back'] = df['power_back'].values
converted_df['toughness'] = df['toughness'].values
converted_df['toughness_back'] = df['toughness_back'].values
converted_df['loyalty'] = df['loyalty'].values
converted_df['loyalty_back'] = df['loyalty_back'].values

In [29]:
converted_df.head()

Unnamed: 0_level_0,+0/+1,+0/+1 until,+0/+1 until end,+0/+1 until end of,+0/+1 until end of turn,+0/+2,+0/+2 until,+0/+2 until end,+0/+2 until end of,+0/+2 until end of turn,+0/+3,+1,+1/+0,+1/+0 and,+1/+0 and gains,+1/+0 and has,+1/+0 counters,...,{w} {t} tap target,{w}{u}{b}{r}{g},{w}{w},{x},{x} {t},−1,−2,−3,−6,−7,−8,cmc,activated_ability,triggered_ability,power_back,toughness_back,loyalty_back
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
Static Orb,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,3.0,0.0,0.0,NONE,NONE,NONE
Sensory Deprivation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,1.0,0.0,0.0,NONE,NONE,NONE
Road of Return,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,2.0,0.0,0.0,NONE,NONE,NONE
Storm Crow,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,2.0,0.0,0.0,NONE,NONE,NONE
Walking Sponge,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,2.0,1.0,0.0,NONE,NONE,NONE


___

In [30]:
# set the index of df to be the same as the converted_df; the name of the card
df = df.set_index(df['name']).drop(columns='name')

In [31]:
# essentially makeing dummy variables for non-numerical data

# initialize some lists to check
wburg = ['B', 'G', 'R', 'U', 'W']

card_types = ['Creature', 'Instant', 'Enchantment', 'Sorcery', 'Artifact', 'Land', 'Planeswalker', 'Tribal']

subtypes = list(set(" ".join(df['sub_type'].value_counts().index).split()))
subtypes.remove('NONE')

super_types = list(set(" ".join(df['super_type'].value_counts().index).split()))
super_types.remove('NONE')

formats = list(set(" ".join(df['legalities'].value_counts().index).split()))
formats.remove('NONE')

rarities = ['common', 'uncommon', 'rare', 'mythic']


# timer and progress checker
t0 = time.time()
counter = 0

for counter, index in enumerate(converted_df.index):
    # colors
    for color in wburg:
        converted_df.loc[index, 'colors_' + color] = (color in df.loc[index, 'colors']) * 1
        converted_df.loc[index, 'color_identity_' + color] = (color in df.loc[index, 'color_identity']) * 1
    
    # card_types
    for ctype in card_types:
        converted_df.loc[index, 'card_type_' + ctype] = (ctype in df.loc[index, 'card_type']) * 1
        
    # sub_types
    for stype in subtypes:
        converted_df.loc[index, 'sub_type_' + stype] = (stype in df.loc[index, 'sub_type']) * 1
        
    # super_type
    for st in super_types:
        converted_df.loc[index, 'super_type_' + st] = (st in df.loc[index, 'super_type']) * 1
    
    # legalities
    for form in formats:
        converted_df.loc[index, 'legalities_' + form] = (form in df.loc[index, 'legalities']) * 1
    
    # rarity
    for r in rarities:
        converted_df.loc[index, 'rarity_' + r] = (r in df.loc[index, 'rarity']) * 1
    
    # progress checker
    if counter % 1000 == 0:
        print(f'Converted {counter} cards out of a total of {len(converted_df.index)}')
        print(f'mins: {(time.time() - t0)/60}')
        print('-------------------------------')

Converted 0 cards out of a total of 19521
mins: 0.37101365327835084
-------------------------------
Converted 1000 cards out of a total of 19521
mins: 3.6360658526420595
-------------------------------
Converted 2000 cards out of a total of 19521
mins: 7.02250611782074
-------------------------------
Converted 3000 cards out of a total of 19521
mins: 10.231791086991628
-------------------------------
Converted 4000 cards out of a total of 19521
mins: 13.437855585416157
-------------------------------
Converted 5000 cards out of a total of 19521
mins: 16.631912672519682
-------------------------------
Converted 6000 cards out of a total of 19521
mins: 19.81955570379893
-------------------------------
Converted 7000 cards out of a total of 19521
mins: 23.001061534881593
-------------------------------
Converted 8000 cards out of a total of 19521
mins: 26.17591067155202
-------------------------------
Converted 9000 cards out of a total of 19521
mins: 29.642980782190957
------------------

In [32]:
# convert 'NONE's to np.NaN's so we can scale our data then impute all the NaNs
converted_df = converted_df.replace('NONE', np.NaN)

# convert those columns to numerical data
converted_df['power_back'] = converted_df['power_back'].astype(float)
converted_df['power'] = converted_df['power'].astype(float)
converted_df['toughness'] = converted_df['toughness'].astype(float)
converted_df['toughness_back'] = converted_df['toughness_back'].astype(float)
converted_df['loyalty'] = converted_df['loyalty'].astype(float)
converted_df['loyalty_back'] = converted_df['loyalty_back'].astype(float)

Now that we have everything coverted to numerical data, we should scale the non-binarized features so everything will be weighted equally. This means we should just scale the columns that have a range of values, e.g. power, toughness, loyalty, and cmc.

In [33]:
range_col_list = ['power', 'power_back', 'toughness', 'toughness_back', 'loyalty', 'loyalty_back', 'cmc']
converted_df[range_col_list]

Unnamed: 0_level_0,power,power_back,toughness,toughness_back,loyalty,loyalty_back,cmc
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Static Orb,,,,,,,3.0
Sensory Deprivation,,,,,,,1.0
Road of Return,,,,,,,2.0
Storm Crow,1.0,,2.0,,,,2.0
Walking Sponge,1.0,,1.0,,,,2.0
...,...,...,...,...,...,...,...
Devoted Hero,1.0,,2.0,,,,1.0
Without Weakness,,,,,,,2.0
Firesong and Sunspeaker,4.0,,6.0,,,,6.0
"Samut, the Tested",,,,,4.0,,4.0


In [34]:
ss = StandardScaler()
range_col_sc = ss.fit_transform(converted_df[range_col_list])

In [35]:
sc_df = pd.DataFrame(range_col_sc, index = df.index.values, columns=range_col_list)

In [36]:
sc_df.head()

Unnamed: 0,power,power_back,toughness,toughness_back,loyalty,loyalty_back,cmc
Static Orb,,,,,,,-0.167735
Sensory Deprivation,,,,,,,-1.294101
Road of Return,,,,,,,-0.730918
Storm Crow,-0.897382,,-0.418719,,,,-0.730918
Walking Sponge,-0.897382,,-0.999858,,,,-0.730918


In [37]:
sc_df = sc_df.fillna(0)

In [38]:
sc_df.head()

Unnamed: 0,power,power_back,toughness,toughness_back,loyalty,loyalty_back,cmc
Static Orb,0.0,0.0,0.0,0.0,0.0,0.0,-0.167735
Sensory Deprivation,0.0,0.0,0.0,0.0,0.0,0.0,-1.294101
Road of Return,0.0,0.0,0.0,0.0,0.0,0.0,-0.730918
Storm Crow,-0.897382,0.0,-0.418719,0.0,0.0,0.0,-0.730918
Walking Sponge,-0.897382,0.0,-0.999858,0.0,0.0,0.0,-0.730918


In [39]:
converted_df = pd.concat([converted_df.fillna(0).drop(columns=range_col_list), sc_df], axis=1)

In [40]:
# convert to a sparse matrix
sparse_df = sparse.csr_matrix(converted_df)

In [41]:
converted_df.isnull().sum().sum()

0

In [42]:
t0 = time.time()
# build the recommender system using cosine similarity
rec = pairwise_distances(sparse_df, metric='cosine')
print((time.time() - t0)/60)

0.7014942685763041


In [43]:
# turn into a dataframe for interpretability
rec_df = pd.DataFrame(rec, index=converted_df.index, columns=converted_df.index)
rec_df.head()

name,Static Orb,Sensory Deprivation,Road of Return,Storm Crow,Walking Sponge,Ravnica at War,Torrent of Fire,Wyluli Wolf,Pteramander,Nantuko Elder,Vedalken Heretic,Waterknot,Ruthless Knave,Palinchron,"Hua Tuo, Honored Physician",Veil of Summer,Disposal Mummy,...,"Omnath, Locus of the Roil",Harvest Hand // Scrounged Scythe,Stinging Lionfish,Polis Crusher,Test of Endurance,Venom Sliver,Borderland Ranger,Curse of Thirst,Temporary Truce,Freyalise's Winds,Clearwater Goblet,Quarry Beetle,Devoted Hero,Without Weakness,Firesong and Sunspeaker,"Samut, the Tested",Sinew Sliver
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
Static Orb,0.0,0.873397,0.923358,0.90232,0.903717,0.77669,0.902908,0.902784,0.937139,0.829895,0.879203,0.845139,0.914303,0.839222,0.909265,0.919885,0.899815,...,0.946987,0.75446,0.855779,0.915999,0.87856,0.914021,0.947846,0.931592,0.823605,0.848758,0.928783,0.928546,0.807181,0.934672,0.923103,0.903563,0.858808
Sensory Deprivation,0.873397,0.0,0.922183,0.740403,0.755728,0.743218,0.899581,0.703764,0.78564,0.768319,0.778159,0.509051,0.834802,0.916056,0.901592,0.910409,0.785084,...,0.915385,0.799579,0.745633,0.888521,0.903402,0.78915,0.88812,0.81713,0.88705,0.928139,0.951782,0.887275,0.630976,0.853204,0.897219,0.878715,0.713825
Road of Return,0.923358,0.922183,0.0,0.954688,0.883303,0.887361,0.881189,0.906889,0.883241,0.864499,0.869814,0.944961,0.945312,0.867267,0.754494,0.866509,0.841988,...,0.838319,0.885718,0.86699,0.898276,0.82076,0.916134,0.775248,0.903823,0.902871,0.898577,0.889425,0.684952,0.881483,0.90286,0.924511,0.829842,0.918616
Storm Crow,0.90232,0.740403,0.954688,0.0,0.785379,0.856442,0.928983,0.830551,0.819092,0.799645,0.830551,0.849653,0.885577,0.900711,0.918533,0.866939,0.861554,...,0.939739,0.744025,0.834565,0.927882,0.935601,0.856971,0.917176,0.925029,0.919725,0.952092,0.948683,0.938192,0.727382,0.910268,0.931618,0.937395,0.753901
Walking Sponge,0.903717,0.755728,0.883303,0.785379,0.0,0.885747,0.880192,0.491882,0.807924,0.64256,0.819834,0.857954,0.828577,0.893617,0.786401,0.764378,0.8527,...,0.87428,0.86056,0.738057,0.916675,0.870895,0.816841,0.895981,0.835293,0.919445,0.953779,0.897237,0.905567,0.694403,0.775335,0.917673,0.764218,0.766353


In [44]:
# now to test the recommender system
rec_df['Shock'].sort_values()[1:11]

name
Magma Jet              0.094770
Tarfire                0.107739
Ember Hauler           0.172635
Seal of Fire           0.220325
Explosive Apparatus    0.222572
Unyaro Bee Sting       0.236927
Orcish Vandal          0.238030
Moonglove Extract      0.238148
Bee Sting              0.253660
Molten Vortex          0.262035
Name: Shock, dtype: float64

In [45]:
rec_df['Lightning Bolt'].sort_values()[1:11]

name
Lightning Strike         0.072628
Searing Spear            0.075171
Open Fire                0.103053
Volcanic Hammer          0.104121
Ghostfire                0.118336
Fire Ambush              0.133071
Precision Bolt           0.137319
Valakut Invoker          0.181382
Fateful End              0.195237
Mudbutton Torchrunner    0.262980
Name: Lightning Bolt, dtype: float64

In [46]:
rec_df['Static Orb'].sort_values()[1:11]

name
Winter Orb        0.086514
Imi Statue        0.303581
Smoke             0.384707
Damping Field     0.403270
Stoic Angel       0.472613
Storage Matrix    0.479578
Kill Switch       0.509902
Castle Raptors    0.563678
Stabilizer        0.564627
Watchdog          0.577101
Name: Static Orb, dtype: float64

In [47]:
rec_df['Prized Amalgam'].sort_values()[1:11]

name
Footsteps of the Goryo    0.379860
Bone Dragon               0.401907
Scrapheap Scrounger       0.406178
Despoiler of Souls        0.408712
Wake the Dead             0.414113
Apprentice Necromancer    0.415082
Cauldron Dance            0.433444
Reassembling Skeleton     0.438629
Advanced Stitchwing       0.443229
Stitchwing Skaab          0.443434
Name: Prized Amalgam, dtype: float64

In [48]:
rec_df['Wrath of God'].sort_values()[1:11]

name
Damnation                  0.076463
Winds of Rath              0.142733
Perish                     0.236029
Day of Judgment            0.238782
Shatterstorm               0.253206
Jokulhaups                 0.268388
Retribution of the Meek    0.278144
Plague Wind                0.278940
Catastrophe                0.299106
Obliterate                 0.351389
Name: Wrath of God, dtype: float64

In [49]:
rec_df['Jace, the Mind Sculptor'].sort_values()[1:11]

name
Voyage's End             0.407273
Coral Fighters           0.408380
Select for Inspection    0.439064
Brainstorm               0.442925
Riverwise Augur          0.459136
Eye Spy                  0.460693
Anchor to the Aether     0.461277
Precognition             0.473147
Dream Cache              0.481941
Dissolve                 0.492379
Name: Jace, the Mind Sculptor, dtype: float64

In [50]:
rec_df['Delver of Secrets // Insectile Aberration'].sort_values()[1:11]

name
Think Tank                               0.276391
Puresight Merrow                         0.320040
Geist of the Archives                    0.330403
Aberrant Researcher // Perfected Form    0.343916
Etherwrought Page                        0.356894
Precognition Field                       0.366579
Rummaging Wizard                         0.380935
Galvanoth                                0.381017
Into the Wilds                           0.399796
Mudbutton Clanger                        0.406682
Name: Delver of Secrets // Insectile Aberration, dtype: float64

In [51]:
rec_df['Grizzly Bears'].sort_values()[1:11] # vanilla creature

name
Runeclaw Bear        0.036896
Forest Bear          0.081399
Bear Cub             0.081399
Balduvian Bears      0.081399
Alpine Grizzly       0.099794
Cylian Elf           0.112100
Trained Armodon      0.119920
Elvish Warrior       0.125671
Swordwise Centaur    0.155249
Gnarled Mass         0.164994
Name: Grizzly Bears, dtype: float64

In [52]:
rec_df['Oko, Thief of Crowns'].sort_values()[1:11]

name
Bake into a Pie          0.404241
Fell the Pheasant        0.440607
Bartered Cow             0.447424
Savvy Hunter             0.456416
Wolf's Quarry            0.468662
Shrewd Negotiation       0.471643
Fierce Witchstalker      0.471962
Tempting Witch           0.478143
Fortifying Provisions    0.483236
Foreboding Fruit         0.516778
Name: Oko, Thief of Crowns, dtype: float64

In [53]:
rec_df['Gaze of Granite'].sort_values()[1:11]

name
Pernicious Deed      0.195378
Forced March         0.208232
Meltdown             0.222931
Displacement Wave    0.299442
Hammer Mage          0.375940
Dominate             0.395531
Granulate            0.452845
Villainous Wealth    0.454082
Disembowel           0.462508
Stir the Grave       0.466550
Name: Gaze of Granite, dtype: float64

In [54]:
rec_df['Tarmogoyf'].sort_values()[1:11]

name
Lhurgoyf                0.145749
Swarm of Rats           0.359460
Coiling Woodworm        0.366530
Wilderness Elemental    0.371145
Yavimaya Kavu           0.397080
Dakmor Sorceress        0.410047
People of the Woods     0.411078
Shambling Suit          0.411874
Treefolk Seedlings      0.416398
Sylvan Yeti             0.420198
Name: Tarmogoyf, dtype: float64

In [55]:
rec_df['Jace, Vryn\'s Prodigy // Jace, Telepath Unbound'].sort_values()[1:11]

name
Sins of the Past        0.457813
Kess, Dissident Mage    0.478718
Finale of Promise       0.494273
Bösium Strip            0.494887
Dreadhorde Arcanist     0.512231
Sphinx's Tutelage       0.514060
Jaya Ballard            0.516312
Diluvian Primordial     0.518597
Torrential Gearhulk     0.519700
Dire Fleet Daredevil    0.529995
Name: Jace, Vryn's Prodigy // Jace, Telepath Unbound, dtype: float64

In [56]:
rec_df['Fatal Push'].sort_values()[1:11]

name
Fragmentize              0.506632
Renegade Rallier         0.517557
Thoughtbind              0.541528
Granulate                0.543480
Wretched Banquet         0.553908
Threads of Disloyalty    0.557239
Overload                 0.561679
Despark                  0.571028
Smother                  0.583836
Disembowel               0.595119
Name: Fatal Push, dtype: float64

In [57]:
rec_df['Veil of Summer'].sort_values()[1:11]

name
Autumn's Veil           0.304158
Display of Dominance    0.347318
Lazotep Plating         0.443828
Veilstone Amulet        0.460400
Spellbane Centaur       0.462851
Blinding Fog            0.489273
Join Shields            0.520273
Skyshroud Blessing      0.521551
Tortoise Formation      0.522402
Leonin Abunas           0.524888
Name: Veil of Summer, dtype: float64

In [58]:
rec_df['Urza, Lord High Artificer'].sort_values()[1:11]

name
Oracle's Vault         0.491109
Mind's Desire          0.514795
Temporal Aperture      0.526137
Aerial Caravan         0.526165
Abbot of Keral Keep    0.538169
Karn, Scion of Urza    0.540265
Stolen Goods           0.559586
Djinn of Wishes        0.560237
Outpost Siege          0.569939
Knacksaw Clique        0.579685
Name: Urza, Lord High Artificer, dtype: float64

In [71]:
rec_df['Underworld Breach'].sort_values()[1:11]

name
Glimpse of Freedom       0.435
Ox of Agonas             0.445
Fruit of Tizerus         0.445
Ichorid                  0.452
Voracious Typhon         0.458
Mogis's Favor            0.459
Underworld Charger       0.465
Escape Velocity          0.466
Loathsome Chimera        0.469
Underworld Rage-Hound    0.470
Name: Underworld Breach, dtype: float64

In [59]:
converted_df = converted_df.round(decimals=3)

In [60]:
converted_df.head()

Unnamed: 0_level_0,+0/+1,+0/+1 until,+0/+1 until end,+0/+1 until end of,+0/+1 until end of turn,+0/+2,+0/+2 until,+0/+2 until end,+0/+2 until end of,+0/+2 until end of turn,+0/+3,+1,+1/+0,+1/+0 and,+1/+0 and gains,+1/+0 and has,+1/+0 counters,...,legalities_duel,legalities_pauper,legalities_standard,legalities_legacy,legalities_future,legalities_brawl,rarity_common,rarity_uncommon,rarity_rare,rarity_mythic,power,power_back,toughness,toughness_back,loyalty,loyalty_back,cmc
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
Static Orb,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.168
Sensory Deprivation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.294
Road of Return,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.731
Storm Crow,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,-0.897,0.0,-0.419,0.0,0.0,0.0,-0.731
Walking Sponge,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,-0.897,0.0,-1.0,0.0,0.0,0.0,-0.731


In [61]:
rec_df = rec_df.round(decimals=3)

In [62]:
rec_df.head()

name,Static Orb,Sensory Deprivation,Road of Return,Storm Crow,Walking Sponge,Ravnica at War,Torrent of Fire,Wyluli Wolf,Pteramander,Nantuko Elder,Vedalken Heretic,Waterknot,Ruthless Knave,Palinchron,"Hua Tuo, Honored Physician",Veil of Summer,Disposal Mummy,...,"Omnath, Locus of the Roil",Harvest Hand // Scrounged Scythe,Stinging Lionfish,Polis Crusher,Test of Endurance,Venom Sliver,Borderland Ranger,Curse of Thirst,Temporary Truce,Freyalise's Winds,Clearwater Goblet,Quarry Beetle,Devoted Hero,Without Weakness,Firesong and Sunspeaker,"Samut, the Tested",Sinew Sliver
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
Static Orb,0.0,0.873,0.923,0.902,0.904,0.777,0.903,0.903,0.937,0.83,0.879,0.845,0.914,0.839,0.909,0.92,0.9,...,0.947,0.754,0.856,0.916,0.879,0.914,0.948,0.932,0.824,0.849,0.929,0.929,0.807,0.935,0.923,0.904,0.859
Sensory Deprivation,0.873,0.0,0.922,0.74,0.756,0.743,0.9,0.704,0.786,0.768,0.778,0.509,0.835,0.916,0.902,0.91,0.785,...,0.915,0.8,0.746,0.889,0.903,0.789,0.888,0.817,0.887,0.928,0.952,0.887,0.631,0.853,0.897,0.879,0.714
Road of Return,0.923,0.922,0.0,0.955,0.883,0.887,0.881,0.907,0.883,0.864,0.87,0.945,0.945,0.867,0.754,0.867,0.842,...,0.838,0.886,0.867,0.898,0.821,0.916,0.775,0.904,0.903,0.899,0.889,0.685,0.881,0.903,0.925,0.83,0.919
Storm Crow,0.902,0.74,0.955,0.0,0.785,0.856,0.929,0.831,0.819,0.8,0.831,0.85,0.886,0.901,0.919,0.867,0.862,...,0.94,0.744,0.835,0.928,0.936,0.857,0.917,0.925,0.92,0.952,0.949,0.938,0.727,0.91,0.932,0.937,0.754
Walking Sponge,0.904,0.756,0.883,0.785,0.0,0.886,0.88,0.492,0.808,0.643,0.82,0.858,0.829,0.894,0.786,0.764,0.853,...,0.874,0.861,0.738,0.917,0.871,0.817,0.896,0.835,0.919,0.954,0.897,0.906,0.694,0.775,0.918,0.764,0.766


In [63]:
# save out the csv
converted_df.to_csv('../Data/converted_df.csv')

In [64]:
# save out the csv
rec_df.to_csv('../Data/recommender_df.csv')

In [65]:
# this is to make a table to merge with the rec_df for filtering.

drop_list = list(vt_list) + ['rarity_common', 'rarity_uncommon', 'rarity_rare', 'rarity_mythic',
                            'activated_ability', 'triggered_ability', 'cmc', 'power', 'power_back', 'toughness',
                            'toughness_back', 'loyalty', 'loyalty_back', 'super_type_Basic', 'super_type_World',
                            'super_type_Legendary', 'super_type_Snow']
filter_df = converted_df.drop(columns=drop_list)
filter_df['cmc'] = df['cmc']
filter_df['card_link'] = df['scryfall_uri']
filter_df.head()

Unnamed: 0_level_0,colors_B,color_identity_B,colors_G,color_identity_G,colors_R,color_identity_R,colors_U,color_identity_U,colors_W,color_identity_W,card_type_Creature,card_type_Instant,card_type_Enchantment,card_type_Sorcery,card_type_Artifact,card_type_Land,card_type_Planeswalker,...,sub_type_Xenagos,sub_type_Leviathan,legalities_historic,legalities_penny,legalities_pioneer,legalities_commander,legalities_modern,legalities_oldschool,legalities_vintage,legalities_duel,legalities_pauper,legalities_standard,legalities_legacy,legalities_future,legalities_brawl,cmc,card_link
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
Static Orb,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,3.0,https://scryfall.com/card/7ed/319/static-orb?u...
Sensory Deprivation,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,https://scryfall.com/card/m14/71/sensory-depri...
Road of Return,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,2.0,https://scryfall.com/card/c19/34/road-of-retur...
Storm Crow,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,2.0,https://scryfall.com/card/9ed/100/storm-crow?u...
Walking Sponge,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,2.0,https://scryfall.com/card/ulg/47/walking-spong...


In [66]:
# merge the two dfs and pickle out the result
filter_rec_df = pd.concat([rec_df, filter_df], axis=1)
filter_rec_df.to_pickle('../Data/filter_rec_df.pkl')

___
### SQL

In [67]:
# create a database
conn = sqlite3.connect('../Data/MTG_Recommender.db')

In [68]:
# create an engine for the database
engine = create_engine('sqlite:///../Data/MTG_Recommender.db')

In [69]:
# save the recommender table to the database
# rec_df.to_sql('recommender', con=engine)
# after doing some outside research on SQL best practices it is not recommended to have such large tables. In fact
# the max limit for columns in SQLite is 2,000 which is much smaller than our 19,267 X 19,267

In [76]:
# save the filter table to the database (minus the card_link column)
filter_df.drop(columns='card_link').to_sql('filter', con=engine, if_exists='replace')

In [77]:
# save the image link table
filter_df[['card_link']].to_sql('links', con=engine, if_exists='replace')

Maybe, from here I can apply user-created filters to the filter_table using SQL queries in order to return a list of cards that fit those filters. Then, from that list pick the top cards whose cosine similarity is lowest compared to the user-selected card.

In [78]:
# proof of concept
filtered_list = ['Static Orb', 'Waterknot', 'Palinchron'] # this list would be returned from the user's filter request

In [79]:
users_card = 'Storm Crow' # an example of a card a user would enter
rec_df[filtered_list].T[users_card].sort_values()[0:11]

name
Waterknot     0.850
Palinchron    0.901
Static Orb    0.902
Name: Storm Crow, dtype: float64

This will be covered in the next notebook. 04-Filter