In [110]:
import pyspark as ps
import psycopg2
import os
import pandas as pd
import numpy as np
from pyspark.sql.types import StructType, StructField, IntegerType
from pyspark.mllib.recommendation import ALS

In [254]:
dbname = os.environ['CAPSTONE_DB_DBNAME']
host = os.environ['CAPSTONE_DB_HOST']
username = os.environ['CAPSTONE_DB_USERNAME']
password = os.environ['CAPSTONE_DB_PASSWORD']

conn = psycopg2.connect('dbname={} host={} user={} password={}'.format(dbname, host, username, password))

In [255]:
cursor = conn.cursor()

In [245]:
query = '''SELECT deck_id, cardstorm_id, card_count
           FROM decks'''

cursor.execute(query)

In [246]:
decks_schema = StructType([StructField('deck_id', IntegerType()),
                           StructField('cardstorm_id', IntegerType()),
                           StructField('card_count', IntegerType())])

In [247]:
decks_spark = spark.createDataFrame(cursor.fetchall(), schema=decks_schema)

In [248]:
als_model = ALS.trainImplicit(ratings=decks_spark, rank=10)

In [249]:
product_features = als_model.productFeatures()

In [250]:
spark_product_features = product_features.toDF()

In [251]:
spark_product_features.count()

11348

In [252]:
spark_product_features.take(1)[0][1]

[5.599943619927217e-07,
 9.303290653406293e-07,
 3.3896577633640845e-07,
 -2.9984553293616045e-06,
 -1.2540406260086456e-06,
 -5.267905862638145e-07,
 -3.007692384926486e-06,
 3.2962809655145975e-06,
 4.2720736814771954e-07,
 -7.71732629800681e-07]

In [215]:
pd.DataFrame(data=spark_product_features[1])

TypeError: 'Column' object is not callable

In [85]:
pd_product_features = spark_product_features.toPandas()

In [95]:
pd_product_features.set_index(keys='_1', inplace=True)

In [96]:
pd_product_features.apply

Unnamed: 0_level_0,_2
_1,Unnamed: 1_level_1
28,"[0.0006151899578981102, 0.00010950645082630217..."
76,"[0.00013237980601843446, -0.000165603400091640..."
96,"[0.00040547989192418754, 0.0001493739691795781..."
156,"[0.0010473811998963356, 0.0006976151489652693,..."
164,"[0.003016624366864562, 0.0013111965963616967, ..."
256,"[0.04734504222869873, 0.021816296502947807, 0...."
296,"[-6.567534001078457e-05, -0.000247386982664465..."
412,"[-0.0007390055689029396, 0.00854551512748003, ..."
512,"[0.0014713333221152425, 0.0015333227347582579,..."
552,"[0.002409742446616292, 0.00201679696328938, -0..."


In [106]:
pd_product_features.iloc[28]

_2    [-0.029444199055433273, -0.04134610295295715, ...
Name: 1172, dtype: object

In [133]:
pd_product_features.values

[0.0006151899578981102,
 0.00010950645082630217,
 -0.0018775667995214462,
 -0.000590339710470289,
 -0.0007529487484134734,
 -0.0006458790739998221,
 -0.0005680898320861161,
 -0.0004197876260150224,
 0.00025138462660834193,
 -0.0008187477942556143]

In [136]:
matrix = []
for row in pd_product_features.values:
    mat_row = []
    print(row)
    for entry in row[0]:
        mat_row.append(entry)
    matrix.append(mat_row)

[ list([0.0006151899578981102, 0.00010950645082630217, -0.0018775667995214462, -0.000590339710470289, -0.0007529487484134734, -0.0006458790739998221, -0.0005680898320861161, -0.0004197876260150224, 0.00025138462660834193, -0.0008187477942556143])]
[ list([0.00013237980601843446, -0.00016560340009164065, -0.0002430373424431309, 0.00038135109934955835, 0.0002527665928937495, -0.0005255143041722476, -0.000406524253776297, 2.95970694423886e-05, 0.00021857020328752697, -0.0001917874615173787])]
[ list([0.00040547989192418754, 0.00014937396917957813, 8.531381172360852e-05, 0.0001774914126144722, 0.00014763644139748067, -0.0003577808674890548, -2.4607230443507433e-05, 9.266076085623354e-05, 0.00020686043717432767, 0.00014359744091052562])]
[ list([0.0010473811998963356, 0.0006976151489652693, -0.0015072522219270468, -0.0005915156216360629, -0.0007688805926591158, -0.000543002737686038, 0.00046128660324029624, 0.00024722059606574476, 0.0005787524860352278, 0.00047576407087035477])]
[ list([0.0

[ list([2.1422256395453587e-05, -0.00018854148220270872, -0.00024397122615482658, -0.0006531981634907424, -0.0006180632044561207, -0.00027248967671766877, -7.878788892412558e-05, -4.672214799938956e-06, -0.0002560874563641846, 0.00012438562407623976])]
[ list([-0.004946268163621426, -0.008504423312842846, -0.0008728125831112266, -0.001871619955636561, -0.005412542261183262, 0.0057517606765031815, -0.003215491073206067, -0.00036306632682681084, -0.0007641704869456589, -0.0009806484449654818])]
[ list([-0.013638830743730068, -0.04020551219582558, -0.0005228727241046727, -0.020549029111862183, -0.0360589474439621, 0.036598339676856995, -0.012147416360676289, 0.0007658432004973292, -0.006802764255553484, -0.006083543878048658])]
[ list([0.18911349773406982, 0.11295513808727264, 0.03835852071642876, -0.029797077178955078, -0.025006921961903572, -0.011886714957654476, 0.08850055187940598, 0.05557306483387947, 0.06426110118627548, 0.08840139210224152])]
[ list([0.0002223056071670726, 2.253009

In [137]:
np_matrix = np.array(matrix)

In [138]:
np_matrix.shape

(1443, 10)

In [156]:
new_df = pd.DataFrame(data=np_matrix, index=pd_product_features.index)

In [157]:
new_df.loc[28, :]

0    0.000615
1    0.000110
2   -0.001878
3   -0.000590
4   -0.000753
5   -0.000646
6   -0.000568
7   -0.000420
8    0.000251
9   -0.000819
Name: 28, dtype: float64

In [158]:
new_df

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9
_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
28,0.000615,0.000110,-0.001878,-0.000590,-0.000753,-0.000646,-0.000568,-0.000420,0.000251,-0.000819
76,0.000132,-0.000166,-0.000243,0.000381,0.000253,-0.000526,-0.000407,0.000030,0.000219,-0.000192
96,0.000405,0.000149,0.000085,0.000177,0.000148,-0.000358,-0.000025,0.000093,0.000207,0.000144
156,0.001047,0.000698,-0.001507,-0.000592,-0.000769,-0.000543,0.000461,0.000247,0.000579,0.000476
164,0.003017,0.001311,-0.000101,0.001507,0.001038,-0.002794,-0.000130,0.000763,0.001832,0.001126
256,0.047345,0.021816,0.021271,-0.029057,-0.015669,-0.009752,0.035113,0.019945,0.006722,0.044555
296,-0.000066,-0.000247,-0.000141,0.000275,0.000158,-0.000331,-0.000411,-0.000031,0.000059,-0.000244
412,-0.000739,0.008546,0.015857,-0.016525,-0.006895,-0.000895,-0.007372,0.000686,-0.000356,-0.006225
512,0.001471,0.001533,0.000103,0.001852,0.000132,-0.001494,0.000581,0.000594,0.001362,0.001803
552,0.002410,0.002017,-0.000693,0.001191,0.002177,-0.002912,0.000127,0.000616,0.002524,0.000690


In [159]:
import requests

In [160]:
response = requests.get(url='http://mtgtop8.com/mtgo?d=313114')

In [161]:
response.text

"4 Karn Liberated\r\n2 Ugin, the Spirit Dragon\r\n2 Ulamog, the Ceaseless Hunger\r\n4 Walking Ballista\r\n1 World Breaker\r\n2 Wurmcoil Engine\r\n4 Ancient Stirrings\r\n4 Sylvan Scrying\r\n2 Dismember\r\n4 Chromatic Sphere\r\n4 Chromatic Star\r\n4 Expedition Map\r\n4 Oblivion Stone\r\n5 Forest\r\n1 Ghost Quarter\r\n1 Sanctum of Ugin\r\n4 Urza's Mine\r\n4 Urza's Power Plant\r\n4 Urza's Tower\r\nSideboard\r\n2 Relic of Progenitus\r\n2 Spatial Contortion\r\n2 Surgical Extraction\r\n3 Thought-Knot Seer\r\n2 Thragtusk\r\n1 Warping Wail\r\n1 Crucible of Worlds\r\n2 Nature's Claim\r\n"

In [163]:
cd ../src/


/Users/benjaminwalzer/Documents/Galvanize/mtg-capstone/src


In [164]:
pwd

'/Users/benjaminwalzer/Documents/Galvanize/mtg-capstone/src'

In [180]:
from deck_scraping import format_deck, make_user_card_counts, ReflexiveDict

In [169]:
deck_list = format_deck(response.text)
deck_list

['4 Karn Liberated',
 '2 Ugin, the Spirit Dragon',
 '2 Ulamog, the Ceaseless Hunger',
 '4 Walking Ballista',
 '1 World Breaker',
 '2 Wurmcoil Engine',
 '4 Ancient Stirrings',
 '4 Sylvan Scrying',
 '2 Dismember',
 '4 Chromatic Sphere',
 '4 Chromatic Star',
 '4 Expedition Map',
 '4 Oblivion Stone',
 '5 Forest',
 '1 Ghost Quarter',
 '1 Sanctum of Ugin',
 "4 Urza's Mine",
 "4 Urza's Power Plant",
 "4 Urza's Tower"]

In [175]:
user_card_counts = make_user_card_counts(event_id=18244, deck_id=313114, deck_list=deck_list)
user_card_counts

[(18244, 313114, 5111, 'Karn Liberated', '4'),
 (18244, 313114, 10392, 'Ugin, the Spirit Dragon', '2'),
 (18244, 313114, 10399, 'Ulamog, the Ceaseless Hunger', '2'),
 (18244, 313114, 10884, 'Walking Ballista', '4'),
 (18244, 313114, 11225, 'World Breaker', '1'),
 (18244, 313114, 11262, 'Wurmcoil Engine', '2'),
 (18244, 313114, 278, 'Ancient Stirrings', '4'),
 (18244, 313114, 9734, 'Sylvan Scrying', '4'),
 (18244, 313114, 2427, 'Dismember', '2'),
 (18244, 313114, 1549, 'Chromatic Sphere', '4'),
 (18244, 313114, 1550, 'Chromatic Star', '4'),
 (18244, 313114, 3047, 'Expedition Map', '4'),
 (18244, 313114, 6727, 'Oblivion Stone', '4'),
 (18244, 313114, 3453, 'Forest', '5'),
 (18244, 313114, 3713, 'Ghost Quarter', '1'),
 (18244, 313114, 8187, 'Sanctum of Ugin', '1'),
 (18244, 313114, 10518, "Urza's Mine", '4'),
 (18244, 313114, 10519, "Urza's Power Plant", '4'),
 (18244, 313114, 10520, "Urza's Tower", '4')]

In [177]:
user_card_counts[0][-1]

'4'

In [178]:
card_count_dict = {card[2]: card[-1] for card in user_card_counts}

In [179]:
card_count_dict

{278: '4',
 1549: '4',
 1550: '4',
 2427: '2',
 3047: '4',
 3453: '5',
 3713: '1',
 5111: '4',
 6727: '4',
 8187: '1',
 9734: '4',
 10392: '2',
 10399: '2',
 10518: '4',
 10519: '4',
 10520: '4',
 10884: '4',
 11225: '1',
 11262: '2'}

In [181]:
modern_cards = ReflexiveDict()
modern_cards.get_cards()

True

In [182]:
cardstorm_ids = [key for key in modern_cards.keys() if isinstance(key, int)]

In [184]:
len(cardstorm_ids)

11348

In [190]:
modern_cards[11348]

'zur the enchanter'

In [196]:
deck_vector = []
for cardstorm_id in sorted(cardstorm_ids):
    if cardstorm_id in card_count_dict:
        print('{}: {}'.format(cardstorm_id, card_count_dict[cardstorm_id]))
        deck_vector.append(card_count_dict[cardstorm_id])
    else:
        deck_vector.append(0)

278: 4
1549: 4
1550: 4
2427: 2
3047: 4
3453: 5
3713: 1
5111: 4
6727: 4
8187: 1
9734: 4
10392: 2
10399: 2
10518: 4
10519: 4
10520: 4
10884: 4
11225: 1
11262: 2


In [201]:
len(deck_vector)

11348

In [217]:
new_df.shape

(1443, 10)

In [227]:
query = '''SELECT cardstorm_id FROM cards WHERE cardstorm_id NOT IN (SELECT DISTINCT cardstorm_id FROM decks)'''

cursor.execute(query)

In [228]:
_ = cursor.fetchall()

In [229]:
unused_ids = [x[0] for x in _]

In [232]:
len(unused_ids)

9905

In [233]:
unused_cards = []
for unused_id in unused_ids:
    unused_cards.append((-1, -1, unused_id, modern_cards[unused_id], 1))

In [236]:
from deck_scraping import upload_user_card_counts

In [241]:
template

'%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,

In [256]:
template = ', '.join(['%s'] * len(unused_cards))
query = 'INSERT INTO decks (event_id, deck_id, cardstorm_id, card_name, card_count) VALUES {}'.format(template)

try:
    cursor.execute(query=query, vars=unused_cards)

except psycopg2.IntegrityError:
    print('            duplicate key: deck not added to db')


In [257]:
conn.commit()

In [234]:
unused_cards

[(-1, -1, 1, 'abandoned sarcophagus', 1),
 (-1, -1, 2, 'abandon reason', 1),
 (-1, -1, 3, 'abattoir ghoul', 1),
 (-1, -1, 4, 'abbey griffin', 1),
 (-1, -1, 6, 'aberrant researcher', 1),
 (-1, -1, 7, 'abhorrent overlord', 1),
 (-1, -1, 8, 'abomination of gudul', 1),
 (-1, -1, 11, 'absolver thrull', 1),
 (-1, -1, 12, 'absorb vis', 1),
 (-1, -1, 13, 'abstruse interference', 1),
 (-1, -1, 14, 'abuna acolyte', 1),
 (-1, -1, 15, "abuna's chant", 1),
 (-1, -1, 16, 'abundance', 1),
 (-1, -1, 18, 'abundant maw', 1),
 (-1, -1, 19, 'abyssal nocturnus', 1),
 (-1, -1, 20, 'abyssal persecutor', 1),
 (-1, -1, 21, 'abyssal specter', 1),
 (-1, -1, 22, 'abzan advantage', 1),
 (-1, -1, 23, 'abzan ascendancy', 1),
 (-1, -1, 24, 'abzan banner', 1),
 (-1, -1, 25, 'abzan battle priest', 1),
 (-1, -1, 26, 'abzan beastmaster', 1),
 (-1, -1, 29, 'abzan guide', 1),
 (-1, -1, 30, 'abzan kin-guard', 1),
 (-1, -1, 31, 'abzan runemark', 1),
 (-1, -1, 32, 'abzan skycaptain', 1),
 (-1, -1, 177, 'akki underling', 1),
 

In [None]:
template = 