# Import Dependencies and prepare text and non text dataframes

In [77]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.metrics import confusion_matrix

In [78]:
creatures = pd.read_csv('creatures_cleaned.csv', index_col=[0])
noncreatures = pd.read_csv('noncreatures_cleaned.csv',index_col=[0])
creatures = creatures.reset_index(drop=True)
noncreatures = noncreatures.reset_index(drop=True)

In [79]:
for index, row in creatures.iterrows():
    prices = row['prices']
    bucket = 0
    if prices <= 5:
        bucket = 0
    elif prices <= 10:
        bucket =1
    elif prices <=20:
        bucket = 2
    elif prices <= 50:
        bucket = 3
    else:
        bucket = 4
    creatures.loc[index, 'price_bucket'] = bucket
creatures

Unnamed: 0,name,mana_cost,cmc,oracle_text,power,toughness,prices,set,rarity,price_bucket
0,Storm Crow,{1}{U},2.0,Flying (This creature can't be blocked except ...,1.0,2.0,0.20,9ed,0,0.0
1,Walking Sponge,{1}{U},2.0,{T}: Target creature loses your choice of flyi...,1.0,1.0,0.15,ulg,1,0.0
2,Pteramander,{U},1.0,Flying {7}{U}: Adapt 4. This ability costs {1}...,1.0,1.0,0.15,rna,1,0.0
3,Nantuko Elder,{2}{G},3.0,{T}: Add {C}{G}.,1.0,2.0,0.56,ody,1,0.0
4,Vedalken Heretic,{G}{U},2.0,Whenever This Card deals damage to an opponent...,1.0,1.0,0.26,arb,2,0.0
...,...,...,...,...,...,...,...,...,...,...
12152,Borderland Ranger,{2}{G},3.0,When This Card enters the battlefield you may...,2.0,2.0,0.05,avr,0,0.0
12153,Quarry Beetle,{4}{G},5.0,When This Card enters the battlefield you may...,4.0,5.0,0.06,hou,1,0.0
12154,Devoted Hero,{W},1.0,_,1.0,2.0,0.18,s99,0,0.0
12155,Firesong and Sunspeaker,{4}{R}{W},6.0,Red instant and sorcery spells you control hav...,4.0,6.0,0.30,2x2,2,0.0


In [80]:
for index, row in noncreatures.iterrows():
    prices = row['prices']
    bucket = 0
    if prices <= 5:
        bucket = 0
    elif prices <= 10:
        bucket =1
    elif prices <=20:
        bucket = 2
    elif prices <= 50:
        bucket = 3
    else:
        bucket = 4
    noncreatures.loc[index, 'price_bucket'] = bucket
noncreatures

Unnamed: 0,name,mana_cost,cmc,oracle_text,prices,set,rarity,price_bucket
0,Static Orb,{3},3.0,As long as This Card is untapped players can'...,20.58,7ed,2,3.0
1,Sensory Deprivation,{U},1.0,Enchant creature Enchanted creature gets -3/-0.,0.04,m14,0,0.0
2,Road of Return,{G}{G},2.0,Choose one — • Return target permanent card fr...,0.24,c19,2,0.0
3,Ravnica at War,{3}{W},4.0,Exile all multicolored permanents.,0.23,war,2,0.0
4,Torrent of Fire,{3}{R}{R},5.0,This Card deals damage to any target equal to ...,0.12,scg,0,0.0
...,...,...,...,...,...,...,...,...
10329,Curse of Thirst,{4}{B},5.0,Enchant player At the beginning of enchanted p...,0.30,dka,1,0.0
10330,Temporary Truce,{1}{W},2.0,Each player may draw up to two cards. For each...,11.45,por,2,2.0
10331,Clearwater Goblet,{5},5.0,Sunburst (This enters the battlefield with a c...,0.44,5dn,2,0.0
10332,Without Weakness,{1}{B},2.0,Target creature you control gains indestructib...,0.04,hou,0,0.0


# Combined Text Processing

In [81]:
for index, row in creatures.iterrows():
    mana_cost = row['mana_cost']
    cmc = row['cmc']
    power = row['power']
    toughness = row['toughness']
    rarity = row['rarity']
    completed_string = f"The mana cost of this card is {mana_cost}. The converted mana cost of this card is {cmc}. The power of this creature is {power}. The toughness of this card is {toughness}. The rarity of this card is {rarity}. The oracle text is as follows: {row['oracle_text']}."
    creatures.loc[index, 'compiled_text'] = completed_string
creatures

Unnamed: 0,name,mana_cost,cmc,oracle_text,power,toughness,prices,set,rarity,price_bucket,compiled_text
0,Storm Crow,{1}{U},2.0,Flying (This creature can't be blocked except ...,1.0,2.0,0.20,9ed,0,0.0,The mana cost of this card is {1}{U}. The conv...
1,Walking Sponge,{1}{U},2.0,{T}: Target creature loses your choice of flyi...,1.0,1.0,0.15,ulg,1,0.0,The mana cost of this card is {1}{U}. The conv...
2,Pteramander,{U},1.0,Flying {7}{U}: Adapt 4. This ability costs {1}...,1.0,1.0,0.15,rna,1,0.0,The mana cost of this card is {U}. The convert...
3,Nantuko Elder,{2}{G},3.0,{T}: Add {C}{G}.,1.0,2.0,0.56,ody,1,0.0,The mana cost of this card is {2}{G}. The conv...
4,Vedalken Heretic,{G}{U},2.0,Whenever This Card deals damage to an opponent...,1.0,1.0,0.26,arb,2,0.0,The mana cost of this card is {G}{U}. The conv...
...,...,...,...,...,...,...,...,...,...,...,...
12152,Borderland Ranger,{2}{G},3.0,When This Card enters the battlefield you may...,2.0,2.0,0.05,avr,0,0.0,The mana cost of this card is {2}{G}. The conv...
12153,Quarry Beetle,{4}{G},5.0,When This Card enters the battlefield you may...,4.0,5.0,0.06,hou,1,0.0,The mana cost of this card is {4}{G}. The conv...
12154,Devoted Hero,{W},1.0,_,1.0,2.0,0.18,s99,0,0.0,The mana cost of this card is {W}. The convert...
12155,Firesong and Sunspeaker,{4}{R}{W},6.0,Red instant and sorcery spells you control hav...,4.0,6.0,0.30,2x2,2,0.0,The mana cost of this card is {4}{R}{W}. The c...


In [82]:
for index, row in noncreatures.iterrows():
    mana_cost = row['mana_cost']
    cmc = row['cmc']
    rarity = row['rarity']
    completed_string = f"The mana cost of this card is {mana_cost}. The converted mana cost of this card is {cmc}. The rarity of this card is {rarity}. The oracle text is as follows: {row['oracle_text']}."
    noncreatures.loc[index, 'compiled_text'] = completed_string
noncreatures

Unnamed: 0,name,mana_cost,cmc,oracle_text,prices,set,rarity,price_bucket,compiled_text
0,Static Orb,{3},3.0,As long as This Card is untapped players can'...,20.58,7ed,2,3.0,The mana cost of this card is {3}. The convert...
1,Sensory Deprivation,{U},1.0,Enchant creature Enchanted creature gets -3/-0.,0.04,m14,0,0.0,The mana cost of this card is {U}. The convert...
2,Road of Return,{G}{G},2.0,Choose one — • Return target permanent card fr...,0.24,c19,2,0.0,The mana cost of this card is {G}{G}. The conv...
3,Ravnica at War,{3}{W},4.0,Exile all multicolored permanents.,0.23,war,2,0.0,The mana cost of this card is {3}{W}. The conv...
4,Torrent of Fire,{3}{R}{R},5.0,This Card deals damage to any target equal to ...,0.12,scg,0,0.0,The mana cost of this card is {3}{R}{R}. The c...
...,...,...,...,...,...,...,...,...,...
10329,Curse of Thirst,{4}{B},5.0,Enchant player At the beginning of enchanted p...,0.30,dka,1,0.0,The mana cost of this card is {4}{B}. The conv...
10330,Temporary Truce,{1}{W},2.0,Each player may draw up to two cards. For each...,11.45,por,2,2.0,The mana cost of this card is {1}{W}. The conv...
10331,Clearwater Goblet,{5},5.0,Sunburst (This enters the battlefield with a c...,0.44,5dn,2,0.0,The mana cost of this card is {5}. The convert...
10332,Without Weakness,{1}{B},2.0,Target creature you control gains indestructib...,0.04,hou,0,0.0,The mana cost of this card is {1}{B}. The conv...


In [83]:
creatures = creatures.astype({'price_bucket': 'int64', 'power': 'int64', 'toughness': 'int64', 'cmc':'int64'})
noncreatures = noncreatures.astype({'price_bucket': 'int64','cmc':'int64'})

# Importing a Validation Dataset

In [84]:
bc_df = pd.read_csv('c2.csv', index_col=[0])
bc_df = bc_df.reset_index(drop=True)

In [85]:
for index, row in bc_df.iterrows():
    price = row['prices']
    bucket = 0
    if price <=5:
        bucket =0
    elif price <=10:
        bucket= 1
    elif price <=20:
        bucket = 2
    elif price <= 50:
        bucket =3
    else:
        bucket =4
    bc_df.loc[index, 'price_bucket'] = bucket
bc_df

Unnamed: 0,name,mana_cost,cmc,oracle_text,power,toughness,prices,set,rarity,price_bucket
0,Simian Simulacrum,{3},3.0,When This Card enters the battlefield put two...,2.0,1.0,0.29,bro,2,0.0
1,Sardian Cliffstomper,{1}{R},2.0,As long as it's your turn and you control four...,0.0,4.0,2.68,bro,1,0.0
2,"Liberator, Urza's Battlethopter",{3},3.0,Flash Flying You may cast colorless spells and...,1.0,2.0,3.77,bro,2,0.0
3,Skystrike Officer,{2}{U},3.0,Flying Whenever This Card attacks create a 1/...,2.0,3.0,0.79,bro,2,0.0
4,Bladecoil Serpent,{X}{6},6.0,When This Card enters the battlefield for eac...,5.0,4.0,1.57,bro,3,0.0
...,...,...,...,...,...,...,...,...,...,...
194,"Gix, Yawgmoth Praetor",{1}{B}{B},3.0,Whenever a creature deals combat damage to one...,3.0,3.0,22.27,bro,3,3.0
195,Clay Revenant,{1},1.0,This Card enters the battlefield tapped. {2}{B...,1.0,2.0,0.03,bro,0,0.0
196,Thraxodemon,{1}{B},2.0,{3} {T} Sacrifice another creature or artifa...,2.0,2.0,0.05,bro,0,0.0
197,"Mishra, Tamer of Mak Fawa",{3}{B}{R},5.0,"Permanents you control have ""Ward—Sacrifice a ...",4.0,4.0,0.84,bro,2,0.0


In [86]:
for index, row in bc_df.iterrows():
    mana_cost = row['mana_cost']
    cmc = row['cmc']
    power = row['power']
    toughness = row['toughness']
    rarity = row['rarity']
    completed_string = f"The mana cost of this card is {mana_cost}. The converted mana cost of this card is {cmc}. The power of this creature is {power}. The toughness of this card is {toughness}. The rarity of this card is {rarity}. The oracle text is as follows: {row['oracle_text']}."
    bc_df.loc[index, 'compiled_text'] = completed_string
bc_df

Unnamed: 0,name,mana_cost,cmc,oracle_text,power,toughness,prices,set,rarity,price_bucket,compiled_text
0,Simian Simulacrum,{3},3.0,When This Card enters the battlefield put two...,2.0,1.0,0.29,bro,2,0.0,The mana cost of this card is {3}. The convert...
1,Sardian Cliffstomper,{1}{R},2.0,As long as it's your turn and you control four...,0.0,4.0,2.68,bro,1,0.0,The mana cost of this card is {1}{R}. The conv...
2,"Liberator, Urza's Battlethopter",{3},3.0,Flash Flying You may cast colorless spells and...,1.0,2.0,3.77,bro,2,0.0,The mana cost of this card is {3}. The convert...
3,Skystrike Officer,{2}{U},3.0,Flying Whenever This Card attacks create a 1/...,2.0,3.0,0.79,bro,2,0.0,The mana cost of this card is {2}{U}. The conv...
4,Bladecoil Serpent,{X}{6},6.0,When This Card enters the battlefield for eac...,5.0,4.0,1.57,bro,3,0.0,The mana cost of this card is {X}{6}. The conv...
...,...,...,...,...,...,...,...,...,...,...,...
194,"Gix, Yawgmoth Praetor",{1}{B}{B},3.0,Whenever a creature deals combat damage to one...,3.0,3.0,22.27,bro,3,3.0,The mana cost of this card is {1}{B}{B}. The c...
195,Clay Revenant,{1},1.0,This Card enters the battlefield tapped. {2}{B...,1.0,2.0,0.03,bro,0,0.0,The mana cost of this card is {1}. The convert...
196,Thraxodemon,{1}{B},2.0,{3} {T} Sacrifice another creature or artifa...,2.0,2.0,0.05,bro,0,0.0,The mana cost of this card is {1}{B}. The conv...
197,"Mishra, Tamer of Mak Fawa",{3}{B}{R},5.0,"Permanents you control have ""Ward—Sacrifice a ...",4.0,4.0,0.84,bro,2,0.0,The mana cost of this card is {3}{B}{R}. The c...


In [87]:
bc_df = bc_df.astype({'price_bucket': 'int64', 'power': 'int64', 'toughness': 'int64', 'cmc':'int64'})

In [88]:
bn_df = pd.read_csv('nc2.csv', index_col=[0])

In [89]:
for index, row in bn_df.iterrows():
    price = row['prices']
    bucket = 0
    if price <=5:
        bucket =0
    elif price <=10:
        bucket= 1
    elif price <=20:
        bucket = 2
    elif price <= 50:
        bucket =3
    else:
        bucket =4
    bn_df.loc[index, 'price_bucket'] = bucket
bn_df

Unnamed: 0,name,mana_cost,cmc,oracle_text,prices,set,rarity,price_bucket
0,Wasteful Harvest,{2}{G},3.0,Mill five cards. You may put a permanent card ...,0.10,bro,0,0.0
1,Calamity's Wake,{1}{W},2.0,Exile all graveyards. Players can't cast noncr...,0.34,bro,1,0.0
2,Over the Top,{5}{R}{R},7.0,Each player reveals a number of cards from the...,0.17,bro,2,0.0
3,The Mightstone and Weakstone,{5},5.0,When This Card enters the battlefield choose ...,4.67,bro,2,0.0
4,Repair and Recharge,{3}{W}{W},5.0,Return target artifact enchantment or planes...,0.11,bro,1,0.0
...,...,...,...,...,...,...,...,...
178,Aeronaut's Wings,{2},2.0,Equipped creature gets +1/+0 and has flying. E...,0.05,bro,0,0.0
179,Curate,{1}{U},2.0,Surveil 2. (Look at the top two cards of your ...,0.09,bro,0,0.0
180,Island,_,0.0,({T}: Add {U}.),0.30,bro,0,0.0
181,"Teferi, Temporal Pilgrim",{3}{U}{U},5.0,Whenever you draw a card put a loyalty counte...,13.69,bro,3,2.0


In [90]:
for index, row in bn_df.iterrows():
    mana_cost = row['mana_cost']
    cmc = row['cmc']
    rarity = row['rarity']
    completed_string = f"The mana cost of this card is {mana_cost}. The converted mana cost of this card is {cmc}. The rarity of this card is {rarity}. The oracle text is as follows: {row['oracle_text']}."
    bn_df.loc[index, 'compiled_text'] = completed_string
bn_df

Unnamed: 0,name,mana_cost,cmc,oracle_text,prices,set,rarity,price_bucket,compiled_text
0,Wasteful Harvest,{2}{G},3.0,Mill five cards. You may put a permanent card ...,0.10,bro,0,0.0,The mana cost of this card is {2}{G}. The conv...
1,Calamity's Wake,{1}{W},2.0,Exile all graveyards. Players can't cast noncr...,0.34,bro,1,0.0,The mana cost of this card is {1}{W}. The conv...
2,Over the Top,{5}{R}{R},7.0,Each player reveals a number of cards from the...,0.17,bro,2,0.0,The mana cost of this card is {5}{R}{R}. The c...
3,The Mightstone and Weakstone,{5},5.0,When This Card enters the battlefield choose ...,4.67,bro,2,0.0,The mana cost of this card is {5}. The convert...
4,Repair and Recharge,{3}{W}{W},5.0,Return target artifact enchantment or planes...,0.11,bro,1,0.0,The mana cost of this card is {3}{W}{W}. The c...
...,...,...,...,...,...,...,...,...,...
178,Aeronaut's Wings,{2},2.0,Equipped creature gets +1/+0 and has flying. E...,0.05,bro,0,0.0,The mana cost of this card is {2}. The convert...
179,Curate,{1}{U},2.0,Surveil 2. (Look at the top two cards of your ...,0.09,bro,0,0.0,The mana cost of this card is {1}{U}. The conv...
180,Island,_,0.0,({T}: Add {U}.),0.30,bro,0,0.0,The mana cost of this card is _. The converted...
181,"Teferi, Temporal Pilgrim",{3}{U}{U},5.0,Whenever you draw a card put a loyalty counte...,13.69,bro,3,2.0,The mana cost of this card is {3}{U}{U}. The c...


In [91]:
bn_df = bn_df.astype({'price_bucket': 'int64', 'cmc':'int64'})

# Processing the compiled text version

Creatures

In [146]:
X = creatures['compiled_text'].values
y = creatures['price_bucket'].values
v = CountVectorizer()

In [147]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
X_train_count = v.fit_transform(X_train)
X_train_count = X_train_count.toarray()

In [148]:
X_test_count = v.transform(X_test)
X_test_count = X_test_count.toarray()

NonCreatures

In [149]:
nX = noncreatures['compiled_text'].values
ny = noncreatures['price_bucket'].values
nv = CountVectorizer()
nX2 = nv.fit_transform(nX)

In [150]:
nX_train, nX_test, ny_train, ny_test = train_test_split(nX2, ny, random_state=42)

In [151]:
nX_train_count = nX_train.toarray()
nX_test_count = nX_test.toarray()

# Attempting a Gaussian Naive Bayes

Creatures

In [152]:
gmodel = GaussianNB()

In [153]:
gmodel.fit(X_train_count,y_train)

In [154]:
gmodel.score(X_test_count, y_test)

0.31776315789473686

NonCreatures

In [155]:
ngmodel = GaussianNB()

In [156]:
ngmodel.fit(nX_train_count, ny_train)

In [157]:
ngmodel.score(nX_test_count, ny_test)

0.293343653250774

# Attempting Multinomial Naive Bayes 

In [158]:
mmodel = MultinomialNB()

In [159]:
mmodel.fit(X_train_count,y_train)

In [160]:
mmodel.score(X_test_count, y_test)

0.9421052631578948

In [161]:
nmmodel = MultinomialNB()

In [162]:
nmmodel.fit(nX_train_count, ny_train)

In [163]:
nmmodel.score(nX_test_count, ny_test)

0.8839009287925697

# Validation

In [164]:
bX = bc_df['compiled_text'].values
by = bc_df['price_bucket'].values

In [165]:
bXcount = v.transform(bX)
bXcount = bXcount.toarray()

In [166]:
predictions = mmodel.predict(bXcount)

In [167]:
confusion_matrix(by, predictions)

array([[186,   0,   0,   0],
       [  4,   0,   0,   0],
       [  4,   0,   0,   0],
       [  3,   1,   1,   0]], dtype=int64)

In [168]:
bnX = bn_df['compiled_text'].values
bny = bn_df['price_bucket'].values

In [169]:
bnXcount = nv.transform(bnX)
bnXcount = bnXcount.toarray()

In [170]:
npredictions = nmmodel.predict(bnXcount)

In [173]:
confusion_matrix(bny, npredictions)

array([[169,   2,   0,   0],
       [  5,   0,   0,   0],
       [  3,   0,   0,   0],
       [  2,   0,   2,   0]], dtype=int64)

In [176]:
creatures['price_bucket'].value_counts()

0    11633
1      259
2      162
3       69
4       34
Name: price_bucket, dtype: int64