In [1]:
# import packages
import pandas as pd

# pip install mtgsdk
from mtgsdk import Card

In [25]:
# Load the previously scraped maindeck and sideboard lists, then derive a master
# table of unique card names with a sequential id that later cells can join on.

maindeck_filepath = "data/maindeck_cards.csv"
sideboard_filepath = "data/sideboard_cards.csv"

maindeck_df = pd.read_csv(maindeck_filepath)
sideboard_df = pd.read_csv(sideboard_filepath)

# Stack only the 'card name' column of both frames and collapse repeated names.
all_cards_df = pd.concat(
    [frame[['card name']] for frame in (maindeck_df, sideboard_df)],
    ignore_index=True,
).drop_duplicates(ignore_index=True)

# Number the cards 1..N and place the id column first.
all_cards_df = all_cards_df.assign(
    **{'card id': range(1, len(all_cards_df) + 1)}
)[['card id', 'card name']]

# Persist the master list as a CSV.
all_cards_df.to_csv("data/all_cards.csv", index=False)

all_cards_df


Unnamed: 0,card id,card name
0,1,Birds of Paradise
1,2,Brainstorm
2,3,Brazen Borrower
3,4,Collector Ouphe
4,5,Crashing Footfalls
...,...,...
843,844,Radiant Flames
844,845,Remorseful Cleric
845,846,Rending Volley
846,847,"Saheeli, Sublime Artificer"


In [31]:
# Smoke-test the mtgsdk lookup with a single, known card name.

# cardname = all_cards_df['card name'][0]
cardname = "Snapcaster Mage"

try:
    cards = Card.where(name=cardname).all()
    # Take the first result whose name matches exactly; otherwise fall back to
    # the first result. Indexing an empty result list raises IndexError, which
    # is reported below as "card not found".
    exact_match = next(filter(lambda card: card.name == cardname, cards), cards[0])
    print(exact_match.name)
    print(exact_match.colors)
    print(int(exact_match.cmc))
    print(exact_match.type)
except IndexError:
    print(f"card not found - index error")
except Exception as err:
    print(f"An error occurred for card '{cardname}': {err}")

Snapcaster Mage
['U']
2
Creature — Human Wizard


In [29]:
# WARNING ------------------------
# Executing this cell will take 10+ minutes to fully collect the data.
# To see the final output, see the subsequent csv in data/card_info.csv
# Note that cards missing from the API are from the newest set. These will be manually cleaned as it's a small subset.

# Look up every card in the master list and collect one record per card.
# Cards whose lookup fails get a 'card not found' placeholder record so the
# output keeps one row per card id.
card_info = []
for card_id, cardname in zip(all_cards_df['card id'], all_cards_df['card name']):
    try:
        cards = Card.where(name=cardname).all()
        # Take the first result whose name matches exactly; otherwise fall back
        # to the first result. cards[0] raises IndexError on an empty result.
        exact_match = next((card for card in cards if card.name == cardname), cards[0])
        card_info.append({'card id': card_id, 'card name': cardname, 'colors': exact_match.colors, 'cmc': int(exact_match.cmc), 'type': exact_match.type})
        print(f"{cardname} information found. Next card.")
    except Exception as exc:
        # Catch `Exception` rather than using a bare `except:` so that
        # KeyboardInterrupt still stops this long-running loop, and report the
        # underlying error so failures other than an empty result are visible.
        card_info.append({'card id': card_id,'card name': cardname, 'colors': 'card not found', 'cmc': 'card not found', 'type': 'card not found'})
        print("------------")
        print(f"!!! {cardname} not found!")
        print(f"    reason: {type(exc).__name__}: {exc}")
        print("------------")


Birds of Paradise information found. Next card.
Brainstorm information found. Next card.
Brazen Borrower information found. Next card.
Collector Ouphe information found. Next card.
Crashing Footfalls information found. Next card.
Daze information found. Next card.
Delver of Secrets information found. Next card.
Dragon's Rage Channeler information found. Next card.
Drown in the Loch information found. Next card.
Elvish Spirit Guide information found. Next card.
Ezuri, Claw of Progress information found. Next card.
Fatal Push information found. Next card.
Fire // Ice information found. Next card.
Flooded Strand information found. Next card.
Force of Negation information found. Next card.
Force of Will information found. Next card.
Forest information found. Next card.
Grief information found. Next card.
Griselbrand information found. Next card.
Ice-Fang Coatl information found. Next card.
Island information found. Next card.
Lightning Bolt information found. Next card.
Lórien Revealed inf

In [33]:
# Turn the collected per-card records into a dataframe
card_info_df = pd.DataFrame(data=card_info)

# Save the records as a csv
card_info_df.to_csv(path_or_buf="data/card_info.csv", index=False)

card_info_df

Unnamed: 0,card id,card name,colors,cmc,type
0,1,Birds of Paradise,[G],1,Creature — Bird
1,2,Brainstorm,[U],1,Instant
2,3,Brazen Borrower,[U],3,Creature — Faerie Rogue
3,4,Collector Ouphe,[G],2,Creature — Ouphe
4,5,Crashing Footfalls,[G],0,Sorcery
...,...,...,...,...,...
843,844,Radiant Flames,[R],3,Sorcery
844,845,Remorseful Cleric,[W],2,Creature — Spirit Cleric
845,846,Rending Volley,[R],1,Instant
846,847,"Saheeli, Sublime Artificer","[R, U]",3,Legendary Planeswalker — Saheeli


In [45]:
# The cards the API could not find are from the newest set; those 41 records
# were filled in by hand and saved as a separate, completed csv.
# Loading that csv here also means the slow API cell does not have to be
# executed again to continue from this point.

file = "data/card_info_complete.csv"
card_info_complete_df = pd.read_csv(filepath_or_buffer=file)

card_info_complete_df

Unnamed: 0,card id,card name,colors,cmc,type
0,1,Birds of Paradise,['G'],1,Creature — Bird
1,2,Brainstorm,['U'],1,Instant
2,3,Brazen Borrower,['U'],3,Creature — Faerie Rogue
3,4,Collector Ouphe,['G'],2,Creature — Ouphe
4,5,Crashing Footfalls,['G'],0,Sorcery
...,...,...,...,...,...
843,844,Radiant Flames,['R'],3,Sorcery
844,845,Remorseful Cleric,['W'],2,Creature — Spirit Cleric
845,846,Rending Volley,['R'],1,Instant
846,847,"Saheeli, Sublime Artificer","['R', 'U']",3,Legendary Planeswalker — Saheeli


In [46]:

# Split each card's type line into a 'type' part (text before ' — ') and a
# 'subtype' part (text after it), and store each part as a list of words for
# parsing for graphs later.

def _quote_words(text):
    """Return the whitespace-separated words of `text`, each wrapped in single
    quotes, or None when `text` is empty."""
    return [f"'{word}'" for word in text.split()] if text else None

# Guard against re-running the cell: once 'subtype' exists, the 'type' column
# already holds lists and splitting it again would corrupt it.
if 'subtype' not in card_info_complete_df.columns:
    # str.partition always yields three columns (before, separator, after), so
    # a type line with no ' — ' gets an empty subtype and one with several
    # ' — ' keeps the remainder in the subtype instead of raising.
    type_parts = card_info_complete_df['type'].astype(str).str.partition(' — ')

    # Build a new frame rather than mutating in place; 'type' and 'subtype'
    # end up as the last two columns.
    card_info_complete_df = (
        card_info_complete_df
        .drop(columns='type')
        .assign(
            type=type_parts[0].apply(_quote_words),
            subtype=type_parts[2].apply(_quote_words),
        )
    )

# TODO: revisit whether missing values (None) should become empty lists for
# the later SQL step.

card_info_complete_df

Unnamed: 0,card id,card name,colors,cmc,type,subtype
0,1,Birds of Paradise,['G'],1,['Creature'],['Bird']
1,2,Brainstorm,['U'],1,['Instant'],
2,3,Brazen Borrower,['U'],3,['Creature'],"['Faerie', 'Rogue']"
3,4,Collector Ouphe,['G'],2,['Creature'],['Ouphe']
4,5,Crashing Footfalls,['G'],0,['Sorcery'],
...,...,...,...,...,...,...
843,844,Radiant Flames,['R'],3,['Sorcery'],
844,845,Remorseful Cleric,['W'],2,['Creature'],"['Spirit', 'Cleric']"
845,846,Rending Volley,['R'],1,['Instant'],
846,847,"Saheeli, Sublime Artificer","['R', 'U']",3,"['Legendary', 'Planeswalker']",['Saheeli']


#### Include Pricing Information for Each card
* To capture pricing data, I collected a set of text files from https://www.goatbots.com/download-prices that contain daily pricing information for every card for all of January.
* I wrote a script (`\scripts\combine_pricing_data_files_script.py`) that combines the text files into a master JSON file, recording each file's date alongside its prices.
* There are typically many versions of each card. To simplify, I'll use the lowest price across all versions of a card when associating prices with the tournament decks.

In [74]:
# Load the card definition file into a dataframe, transpose it, and turn the
# resulting index into a regular 'MTGO_id' column.
card_definition_file = "data/prices/definitions/card-definitions.json"
card_definitions_df = (
    pd.read_json(card_definition_file, orient='records')
    .transpose()
    .rename_axis('MTGO_id')
    .reset_index()
)


In [75]:
# Do some naming cleanup to match other files:
#   * the literal text "Name Sticker" (including its quotes) becomes '_____'
#   * a '/' separator becomes ' // '
# `regex` is passed explicitly because its default differs between pandas
# versions. The separator pattern also consumes any whitespace and repeated
# slashes around the separator, so re-running this cell leaves names that
# already contain ' // ' unchanged instead of expanding them again.
card_definitions_df['name'] = (
    card_definitions_df['name']
    .str.replace('"Name Sticker"', '_____', regex=False)
    .str.replace(r'\s*/+\s*', ' // ', regex=True)
)

In [76]:
# Keep only the definitions whose name appears in the tournament card list
# (removes 70,000 rows!)
filtered_card_definitions_df = card_definitions_df.loc[
    card_definitions_df['name'].isin(card_info_complete_df['card name'])
]

filtered_card_definitions_df

Unnamed: 0,MTGO_id,name,cardset,rarity,foil
1,121150,Watery Grave,RVR,Rare,1
2,121149,Watery Grave,RVR,Rare,0
3,121148,Temple Garden,RVR,Rare,1
4,121147,Temple Garden,RVR,Rare,0
5,121146,Stomping Ground,RVR,Rare,1
...,...,...,...,...,...
78526,235,Swamp,PRM,Common,0
78527,234,Plains,PRM,Common,1
78528,233,Plains,PRM,Common,0
78529,232,Plains,PRM,Common,1


In [73]:
# Load the prices for one day (Jan 25th, the most recent at the time the data was captured).
# This snapshot is used to find the cheapest version of each tournament-played
# card, which is the version tracked over time, and so lets us tie the pricing
# ID to our card ID.
sample_pricing_file = "data/prices/price-history-2024-01-25.json"
sample_pricing_df = (
    pd.read_json(sample_pricing_file, orient='index')
    .rename_axis('MTGO_id')
    .reset_index()
)
sample_pricing_df.columns = ['MTGO_id', 'price']
sample_pricing_df

Unnamed: 0,MTGO_id,price
0,121151,2.240
1,121150,0.490
2,121149,0.240
3,121148,0.700
4,121147,1.080
...,...,...
78568,19,0.007
78569,18,0.070
78570,14,0.007
78571,13,0.200


In [79]:
# Attach a price to every tournament card definition (rows without a matching
# MTGO_id on both sides are dropped by the inner join).
cards_merged_df = filtered_card_definitions_df.merge(sample_pricing_df, on='MTGO_id', how='inner')

# For each card name, keep the row holding the lowest price, and rename the
# name column to match the card info table.
min_price_indices = cards_merged_df.groupby('name')['price'].idxmin()
filtered_merged_df = (
    cards_merged_df
    .loc[min_price_indices]
    .rename(columns={'name': 'card name'})
)

filtered_merged_df

Unnamed: 0,MTGO_id,card name,cardset,rarity,foil,price
224,118074,Abrade,LCI,Common,0,0.002
5193,46497,Abrupt Decay,RTR,Rare,0,0.060
4136,59642,Accumulated Knowledge,PRM,Common,1,0.002
1847,91676,Acererak the Archlich,AFR,Mythic,0,0.180
277,117530,"Aclazotz, Deepest Betrayal",LCI,Mythic,0,1.760
...,...,...,...,...,...,...
2838,80527,Zagoth Triome,IKO,Rare,0,0.750
1466,98741,Ziatora's Proving Ground,SNC,Rare,0,1.730
4269,58401,Zulaport Cutthroat,BFZ,Uncommon,0,0.004
414,115722,_____ Goblin,UNF,Common,0,4.810


In [81]:
# Join the card info table to the cheapest-version pricing rows on 'card name',
# which relates our own card ids to the MTGO_id from the pricing sheets for our DB.
card_info_master_df = (
    card_info_complete_df
    .merge(filtered_merged_df, on='card name', how='inner')
    .rename(columns={'price': 'latest price'})
)

card_info_master_df

Unnamed: 0,card id,card name,colors,cmc,type,subtype,MTGO_id,cardset,rarity,foil,latest price
0,1,Birds of Paradise,['G'],1,['Creature'],['Bird'],41607,M12,Rare,0,0.006
1,2,Brainstorm,['U'],1,['Instant'],,53117,VMA,Common,0,0.100
2,3,Brazen Borrower,['U'],3,['Creature'],"['Faerie', 'Rogue']",78180,ELD,Mythic,0,2.670
3,4,Collector Ouphe,['G'],2,['Creature'],['Ouphe'],72688,MH1,Rare,0,1.110
4,5,Crashing Footfalls,['G'],0,['Sorcery'],,72692,MH1,Rare,0,3.780
...,...,...,...,...,...,...,...,...,...,...,...
843,844,Radiant Flames,['R'],3,['Sorcery'],,58741,BFZ,Rare,0,0.006
844,845,Remorseful Cleric,['W'],2,['Creature'],"['Spirit', 'Cleric']",68227,M19,Rare,0,0.008
845,846,Rending Volley,['R'],1,['Instant'],,56265,DTK,Uncommon,1,0.360
846,847,"Saheeli, Sublime Artificer","['R', 'U']",3,"['Legendary', 'Planeswalker']",['Saheeli'],72074,WAR,Uncommon,0,0.003


In [None]:
# Export the combined card info / pricing table to its own file.
# The previous target, data/sideboard_cards.csv, is a raw input that this
# notebook reads when building the master card list, so writing there would
# overwrite the source data.
card_info_master_df.to_csv("data/card_info_master.csv", index=False)