# Magic: The Gathering Market Analysis 
* Physical Cards, English Only, Retail Pricing

In [1]:
import pandas as pd

In [2]:
dfm = pd.read_csv('../data/dataMagic/cardsMagic.csv') # 8/26/25
# see all columns
pd.set_option('display.max_columns', None)

  dfm = pd.read_csv('../data/dataMagic/cardsMagic.csv') # 8/26/25


In [3]:
dfm.shape

(105961, 78)

* There are a ton of missing values, but that makes sense because some card types have no integer values (e.g. sorceries don't have creature power).
* Determining columns we want to keep in this initial data frame  

In [4]:
# For a full ist of collumns (most of these probably can be dropped for our purposes)
dfm.columns

Index(['artist', 'artistIds', 'asciiName', 'attractionLights', 'availability',
       'boosterTypes', 'borderColor', 'cardParts', 'colorIdentity',
       'colorIndicator', 'colors', 'defense', 'duelDeck', 'edhrecRank',
       'edhrecSaltiness', 'faceConvertedManaCost', 'faceFlavorName',
       'faceManaValue', 'faceName', 'finishes', 'flavorName', 'flavorText',
       'frameEffects', 'frameVersion', 'hand', 'hasAlternativeDeckLimit',
       'isFullArt', 'isFunny', 'isGameChanger', 'isOnlineOnly', 'isOversized',
       'isPromo', 'isRebalanced', 'isReprint', 'isReserved', 'isStarter',
       'isStorySpotlight', 'isTextless', 'isTimeshifted', 'keywords',
       'language', 'layout', 'leadershipSkills', 'life', 'loyalty', 'manaCost',
       'manaValue', 'name', 'number', 'originalPrintings',
       'originalReleaseDate', 'originalText', 'otherFaceIds', 'power',
       'printings', 'promoTypes', 'rarity', 'rebalancedPrintings',
       'relatedCards', 'securityStamp', 'setCode', 'side', 'si

In [5]:
# Specify the needed columns
dfm = dfm[["availability", "colors", "language", "name", "rarity", "setCode", "types", "uuid"]]

dfm

Unnamed: 0,availability,colors,language,name,rarity,setCode,types,uuid
0,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c
1,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,b7c19924-b4bf-56fc-aa73-f586e940bd42
2,"mtgo, paper",W,English,Angel of Mercy,uncommon,10E,Creature,57aaebc1-850c-503d-9f6e-bb8d00d8bf7c
3,"mtgo, paper",W,English,Angel of Mercy,uncommon,10E,Creature,8fd4e2eb-3eb4-50ea-856b-ef638fa47f8a
4,"mtgo, paper",W,English,Angelic Blessing,common,10E,Sorcery,55bd38ca-dc73-5c06-8f80-a6ddd2f44382
...,...,...,...,...,...,...,...,...
105956,"mtgo, paper",U,English,Into the Roil,common,ZNR,Instant,3f492516-7767-5ed7-a1d4-e3f7c06aee2f
105957,"mtgo, paper",B,English,Bloodchief's Thirst,uncommon,ZNR,Sorcery,3f9a0369-5fe7-5aee-85fe-3cfaacd275af
105958,"mtgo, paper",R,English,Roil Eruption,common,ZNR,Sorcery,97577e9e-69a9-5a8b-9c24-a72703790046
105959,"mtgo, paper",G,English,Roiling Regrowth,uncommon,ZNR,Instant,deb51cbd-b890-5b2d-9d6f-7b896e16c6fd


In [6]:
# identify NA and Null values for the newly specified df columns
dfm.isna().sum()

availability        0
colors          22895
language            0
name                0
rarity              0
setCode             0
types               0
uuid                0
dtype: int64

In [7]:
dfm.loc[dfm['colors'].isna()]

Unnamed: 0,availability,colors,language,name,rarity,setCode,types,uuid
423,"mtgo, paper",,English,Angel's Feather,uncommon,10E,Artifact,da0a5791-2fc2-53e4-bc8b-c4d8cd026ded
424,"mtgo, paper",,English,Bottle Gnomes,uncommon,10E,"Artifact, Creature",8402d391-a810-5c04-af77-d3fb01dbacca
425,"mtgo, paper",,English,Chimeric Staff,rare,10E,Artifact,da8f1e81-fc1f-57f4-b4ec-0c5445a299e8
426,"mtgo, paper",,English,Chromatic Star,uncommon,10E,Artifact,3785490a-01f5-511d-b471-60b1209b3d4f
427,"mtgo, paper",,English,Citanul Flute,rare,10E,Artifact,b0a0f3ea-f483-53e3-ae41-bdb409141fdf
...,...,...,...,...,...,...,...,...
105949,"arena, mtgo, paper",,English,Plains,common,ZNR,Land,45260a6a-bb6a-521c-98c3-cd6643ac4f46
105950,"arena, mtgo, paper",,English,Island,common,ZNR,Land,bc649741-e3df-531f-b52c-b3634cb80c7b
105951,"arena, mtgo, paper",,English,Swamp,common,ZNR,Land,8148c863-ee4b-5204-b115-3f172931e08c
105952,"arena, mtgo, paper",,English,Mountain,common,ZNR,Land,6c9650e9-4b6c-5954-94eb-dc6c0418a760


* There are cards that are inherently "colorless" in MTG. It's interesting that they are not labeled as such in this dataset and are NaN instead.  
* Checking to make sure those are all listed as "NaN" instead of a different designation.  
* B, G, R, U, W are all valid color codes (cards can be multiple colors).
* Colors will be useful for our functions later but not necessarily useful for determining card values.

In [8]:
dfm["colors"].unique()

array(['W', 'U', 'B', 'R', 'G', nan, 'B, G, W', 'U, W', 'B, U', 'B, U, W',
       'B, W', 'G, R, U', 'R, U', 'B, R', 'B, G', 'G, R', 'G, R, W',
       'R, W', 'G, U, W', 'G, U', 'B, G, R, U, W', 'G, W', 'B, R, W',
       'R, U, W', 'B, G, R', 'B, R, U', 'B, G, U', 'B, G, U, W',
       'B, R, U, W', 'G, R, U, W', 'B, G, R, W', 'B, G, R, U', 'W, U',
       'U, B', 'U, G', 'W, R', 'U, R', 'R, G', 'W, G', 'W, B', 'U, R, G'],
      dtype=object)

In [9]:
dfm.loc[dfm["colors"].isna(), "types"].unique()

array(['Artifact', 'Artifact, Creature', 'Land', 'Creature',
       'Planeswalker', 'Artifact, Land', 'Sorcery', 'Instant',
       'Enchantment', 'Enchantment, Creature', 'Vanguard',
       'Enchantment, Land', 'Land, Creature', 'Kindred, Sorcery',
       'Kindred, Instant', 'Conspiracy', 'Plane', 'Scheme',
       'Artifact, Planeswalker', 'Kindred, Artifact',
       'Kindred, Enchantment', 'Poly, Artifact', 'Phenomenon', 'Battle',
       'Dungeon', 'pLAnE', 'Stickers', 'Hero', 'Hero, Artifact',
       'Artifact, Enchantment', 'Phenome-nom'], dtype=object)

In [10]:
# Replace all NaN with "C" for "Colorless"
dfm["colors"] = dfm["colors"].fillna("C")

In [11]:
# Check to see if it worked
dfm.loc[dfm["colors"].isna(), "types"].unique()

array([], dtype=object)

In [12]:
# Second check
dfm["colors"].unique()

array(['W', 'U', 'B', 'R', 'G', 'C', 'B, G, W', 'U, W', 'B, U', 'B, U, W',
       'B, W', 'G, R, U', 'R, U', 'B, R', 'B, G', 'G, R', 'G, R, W',
       'R, W', 'G, U, W', 'G, U', 'B, G, R, U, W', 'G, W', 'B, R, W',
       'R, U, W', 'B, G, R', 'B, R, U', 'B, G, U', 'B, G, U, W',
       'B, R, U, W', 'G, R, U, W', 'B, G, R, W', 'B, G, R, U', 'W, U',
       'U, B', 'U, G', 'W, R', 'U, R', 'R, G', 'W, G', 'W, B', 'U, R, G'],
      dtype=object)

* We are strictly looking at physical cards (paper) and not cards that are solely available for online play.  
    - This is due to the wild disparity between online and physical cards.  
    - We are looking only to gauge secondary, physical market.  

In [13]:
dfm[dfm["availability"] == "mtgo"]

Unnamed: 0,availability,colors,language,name,rarity,setCode,types,uuid
5748,mtgo,U,English,Vizzerdrix,rare,8ED,Creature,3ebaaff5-b2ca-5f8c-ba0a-52bbf7370f20
6464,mtgo,U,English,Vizzerdrix,rare,9ED,Creature,1aebb788-b6b4-5332-9532-d96da074cc6a
51922,mtgo,W,English,Angry Mob,uncommon,ME1,Creature,794948e1-4a0f-52e1-bd56-cde1ba99ab3b
51923,mtgo,W,English,Animate Wall,uncommon,ME1,Enchantment,2193fd6f-294f-59de-8ca6-d1c411fcdfda
51924,mtgo,W,English,Argivian Archaeologist,rare,ME1,Creature,8319cfd6-d509-534c-a4c7-3eda07c9fcd3
...,...,...,...,...,...,...,...,...
99997,mtgo,C,English,Tropical Island,rare,VMA,Land,600a90cf-e426-5b8a-915c-6bf9d655f9a8
99998,mtgo,C,English,Tundra,rare,VMA,Land,f799dbb8-5caf-5415-bff5-cbfd3ebb0c5a
99999,mtgo,C,English,Underground Sea,rare,VMA,Land,24981f20-226f-5be3-821e-0ce73532eb03
100000,mtgo,C,English,Volcanic Island,rare,VMA,Land,1c92f806-ac8b-576c-995a-7c9d4197633c


* Having digital and physical cards together in this dataset also accounts for why we're seeing so many rows with the same names.

In [14]:
dfm.duplicated(subset=["name"], keep=False)

0         True
1         True
2         True
3         True
4         True
          ... 
105956    True
105957    True
105958    True
105959    True
105960    True
Length: 105961, dtype: bool

In [15]:
dfm.value_counts('availability')

availability
mtgo, paper           40847
paper                 37369
arena, mtgo, paper    17324
mtgo                   5563
arena                  3684
arena, paper           1152
shandalar                12
dreamcast                10
Name: count, dtype: int64

In [16]:
# Specifying the rows to keep involving paper
dfm = dfm[
    (dfm["availability"] == "mtgo, paper") | 
    (dfm["availability"] == "paper") | 
    (dfm["availability"] == "arena, mtgo, paper") |
    (dfm["availability"] == "arena, paper") 
    ]

dfm

Unnamed: 0,availability,colors,language,name,rarity,setCode,types,uuid
0,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c
1,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,b7c19924-b4bf-56fc-aa73-f586e940bd42
2,"mtgo, paper",W,English,Angel of Mercy,uncommon,10E,Creature,57aaebc1-850c-503d-9f6e-bb8d00d8bf7c
3,"mtgo, paper",W,English,Angel of Mercy,uncommon,10E,Creature,8fd4e2eb-3eb4-50ea-856b-ef638fa47f8a
4,"mtgo, paper",W,English,Angelic Blessing,common,10E,Sorcery,55bd38ca-dc73-5c06-8f80-a6ddd2f44382
...,...,...,...,...,...,...,...,...
105956,"mtgo, paper",U,English,Into the Roil,common,ZNR,Instant,3f492516-7767-5ed7-a1d4-e3f7c06aee2f
105957,"mtgo, paper",B,English,Bloodchief's Thirst,uncommon,ZNR,Sorcery,3f9a0369-5fe7-5aee-85fe-3cfaacd275af
105958,"mtgo, paper",R,English,Roil Eruption,common,ZNR,Sorcery,97577e9e-69a9-5a8b-9c24-a72703790046
105959,"mtgo, paper",G,English,Roiling Regrowth,uncommon,ZNR,Instant,deb51cbd-b890-5b2d-9d6f-7b896e16c6fd


In [17]:
# Check to make sure it worked as intended
dfm.value_counts("availability")

availability
mtgo, paper           40847
paper                 37369
arena, mtgo, paper    17324
arena, paper           1152
Name: count, dtype: int64

* There are still rows with the same card name, but those are alt arts, promos, etc. each with their own values, and have separate uuid #s.

In [18]:
# Check for duplicated rows to be sure (there are none)
dfm.duplicated(keep=False).sum()

np.int64(0)

* We're only going to be working with the english version of cards

In [19]:
dfm = dfm[dfm["language"] == "English"]

* We're also going to remove the basic lands from each set.  
* These lands are printed every set in bulk and are mostly worthless, barring certain outliers.  
* This will tighten our dataset and focus it toward value.  

In [20]:
basic_lands = ["Forest", "Island", "Mountain", "Plains", "Swamp"]

# the tilde flips the boolean values, so we are keeping what is NOT IN basic_lands
dfm = dfm[~dfm["name"].isin(basic_lands)]

In [21]:
# Check to make sure it worked correctly
dfm[dfm["name"] == "Forest"]

Unnamed: 0,availability,colors,language,name,rarity,setCode,types,uuid


* Documenting set code info for reference later  

In [22]:
dfm["setCode"].nunique()

633

In [23]:
dfm["setCode"].unique()

array(['10E', '2ED', '2X2', '2XM', '30A', '3ED', '40K', '4ED', '5DN',
       '5ED', '6ED', '7ED', '8ED', '9ED', 'A25', 'ACR', 'AER', 'AFC',
       'AFR', 'AKH', 'ALA', 'ALL', 'APC', 'ARB', 'ARC', 'ARN', 'ATH',
       'ATQ', 'AVR', 'BBD', 'BFZ', 'BIG', 'BLB', 'BLC', 'BNG', 'BOK',
       'BOT', 'BRB', 'BRC', 'BRO', 'BRR', 'BTD', 'C13', 'C14', 'C15',
       'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'CC1', 'CC2', 'CED',
       'CEI', 'CHK', 'CHR', 'CLB', 'CLU', 'CM1', 'CM2', 'CMA', 'CMB1',
       'CMB2', 'CMD', 'CMM', 'CMR', 'CN2', 'CNS', 'CON', 'CP1', 'CP2',
       'CP3', 'CSP', 'CST', 'DBL', 'DCI', 'DD1', 'DD2', 'DDC', 'DDD',
       'DDE', 'DDF', 'DDG', 'DDH', 'DDI', 'DDJ', 'DDK', 'DDL', 'DDM',
       'DDN', 'DDO', 'DDP', 'DDQ', 'DDR', 'DDS', 'DDT', 'DDU', 'DFT',
       'DGM', 'DIS', 'DKA', 'DKM', 'DMC', 'DMR', 'DMU', 'DOM', 'DPA',
       'DRB', 'DRC', 'DRK', 'DSC', 'DSK', 'DST', 'DTK', 'DVD', 'E01',
       'E02', 'ELD', 'EMA', 'EMN', 'EOC', 'EOE', 'EOS', 'EVE', 'EVG',
       'EXO', 'EXP

* Now that we have a clean dataset, let's join in set details from another CSV file.  
* We're looking to add set name, release date, and release year with this join.

In [24]:
dfmSets = pd.read_csv('../data/dataMagic/setsMagic.csv') # 9/22/25
dfmSets.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 864 entries, 0 to 863
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   setName      864 non-null    object
 1   setCode      864 non-null    object
 2   releaseDate  864 non-null    object
 3   releaseYear  864 non-null    int64 
dtypes: int64(1), object(3)
memory usage: 27.1+ KB


In [25]:
dfmSets.isna().sum()

setName        0
setCode        0
releaseDate    0
releaseYear    0
dtype: int64

In [26]:
dfm2 = pd.merge(dfm, dfmSets, on = "setCode", how = "inner")
dfm2.head()

Unnamed: 0,availability,colors,language,name,rarity,setCode,types,uuid,setName,releaseDate,releaseYear
0,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,7/13/2007,2007
1,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,b7c19924-b4bf-56fc-aa73-f586e940bd42,Tenth Edition,7/13/2007,2007
2,"mtgo, paper",W,English,Angel of Mercy,uncommon,10E,Creature,57aaebc1-850c-503d-9f6e-bb8d00d8bf7c,Tenth Edition,7/13/2007,2007
3,"mtgo, paper",W,English,Angel of Mercy,uncommon,10E,Creature,8fd4e2eb-3eb4-50ea-856b-ef638fa47f8a,Tenth Edition,7/13/2007,2007
4,"mtgo, paper",W,English,Angelic Blessing,common,10E,Sorcery,55bd38ca-dc73-5c06-8f80-a6ddd2f44382,Tenth Edition,7/13/2007,2007


* Now let's bring in the card prices.

In [27]:
dfmPrices =  pd.read_csv('../data/dataMagic/pricesMagic.csv') # 8/27/25
dfmPrices.head()

Unnamed: 0,cardFinish,currency,date,gameAvailability,price,priceProvider,providerListing,uuid
0,normal,USD,2025-08-26,mtgo,0.19,cardhoarder,retail,f182e364-0439-5594-a6e6-75f7889ccf45
1,normal,USD,2025-08-26,mtgo,0.38,cardhoarder,retail,330deaa3-dd7a-52a8-bfbc-b323cd16a409
2,normal,USD,2025-08-26,mtgo,0.02,cardhoarder,retail,79e36956-b91f-580f-8309-7d9585a67560
3,normal,USD,2025-08-26,mtgo,0.29,cardhoarder,retail,6afb2b4c-530a-57d5-8e7f-871239f6fa05
4,normal,USD,2025-08-26,mtgo,0.02,cardhoarder,retail,b1fc2762-92aa-5a14-8509-a59cb611e376


* We again only want paper prices.  
* We also don't want buylist prices either, only retail.  
* CardMarket is strictly a EU trader, so their prices are in EUR.  

In [28]:
dfmPrices["gameAvailability"].unique()

array(['mtgo', 'paper'], dtype=object)

In [29]:
dfmPrices["providerListing"].unique()

array(['retail', 'buylist'], dtype=object)

In [30]:
dfmPrices["priceProvider"].unique()

array(['cardhoarder', 'cardmarket', 'cardkingdom', 'cardsphere',
       'tcgplayer'], dtype=object)

In [31]:
mtgo = ["mtgo"]
buylist = ["buylist"]
cardmarket = ["cardmarket"]

dfmPrices = dfmPrices[~dfmPrices["gameAvailability"].isin(mtgo)]
dfmPrices = dfmPrices[~dfmPrices["providerListing"].isin(buylist)]
dfmPrices = dfmPrices[~dfmPrices["priceProvider"].isin(cardmarket)]
dfmPrices

Unnamed: 0,cardFinish,currency,date,gameAvailability,price,priceProvider,providerListing,uuid
78,normal,USD,2025-08-26,paper,20460.00,cardsphere,retail,1c17ce18-bf3e-558b-9389-632588f93851
82,normal,USD,2025-08-26,paper,50219.07,cardsphere,retail,d4d8c9f9-31ed-53ed-ab67-eba86e2198fe
86,normal,USD,2025-08-26,paper,17999.99,cardkingdom,retail,3876cd4c-db88-534e-877c-307fa2e2b160
87,normal,USD,2025-08-26,paper,5724.15,cardsphere,retail,3876cd4c-db88-534e-877c-307fa2e2b160
91,normal,USD,2025-08-26,paper,9206.99,cardsphere,retail,173fc1fb-c465-5d62-b34e-60d76df02fee
...,...,...,...,...,...,...,...,...
621739,normal,USD,2025-08-26,paper,4638.00,cardsphere,retail,1e05a119-4a09-52a2-a68b-02c97d949077
621740,normal,USD,2025-08-26,paper,1.28,cardsphere,retail,14895070-3890-5142-8442-d27af207cf5a
621741,normal,USD,2025-08-26,paper,343.81,cardsphere,retail,478443e8-3f28-5be0-87c2-e1952c281841
621742,normal,USD,2025-08-26,paper,1019.99,cardsphere,retail,72e22db3-a046-52b2-bd16-b1bb470f16d5


In [32]:
dfm3 = pd.merge(dfm2, dfmPrices, on = "uuid", how = "inner")
dfm3.head(5)

Unnamed: 0,availability,colors,language,name,rarity,setCode,types,uuid,setName,releaseDate,releaseYear,cardFinish,currency,date,gameAvailability,price,priceProvider,providerListing
0,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,7/13/2007,2007,normal,USD,2025-08-26,paper,0.25,tcgplayer,retail
1,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,7/13/2007,2007,foil,USD,2025-08-26,paper,0.52,tcgplayer,retail
2,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,7/13/2007,2007,normal,USD,2025-08-26,paper,0.35,cardkingdom,retail
3,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,7/13/2007,2007,normal,USD,2025-08-26,paper,0.23,cardsphere,retail
4,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,b7c19924-b4bf-56fc-aa73-f586e940bd42,Tenth Edition,7/13/2007,2007,normal,USD,2025-08-26,paper,0.25,tcgplayer,retail


* The goal now is to add an average price for all cards, calculated from the price providers.

In [33]:
# using transform to add a column with the average price
dfm3["avgMarketPrice"] = dfm3.groupby(['uuid', 'cardFinish'])['price'].transform('mean')
dfm3.head()

Unnamed: 0,availability,colors,language,name,rarity,setCode,types,uuid,setName,releaseDate,releaseYear,cardFinish,currency,date,gameAvailability,price,priceProvider,providerListing,avgMarketPrice
0,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,7/13/2007,2007,normal,USD,2025-08-26,paper,0.25,tcgplayer,retail,0.276667
1,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,7/13/2007,2007,foil,USD,2025-08-26,paper,0.52,tcgplayer,retail,0.52
2,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,7/13/2007,2007,normal,USD,2025-08-26,paper,0.35,cardkingdom,retail,0.276667
3,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,7/13/2007,2007,normal,USD,2025-08-26,paper,0.23,cardsphere,retail,0.276667
4,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,b7c19924-b4bf-56fc-aa73-f586e940bd42,Tenth Edition,7/13/2007,2007,normal,USD,2025-08-26,paper,0.25,tcgplayer,retail,0.25


In [34]:
# Round avgPrice to 2 decimals
dfm3["avgMarketPrice"] = dfm3["avgMarketPrice"].round(2)
dfm3.head()

Unnamed: 0,availability,colors,language,name,rarity,setCode,types,uuid,setName,releaseDate,releaseYear,cardFinish,currency,date,gameAvailability,price,priceProvider,providerListing,avgMarketPrice
0,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,7/13/2007,2007,normal,USD,2025-08-26,paper,0.25,tcgplayer,retail,0.28
1,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,7/13/2007,2007,foil,USD,2025-08-26,paper,0.52,tcgplayer,retail,0.52
2,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,7/13/2007,2007,normal,USD,2025-08-26,paper,0.35,cardkingdom,retail,0.28
3,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,7/13/2007,2007,normal,USD,2025-08-26,paper,0.23,cardsphere,retail,0.28
4,"mtgo, paper",W,English,Ancestor's Chosen,uncommon,10E,Creature,b7c19924-b4bf-56fc-aa73-f586e940bd42,Tenth Edition,7/13/2007,2007,normal,USD,2025-08-26,paper,0.25,tcgplayer,retail,0.25


* Now to drop card price column and to make one row per uuid, so we don't skew results later one with having multiple indexes per card and price source.  
* There will still be multiple rows for some card names, but they will have different uuids for alt arts and promos.  
* Availability column is also no longer needed now that we have gameAvailability.  
* cardFinish is no longer needed since it is now part of the average price.  
* priceProvider is no longer needed as well, which will help us de-dupe.  

In [35]:
# check to make sure for no NA values after merges
dfm3.isna().sum()

availability        0
colors              0
language            0
name                0
rarity              0
setCode             0
types               0
uuid                0
setName             0
releaseDate         0
releaseYear         0
cardFinish          0
currency            0
date                0
gameAvailability    0
price               0
priceProvider       0
providerListing     0
avgMarketPrice      0
dtype: int64

In [36]:
dfm3.dtypes

availability         object
colors               object
language             object
name                 object
rarity               object
setCode              object
types                object
uuid                 object
setName              object
releaseDate          object
releaseYear           int64
cardFinish           object
currency             object
date                 object
gameAvailability     object
price               float64
priceProvider        object
providerListing      object
avgMarketPrice      float64
dtype: object

* We need to convert releaseDate to datetime so it will sort correctly.

In [37]:
# We don't need availability any longer, since we now have gameAvailability from pricesMagic.csv
dfm3.drop(columns = ["availability"], inplace = True)
dfm3.head()

Unnamed: 0,colors,language,name,rarity,setCode,types,uuid,setName,releaseDate,releaseYear,cardFinish,currency,date,gameAvailability,price,priceProvider,providerListing,avgMarketPrice
0,W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,7/13/2007,2007,normal,USD,2025-08-26,paper,0.25,tcgplayer,retail,0.28
1,W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,7/13/2007,2007,foil,USD,2025-08-26,paper,0.52,tcgplayer,retail,0.52
2,W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,7/13/2007,2007,normal,USD,2025-08-26,paper,0.35,cardkingdom,retail,0.28
3,W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,7/13/2007,2007,normal,USD,2025-08-26,paper,0.23,cardsphere,retail,0.28
4,W,English,Ancestor's Chosen,uncommon,10E,Creature,b7c19924-b4bf-56fc-aa73-f586e940bd42,Tenth Edition,7/13/2007,2007,normal,USD,2025-08-26,paper,0.25,tcgplayer,retail,0.25


In [38]:
dfm3['releaseDate'] = pd.to_datetime(dfm3['releaseDate'], format = '%m/%d/%Y', errors = 'raise')
dfm3.dtypes

colors                      object
language                    object
name                        object
rarity                      object
setCode                     object
types                       object
uuid                        object
setName                     object
releaseDate         datetime64[ns]
releaseYear                  int64
cardFinish                  object
currency                    object
date                        object
gameAvailability            object
price                      float64
priceProvider               object
providerListing             object
avgMarketPrice             float64
dtype: object

In [39]:
# Creating a new dataframe here - dfm3 will be used for SQL queries
dfm4 = dfm3.drop(columns = ["price", "priceProvider"])
dfm4.head()

Unnamed: 0,colors,language,name,rarity,setCode,types,uuid,setName,releaseDate,releaseYear,cardFinish,currency,date,gameAvailability,providerListing,avgMarketPrice
0,W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,2007-07-13,2007,normal,USD,2025-08-26,paper,retail,0.28
1,W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,2007-07-13,2007,foil,USD,2025-08-26,paper,retail,0.52
2,W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,2007-07-13,2007,normal,USD,2025-08-26,paper,retail,0.28
3,W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,2007-07-13,2007,normal,USD,2025-08-26,paper,retail,0.28
4,W,English,Ancestor's Chosen,uncommon,10E,Creature,b7c19924-b4bf-56fc-aa73-f586e940bd42,Tenth Edition,2007-07-13,2007,normal,USD,2025-08-26,paper,retail,0.25


In [40]:
dfm4.drop_duplicates(keep = "first", inplace = True)
dfm4.head()

Unnamed: 0,colors,language,name,rarity,setCode,types,uuid,setName,releaseDate,releaseYear,cardFinish,currency,date,gameAvailability,providerListing,avgMarketPrice
0,W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,2007-07-13,2007,normal,USD,2025-08-26,paper,retail,0.28
1,W,English,Ancestor's Chosen,uncommon,10E,Creature,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c,Tenth Edition,2007-07-13,2007,foil,USD,2025-08-26,paper,retail,0.52
4,W,English,Ancestor's Chosen,uncommon,10E,Creature,b7c19924-b4bf-56fc-aa73-f586e940bd42,Tenth Edition,2007-07-13,2007,normal,USD,2025-08-26,paper,retail,0.25
5,W,English,Ancestor's Chosen,uncommon,10E,Creature,b7c19924-b4bf-56fc-aa73-f586e940bd42,Tenth Edition,2007-07-13,2007,foil,USD,2025-08-26,paper,retail,0.5
7,W,English,Angel of Mercy,uncommon,10E,Creature,57aaebc1-850c-503d-9f6e-bb8d00d8bf7c,Tenth Edition,2007-07-13,2007,normal,USD,2025-08-26,paper,retail,0.25


In [41]:
# A more viewer-friendly order
newOrderM = ['name', 'setCode', 'setName', 'language', 'types', 'colors', 'rarity', 'cardFinish', 'releaseDate', 'releaseYear', 'gameAvailability', 'avgMarketPrice', 'currency', 'providerListing', 'date', 'uuid']

dfm4 = dfm4[newOrderM]
dfm4.head()

Unnamed: 0,name,setCode,setName,language,types,colors,rarity,cardFinish,releaseDate,releaseYear,gameAvailability,avgMarketPrice,currency,providerListing,date,uuid
0,Ancestor's Chosen,10E,Tenth Edition,English,Creature,W,uncommon,normal,2007-07-13,2007,paper,0.28,USD,retail,2025-08-26,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c
1,Ancestor's Chosen,10E,Tenth Edition,English,Creature,W,uncommon,foil,2007-07-13,2007,paper,0.52,USD,retail,2025-08-26,5f8287b1-5bb6-5f4c-ad17-316a40d5bb0c
4,Ancestor's Chosen,10E,Tenth Edition,English,Creature,W,uncommon,normal,2007-07-13,2007,paper,0.25,USD,retail,2025-08-26,b7c19924-b4bf-56fc-aa73-f586e940bd42
5,Ancestor's Chosen,10E,Tenth Edition,English,Creature,W,uncommon,foil,2007-07-13,2007,paper,0.5,USD,retail,2025-08-26,b7c19924-b4bf-56fc-aa73-f586e940bd42
7,Angel of Mercy,10E,Tenth Edition,English,Creature,W,uncommon,normal,2007-07-13,2007,paper,0.25,USD,retail,2025-08-26,57aaebc1-850c-503d-9f6e-bb8d00d8bf7c


In [42]:
# Order by year, then setName, then name
dfm4 = dfm4.sort_values(by=["releaseDate", "setName", "name"])
dfm4.head()

Unnamed: 0,name,setCode,setName,language,types,colors,rarity,cardFinish,releaseDate,releaseYear,gameAvailability,avgMarketPrice,currency,providerListing,date,uuid
147891,Air Elemental,LEA,Limited Edition Alpha,English,Creature,U,uncommon,normal,1993-08-05,1993,paper,160.26,USD,retail,2025-08-26,27e92f54-0084-57c2-85e5-197e026fab5c
147894,Ancestral Recall,LEA,Limited Edition Alpha,English,Instant,U,rare,normal,1993-08-05,1993,paper,20460.0,USD,retail,2025-08-26,1c17ce18-bf3e-558b-9389-632588f93851
147895,Animate Artifact,LEA,Limited Edition Alpha,English,Enchantment,U,uncommon,normal,1993-08-05,1993,paper,55.07,USD,retail,2025-08-26,e035e37e-cb8e-5f12-a5db-fe7f927a3457
147998,Animate Dead,LEA,Limited Edition Alpha,English,Enchantment,B,uncommon,normal,1993-08-05,1993,paper,528.04,USD,retail,2025-08-26,a5421ca2-32e5-5b31-bc12-979c626fc6eb
147783,Animate Wall,LEA,Limited Edition Alpha,English,Enchantment,W,rare,normal,1993-08-05,1993,paper,685.84,USD,retail,2025-08-26,2b304dc1-8d7d-50a7-a310-2d0e5427935f


In [43]:
# Reset index after manipulation and to check new number of rows
# Dropping the original index column
dfm4 = dfm4.reset_index(drop = True)
dfm4.head()

Unnamed: 0,name,setCode,setName,language,types,colors,rarity,cardFinish,releaseDate,releaseYear,gameAvailability,avgMarketPrice,currency,providerListing,date,uuid
0,Air Elemental,LEA,Limited Edition Alpha,English,Creature,U,uncommon,normal,1993-08-05,1993,paper,160.26,USD,retail,2025-08-26,27e92f54-0084-57c2-85e5-197e026fab5c
1,Ancestral Recall,LEA,Limited Edition Alpha,English,Instant,U,rare,normal,1993-08-05,1993,paper,20460.0,USD,retail,2025-08-26,1c17ce18-bf3e-558b-9389-632588f93851
2,Animate Artifact,LEA,Limited Edition Alpha,English,Enchantment,U,uncommon,normal,1993-08-05,1993,paper,55.07,USD,retail,2025-08-26,e035e37e-cb8e-5f12-a5db-fe7f927a3457
3,Animate Dead,LEA,Limited Edition Alpha,English,Enchantment,B,uncommon,normal,1993-08-05,1993,paper,528.04,USD,retail,2025-08-26,a5421ca2-32e5-5b31-bc12-979c626fc6eb
4,Animate Wall,LEA,Limited Edition Alpha,English,Enchantment,W,rare,normal,1993-08-05,1993,paper,685.84,USD,retail,2025-08-26,2b304dc1-8d7d-50a7-a310-2d0e5427935f


In [44]:
dfm4.to_csv("../data/dataMagic/completeMagicClean.csv", index = False)

* We started with ~106k rows x 78 columns and now ~89k rows x 15 columns.

In [45]:
# Checking name lookup for reference
dfm4[dfm4["name"] == "Black Lotus"]

Unnamed: 0,name,setCode,setName,language,types,colors,rarity,cardFinish,releaseDate,releaseYear,gameAvailability,avgMarketPrice,currency,providerListing,date,uuid
17,Black Lotus,LEA,Limited Edition Alpha,English,Artifact,C,rare,normal,1993-08-05,1993,paper,50219.07,USD,retail,2025-08-26,d4d8c9f9-31ed-53ed-ab67-eba86e2198fe
301,Black Lotus,LEB,Limited Edition Beta,English,Artifact,C,rare,normal,1993-10-04,1993,paper,55799.07,USD,retail,2025-08-26,7415e72a-f2f7-53e5-bcec-0a8c61ff3a15
587,Black Lotus,2ED,Unlimited Edition,English,Artifact,C,rare,normal,1993-12-01,1993,paper,17902.49,USD,retail,2025-08-26,e6c9fe58-bc4f-529d-a387-77d61af87de4
873,Black Lotus,CED,Collectors’ Edition,English,Artifact,C,rare,normal,1993-12-10,1993,paper,2860.91,USD,retail,2025-08-26,ecfef046-ee24-54ca-9670-7fc2d09e09dc
1159,Black Lotus,CEI,Intl. Collectors’ Edition,English,Artifact,C,rare,normal,1993-12-10,1993,paper,3611.14,USD,retail,2025-08-26,27580ad8-a961-5c7e-9832-001faaa3455b
99048,Black Lotus,30A,30th Anniversary Edition,English,Artifact,C,rare,normal,2022-11-28,2022,paper,2999.99,USD,retail,2025-08-26,d710e0f5-4d66-594e-9317-a61a1673262b
99049,Black Lotus,30A,30th Anniversary Edition,English,Artifact,C,rare,normal,2022-11-28,2022,paper,7999.99,USD,retail,2025-08-26,0e1cfa11-8ad4-5f3d-909f-232c390d8617


* Prep for visualizations

In [46]:
setValueMagic = dfm4.groupby("setName").agg({
    "avgMarketPrice" : "sum",
    "releaseYear" : "first"
})

# reset index so setName and releaseYear are columns
setValueMagicClean = setValueMagic.reset_index()
# ordering
setValueMagicClean = setValueMagicClean.sort_values(by=["releaseYear", "avgMarketPrice"], ascending = False)
# outputting
setValueMagicClean.to_csv("../data/dataMagic/setValueMagicClean.csv", index = False)

In [47]:
# To show the top sets from each year for graph ax text
setSumMagic = dfm4.groupby(["releaseYear", "setName"], as_index=False)["avgMarketPrice"].sum()

# For each year, the set with the highest total price
topSetsMagic = setSumMagic.loc[setSumMagic.groupby("releaseYear")["avgMarketPrice"].idxmax()]
topSetsMagic = topSetsMagic.sort_values("avgMarketPrice").round()
topSetsMagic = topSetsMagic.sort_values("releaseYear")

topSetsMagic

Unnamed: 0,releaseYear,setName,avgMarketPrice
3,1993,Limited Edition Alpha,390429.0
10,1994,Legends,13955.0
19,1995,Media Inserts,534.0
23,1996,Mirage,1266.0
28,1997,Tempest,1238.0
37,1998,Judge Gift Cards 1998,3633.0
50,1999,Mercadian Masques,8612.0
65,2000,Invasion,4894.0
81,2001,Seventh Edition,35947.0
87,2002,Onslaught,9048.0
