In [None]:
# import analytics tool
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# import ML tools
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import learning_curve
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression

In [None]:
# Load the card data and preview the first five rows
cards_df = pd.read_csv('cards.csv')
cards_df.head()

In [None]:
# Show every column name available in the card data
list(cards_df.columns)

In [None]:
# Keep only the columns used in the rest of the notebook
columns_to_keep = [
    'name', 'types', 'colors', 'power', 'toughness', 'manaCost',
    'manaValue', 'rarity', 'setCode', 'uuid', 'isReprint',
]
cards_df = cards_df[columns_to_keep]

In [None]:
# Load the price data and preview the first five rows
price_df = pd.read_csv('cardPrices.csv')
price_df.head()

In [None]:
# Show every column name available in the price data
list(price_df.columns)

In [None]:
# Price columns of interest; `uuid` is the column shared with the card data
price_df.loc[:, ['price', 'priceProvider', 'uuid']]

In [None]:
# Load the set data and preview the first five rows
set_df = pd.read_csv('sets.csv')
set_df.head()

In [None]:
# Show every column name available in the set data
list(set_df.columns)

In [None]:
# Set code alongside its release date
set_df.loc[:, ['code', 'releaseDate']]

## Clean Data

In [None]:
# Distinct set codes, in order of first appearance, used to pick out the
# standard-legal sets to analyze
cards_df['setCode'].drop_duplicates().tolist()

In [None]:
# Restrict the card data to the sets treated as standard legal
standard_legal = ['MID', 'VOW', 'NEO', 'SNC', 'DMU', 'BRO', 'MOM', 'MAT', 'WOE']
in_standard = cards_df['setCode'].isin(standard_legal)
standard_cards_df = cards_df[in_standard]
standard_cards_df.head()

In [None]:
# Cards in MOM with duplicate names dropped (the first occurrence of each name is kept)
mom_cards = standard_cards_df[standard_cards_df['setCode'] == 'MOM']
mom_cards = mom_cards.drop_duplicates(subset='name', keep='first')
mom_cards.head(5)

## Dataframe Including All Standard Legal Cards

In [None]:
# Build one frame holding every standard-legal card, with duplicate names
# dropped within each set (the first occurrence of each name is kept).

# Legal sets; rows in the result follow this set order
standard_legal = ['MID', 'VOW', 'NEO', 'SNC', 'DMU', 'BRO', 'MOM', 'MAT', 'WOE']

# Collect the de-duplicated slice of each set first, then concatenate once.
# A single concat avoids re-copying the accumulated rows on every iteration.
unique_cards_per_set = [
    cards_df[cards_df['setCode'] == set_code].drop_duplicates(subset='name', keep='first')
    for set_code in standard_legal
]
standard_cards_df = pd.concat(unique_cards_per_set, ignore_index=True)

standard_cards_df.head(5)

In [None]:
# Check for cards that are not legal in standard: Aftermath has multiverse
# legends, which are not standard legal
is_mat = standard_cards_df['setCode'] == 'MAT'
mat_cards = pd.Series(standard_cards_df.loc[is_mat, 'name'].unique())
print(f'Number of unique card in MAT is {mat_cards.count()}.')

#### Create a Banned list to be removed from the data set

In [None]:
# Names of the cards to be removed from the standard card pool
standard_ban_list = [
    'The Meathook Massacre',
    'Fable of the Mirror-Breaker//Reflection of Kiki-Jiki',
    'Invoke Despair',
    'Reckoner Bankbuster',
]
# NOTE(review): isin() matches names exactly, so every entry must be spelled
# exactly as it appears in the `name` column - confirm the separator the data
# uses for double-faced cards.
name_is_banned = standard_cards_df['name'].isin(standard_ban_list)
banned_cards = standard_cards_df[name_is_banned]
banned_cards

This did not return every card I wanted to ban: Fable of the Mirror-Breaker//Reflection of Kiki-Jiki was missing from the result.

In [None]:
# Find Fable of the Mirror-Breaker by its front-face name.
# Matching the full front-face name as a literal (not a regex) avoids picking up
# other cards whose names merely contain "Fable"; na=False keeps the mask
# strictly boolean if any name is missing.
fable_card = standard_cards_df[
    standard_cards_df['name'].str.contains('Fable of the Mirror-Breaker', regex=False, na=False)
]
fable_card

In [None]:
# Final frame of banned cards: the name matches plus the Fable lookup,
# with fully identical rows collapsed to one
combined_bans = pd.concat([banned_cards, fable_card], ignore_index=True)
banned_cards = combined_bans.drop_duplicates()
banned_cards

In [None]:
# Remove the banned cards from the standard pool.
# A boolean mask on `uuid` drops exactly the rows whose uuid appears in
# banned_cards and leaves every other row (and its index label) untouched.
# .copy() makes the result an independent frame rather than a view of
# standard_cards_df.
is_banned = standard_cards_df['uuid'].isin(banned_cards['uuid'])
standard_legal_df = standard_cards_df[~is_banned].copy()
standard_legal_df.head(5)

In [None]:
# Bar chart of how many standard-legal cards exist at each rarity
custom_palette = {'common': '#F0F0F0', 'uncommon': 'silver', 'mythic': 'orange', 'rare': 'gold'}

rarity_counts = standard_legal_df['rarity'].value_counts()

# barplot returns the Axes it drew on; label that Axes directly
ax = sns.barplot(x=rarity_counts.index, y=rarity_counts.values, palette=custom_palette)
ax.set_ylabel('Count')
ax.set_title('Distribution of Rarity in Standard Legal Cards')
plt.show()

In [None]:
# Bar chart of how many standard-legal cards exist for each value of `types`
type_counts = standard_legal_df['types'].value_counts()

# Default seaborn colours are used; no per-type palette is defined for this chart.
sns.barplot(x=type_counts.index, y=type_counts.values)
plt.ylabel('Count')
plt.title('Distribution of card types in Standard Legal Cards')
# Rotate the tick labels so long type names do not overlap
plt.xticks(rotation=45, ha='right')
plt.show()

In [None]:
# Distinct values of `colors` - does it include NaN?
pd.unique(standard_legal_df['colors'])

In [None]:
# Rows with no color value; these appear to be colorless cards and lands
has_no_color = standard_legal_df['colors'].isna()
null_color_df = standard_legal_df.loc[has_no_color]
null_color_df.head()

In [None]:
# Independent copy of the rows whose `types` value is exactly 'Land'
is_land = standard_legal_df['types'].eq('Land')
lands_df = standard_legal_df.loc[is_land].copy()
lands_df.head()

In [None]:
# Data frame of all colorless cards that are not lands: rows with no color
# whose `types` value is not 'Land' (the same test used to build lands_df).
# Filtering null_color_df directly guarantees that only rows without a color
# end up here. .copy() makes this an independent frame, so assigning to its
# columns later does not touch standard_legal_df.
colorless_cards_df = null_color_df[null_color_df['types'] != 'Land'].copy()
colorless_cards_df.head(5)

In [None]:
# Mark every colorless card with the color code 'C'
colorless_cards_df['colors'] = 'C'
colorless_cards_df.head()

In [None]:
# Mark every land with the code 'L' in the `colors` column
lands_df['colors'] = 'L'
lands_df.head()