In [1]:
import pandas as pd
from scipy.stats import ttest_ind

In [2]:
encoding = 'latin1'
cards_cleaned = pd.read_csv('cards_cleaned.csv', encoding=encoding)

In [3]:
cards_cleaned.head(5)

Unnamed: 0,card_id,playerClass,type,name,set,text,mana cost,attack,health,rarity,collectible,flavor,sentiment
0,AT_132,NEUTRAL,MINION,Justicar Trueheart,TGT,<b>Battlecry:</b> Replace your starting Hero P...,6,6,3,LEGENDARY,True,It's like putting racing stripes and a giant s...,very positive
1,AT_131,NEUTRAL,MINION,Eydis Darkbane,TGT,Whenever <b>you</b> target this minion with a ...,3,3,4,LEGENDARY,True,"HATES being called ""the wonder twins"".",very negative
2,EX1_583,NEUTRAL,MINION,Priestess of Elune,EXPERT1,<b>Battlecry:</b> Restore 4 Health to your hero.,6,5,4,COMMON,True,"If she threatens to ""moon"" you, it's not what ...",negative
3,CFM_095,NEUTRAL,MINION,Weasel Tunneler,GANGS,<b>Deathrattle:</b> Shuffle this minion into y...,1,1,1,EPIC,True,He's the reason the First Bank of Gadgetzan ha...,positive
4,BRM_018,PALADIN,MINION,Dragon Consort,BRM,<b>Battlecry:</b> The next Dragon you play cos...,5,5,5,RARE,True,Everybody wants someone to snuggle with. Even ...,very negative


Hypothesis: The average attack and health of legendary minions are different from non-legendary minions.

Null Hypothesis (H0): μ_attack(legendary) = μ_attack(non-legendary) and μ_health(legendary) = μ_health(non-legendary)

Alternative Hypothesis (H1): At least one of the average stats for legendary minions is different from that of non-legendary minions.

In [4]:
# Separate data for legendary and non-legendary minions
legendary_minions = cards_cleaned[(cards_cleaned['rarity'] == 'LEGENDARY') & (cards_cleaned['type'] == 'MINION')]
non_legendary_minions = cards_cleaned[(cards_cleaned['rarity'] != 'LEGENDARY') & (cards_cleaned['type'] == 'MINION')]

# Perform t-test for attack
attack_statistic, attack_p_value = ttest_ind(legendary_minions['attack'], non_legendary_minions['attack'], equal_var=False)

# Perform t-test for health
health_statistic, health_p_value = ttest_ind(legendary_minions['health'], non_legendary_minions['health'], equal_var=False)

# Set significance level
alpha = 0.05

# Print the results
print(f'Test for Attack - Statistic: {attack_statistic}, P-value: {attack_p_value}')
if attack_p_value < alpha:
    print('Reject the null hypothesis for attack.')
else:
    print('Fail to reject the null hypothesis for attack.')

print('\n')

print(f'Test for Health - Statistic: {health_statistic}, P-value: {health_p_value}')
if health_p_value < alpha:
    print('Reject the null hypothesis for health.')
else:
    print('Fail to reject the null hypothesis for health.')

Test for Attack - Statistic: 9.261209957308273, P-value: 6.875007195621618e-17
Reject the null hypothesis for attack.


Test for Health - Statistic: 9.115870152914475, P-value: 1.7453530923513324e-16
Reject the null hypothesis for health.


The average attack of legendary minions is different from common minions.

Null Hypothesis (H0): μ_attack(legendary) = μ_attack(common). Legendary cards attack value is equal to common cards attack value.

Alternative Hypothesis (H1): μ_attack(legendary) ≠ μ_attack(common). Legendary cards attack value is different from common cards attack value

In [5]:
# Separate data for legendary and common minions
legendary_minions = cards_cleaned[(cards_cleaned['rarity'] == 'LEGENDARY') & (cards_cleaned['type'] == 'MINION')]
common_minions = cards_cleaned[(cards_cleaned['rarity'] == 'COMMON') & (cards_cleaned['type'] == 'MINION')]

# Perform two-sample t-test for attack
attack_statistic, attack_p_value = ttest_ind(legendary_minions['attack'], common_minions['attack'], equal_var=False)

# Set significance level
alpha = 0.05

# Print the results
print(f'Test for Attack - Statistic: {attack_statistic}, P-value: {attack_p_value}')
if attack_p_value < alpha:
    print('Reject the null hypothesis. Legendary cards attack value is different from common cards attack value')
else:
    print('Fail to reject the null hypothesis.')

Test for Attack - Statistic: 10.035844391402186, P-value: 1.914019599904708e-19
Reject the null hypothesis. Legendary cards attack value is different from common cards attack value


Hypothesis: The distribution of card types (MINION, SPELL, etc.) varies across different sets.

Null Hypothesis (H0): The distribution of card types is the same across all sets.

Alternative Hypothesis (H1): The distribution of card types is different across sets.

In [6]:
from scipy.stats import chi2_contingency

In [7]:
# Create a contingency table of observed frequencies
contingency_table = pd.crosstab(cards_cleaned['type'], cards_cleaned['set'])
contingency_table

set,BRM,CHEAT,CORE,EXPERT1,GANGS,GVG,KARA,LOE,NAXX,OG,PROMO,REWARD,TGT
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
MINION,23,0,50,155,98,91,33,33,25,97,2,2,96
SPELL,8,1,68,82,30,23,9,10,4,29,0,0,28
WEAPON,0,0,1,7,3,5,2,1,1,3,0,0,4


In [8]:
# Perform chi-square test
chi2_stat, p_value, _, expected = chi2_contingency(contingency_table)

# Set significance level
alpha = 0.05

# Print the results
print(f'Chi-square Statistic: {chi2_stat}, P-value: {p_value}')
if p_value < alpha:
    print('Reject the null hypothesis. The distribution of card types is different across sets.')
else:
    print('Fail to reject the null hypothesis.')

Chi-square Statistic: 75.80704812741477, P-value: 2.792079596827584e-07
Reject the null hypothesis. The distribution of card types is different across sets.
