In [1]:
# import pandas and numpy
import pandas as pd
import numpy as np

In [2]:
# Read the medal records from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='countrycleanmedel.csv')

In [3]:
# Count the missing values in each column and print the per-column totals
missing_per_column = df.isnull().sum()
print(missing_per_column)

discipline_title         0
slug_game                0
event_title              0
event_gender             0
medal_type               0
participant_type         0
country_name             0
country_3_letter_code    0
dtype: int64


In [4]:
# Remove the 'country_3_letter_code' column.
# Rebinding `df` (instead of inplace=True) together with errors='ignore'
# keeps this cell safe to re-run: a second execution is a no-op rather than
# a KeyError for the already-removed column.
df = df.drop(columns='country_3_letter_code', errors='ignore')

In [5]:
# One-hot encode the categorical columns: pd.get_dummies expands each
# category value into its own indicator column
onehotdf = pd.get_dummies(data=df)

In [None]:
# Thresholds a rule must strictly exceed to be kept
MIN_SUPPORT = 0.01
MIN_CONFIDENCE = 0.1
MIN_LIFT = 1


def _safe_divide(numerator, denominator):
    """Element-wise ``numerator / denominator``; 0 wherever the denominator is not > 0."""
    result = np.zeros(len(numerator), dtype=float)
    np.divide(numerator, denominator, out=result, where=denominator > 0)
    return result


# Cast the indicator columns to integers so sums count occurrences
onehotdf = onehotdf.astype(int)

# Support of each single item: fraction of rows in which its column is 1
supportnumber = onehotdf.sum() / len(onehotdf)
factors = onehotdf.columns

# Joint support of every pair of items in one matrix product instead of a
# Python loop over all pairs: entry (i, j) of X.T @ X counts the rows where
# columns i and j are both 1.
# NOTE: assumes every column of onehotdf is a 0/1 indicator (as produced by
# get_dummies on categorical columns), so the product equals the logical AND.
# A float matrix is used so the product runs through NumPy's floating-point
# matmul; the counts are whole numbers and stay exact in float64.
item_matrix = onehotdf.to_numpy(dtype=np.float64)
pair_support = (item_matrix.T @ item_matrix) / len(onehotdf)

# Every unordered pair (i < j), in the same row-major order as a nested
# `for i ... for j in range(i + 1, ...)` loop
first_idx, second_idx = np.triu_indices(len(factors), k=1)
labels = factors.to_numpy()
single_support = supportnumber.to_numpy()

support_a = single_support[first_idx]
support_b = single_support[second_idx]
support_ab = pair_support[first_idx, second_idx]

# Confidence(A -> B) = support(A, B) / support(A), and the reverse direction
confidence_ab = _safe_divide(support_ab, support_a)
confidence_ba = _safe_divide(support_ab, support_b)

# Lift(A -> B) = confidence(A -> B) / support(B), and the reverse direction
lift_ab = _safe_divide(confidence_ab, support_b)
lift_ba = _safe_divide(confidence_ba, support_a)

# Each pair yields two rules, A -> B followed by B -> A. Interleaving the two
# directions keeps them on adjacent rows (2p and 2p + 1 for pair number p).
rules = pd.DataFrame({
    'Antecedent': np.column_stack([labels[first_idx], labels[second_idx]]).ravel(),
    'Consequent': np.column_stack([labels[second_idx], labels[first_idx]]).ravel(),
    'Support': np.repeat(support_ab, 2),
    'Confidence': np.column_stack([confidence_ab, confidence_ba]).ravel(),
    'Lift': np.column_stack([lift_ab, lift_ba]).ravel(),
})

# Keep only the rules that exceed the minimum support, confidence and lift
rules = rules[(rules['Support'] > MIN_SUPPORT) &
              (rules['Confidence'] > MIN_CONFIDENCE) &
              (rules['Lift'] > MIN_LIFT)]

In [9]:
# Order the rules from highest to lowest support. Rebinding the name instead
# of sorting in place leaves the filtered frame untouched and keeps the cell
# safe to re-run.
rules = rules.sort_values(by='Support', ascending=False)

# Display the rules, highest support first
rules

Unnamed: 0,Antecedent,Consequent,Support,Confidence,Lift
2951563,participant_type_Athlete,event_gender_Men,0.471707,0.676239,1.053375
2951562,event_gender_Men,participant_type_Athlete,0.471707,0.734776,1.053375
2952734,medal_type_BRONZE,participant_type_Athlete,0.246423,0.710162,1.018089
2952735,participant_type_Athlete,medal_type_BRONZE,0.246423,0.353272,1.018089
2951556,event_gender_Men,medal_type_BRONZE,0.224638,0.349917,1.008421
...,...,...,...,...,...
389552,slug_game_munich-1972,medal_type_BRONZE,0.010154,0.349206,1.006372
246174,discipline_title_Swimming,country_name_Australia,0.010154,0.124787,4.231051
246175,country_name_Australia,discipline_title_Swimming,0.010154,0.344288,4.231051
98886,discipline_title_Diving,event_gender_Women,0.010108,0.476087,1.631330
