In [6]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from pathlib import Path
import numpy as np
# Read the movie catalogue from the folder the notebook is running in
notebook_dir = Path().resolve()
movilens = pd.read_csv(notebook_dir / 'movilens_dataset/movies.csv')

# One-hot encode the genres: split each pipe-separated genre string into a
# list, then let TransactionEncoder build one indicator column per distinct
# genre. The result replaces `movilens` and is indexed by movie title.
movilens['genres'] = movilens['genres'].str.split('|')
te = TransactionEncoder()
te_ary = te.fit_transform(movilens['genres'])
genre_flags = pd.DataFrame(te_ary, columns=te.columns_)
movilens = genre_flags.set_index(movilens['title'])

Calcular el soporte, confianza y lift de las siguientes reglas:  
- `Romance -> Drama`
- `Action, Adventure -> Thriller`
- `Crime, Action -> Thriller` 
- `Crime -> Action, Thriller`
- `Crime -> Children`

In [18]:
# All metrics are computed on the one-hot genre table `movilens`: the mean of a
# row-wise AND over some genre columns is the fraction of movies that carry
# every one of those genres.

# Supports: share of movies containing every genre of the rule
# (antecedent and consequent together)
print("Supports")
support_Ro_Dr = movilens[['Romance', 'Drama']].all(axis=1).mean()
print(f"Support (Romance -> Drama): {support_Ro_Dr}")
support_Ac_Ad_Th = movilens[['Action', 'Adventure', 'Thriller']].all(axis=1).mean()
print(f"Support (Action, Adventure -> Thriller): {support_Ac_Ad_Th}")
support_Cr_Ac_Th = movilens[['Crime', 'Action', 'Thriller']].all(axis=1).mean()
print(f"Support (Crime, Action -> Thriller): {support_Cr_Ac_Th}")
# Same itemset as the previous rule, so the support value is shared
print(f"Support (Crime -> Action, Thriller): {support_Cr_Ac_Th}")
support_Cr_Ch = movilens[['Crime', 'Children']].all(axis=1).mean()
print(f"Support (Crime -> Children): {support_Cr_Ch}")

# Confidences: support of the rule divided by the support of its antecedent
print("\nConfidences")
confidence_Ro_Dr = support_Ro_Dr / movilens['Romance'].mean()
print(f"Confidence (Romance -> Drama): {confidence_Ro_Dr}")
confidence_AcAd_Th = support_Ac_Ad_Th / movilens[['Action', 'Adventure']].all(axis=1).mean()
print(f"Confidence (Action, Adventure -> Thriller): {confidence_AcAd_Th}")
confidence_CrAc_Th = support_Cr_Ac_Th / movilens[['Crime', 'Action']].all(axis=1).mean()
print(f"Confidence (Crime, Action -> Thriller): {confidence_CrAc_Th}")
confidence_Cr_AcTh = support_Cr_Ac_Th / movilens['Crime'].mean()
print(f"Confidence (Crime -> Action, Thriller): {confidence_Cr_AcTh}")
confidence_Cr_Ch = support_Cr_Ch / movilens['Crime'].mean()
print(f"Confidence (Crime -> Children): {confidence_Cr_Ch}")

# Lifts: confidence of the rule divided by the support of its consequent
print("\nLifts")
lift_Ro_Dr = confidence_Ro_Dr / movilens['Drama'].mean()
print(f"Lift (Romance -> Drama): {lift_Ro_Dr}")
lift_AcAd_Th = confidence_AcAd_Th / movilens['Thriller'].mean()
print(f"Lift (Action, Adventure -> Thriller): {lift_AcAd_Th}")
lift_CrAc_Th = confidence_CrAc_Th / movilens['Thriller'].mean()
print(f"Lift (Crime, Action -> Thriller): {lift_CrAc_Th}")
lift_Cr_AcTh = confidence_Cr_AcTh / movilens[['Action', 'Thriller']].all(axis=1).mean()
print(f"Lift (Crime -> Action, Thriller): {lift_Cr_AcTh}")
lift_Cr_Ch = confidence_Cr_Ch / movilens['Children'].mean()
print(f"Lift (Crime -> Children): {lift_Cr_Ch}")

Confidences
Suppport (Romance -> Drama): 0.067854084603528
Suppport (Action, Adventure -> Thriller): 0.004350059941770851
Suppport (Crime, Action -> Thriller): 0.009624935776674087
Suppport (Crime -> Children): 0.0007535536907004624

Confidences
Confidence (Romance -> Drama): 0.5731507377760632
Confidence (Action, Adventure -> Thriller): 0.1814285714285714
Confidence (Crime, Action -> Thriller): 0.4596510359869138
Confidence (Crime -> Action, Thriller): 0.12084288990825687
Confidence (Crime -> Children): 0.009461009174311927

Lifts
Lift (Romance -> Drama): 1.4688926808519822
Lift (Action, Adventure -> Thriller): 1.3440261717475621
Lift (Crime, Action -> Thriller): 3.4051032721740544
Lift (Crime -> Action, Thriller): 3.713692811443747
Lift (Crime -> Children): 0.18332798418851995


In [51]:
def get_support(itemset: list, onehot_dataset: pd.DataFrame) -> float:
    """Return the support of ``itemset``: the fraction of rows containing every item.

    Parameters
    ----------
    itemset
        Column labels of ``onehot_dataset`` that must all be truthy in a row
        for that row to count. An empty itemset is satisfied by every row, so
        its support is 1.0.
    onehot_dataset
        One-hot encoded table with one column per item.

    Returns
    -------
    float
        Share of rows in which all the requested items are present.

    Raises
    ------
    KeyError
        If an item is not a column of ``onehot_dataset``.
    """
    # list() makes tuples and other sequences select several columns instead
    # of being interpreted as a single (tuple) column label.
    selected = onehot_dataset[list(itemset)]
    # Row-wise AND over the selected columns. With no columns selected every
    # row is vacuously True, which gives the conventional support of 1.0
    # instead of failing on an empty itemset.
    return selected.astype(bool).all(axis=1).mean()

In [52]:


def rule_metrics(antedecent: list, consequent: list, onehot_dataset: pd.DataFrame) -> dict[str, float | str]:
    """Compute support, confidence and lift for the rule ``antedecent -> consequent``.

    Parameters
    ----------
    antedecent
        Items on the left-hand side of the rule.
    consequent
        Items on the right-hand side of the rule.
    onehot_dataset
        One-hot encoded table passed through to ``get_support``.

    Returns
    -------
    dict
        ``'rule'`` (text rendering of the rule), ``'support'`` (support of
        antecedent and consequent together), ``'confidence'`` (that support
        divided by the antecedent's support) and ``'lift'`` (the confidence
        divided by the consequent's support).
    """
    def support_of(items: list) -> float:
        # Thin wrapper so every lookup runs against the same dataset
        return get_support(itemset=items, onehot_dataset=onehot_dataset)

    joint_support = support_of(antedecent + consequent)
    rule_confidence = joint_support / support_of(antedecent)
    rule_lift = rule_confidence / support_of(consequent)

    return dict(
        rule=f"{antedecent} -> {consequent}",
        support=joint_support,
        confidence=rule_confidence,
        lift=rule_lift,
    )

In [53]:
# Example: metrics for a rule whose antecedent has two genres
rule_metrics(antedecent=['Romance', 'Thriller'], consequent=['Drama'], onehot_dataset=movilens)

{'rule': "['Romance', 'Thriller'] -> ['Drama']",
 'support': 0.003756350973340184,
 'confidence': 0.5643224699828473,
 'lift': 1.4462672577453601}