In [1]:
import polars as pl
import mlxtend.frequent_patterns as fp
import mlxtend.preprocessing as pp
import pickle
import pandas as pd
from efficient_apriori import apriori as apriori_efficient
import numpy as np
import pyarrow as pa
from sklearn.model_selection import KFold
from joblib import Parallel, delayed

In [2]:
data = pl.read_csv(raw_data_path / 'bgg-26m-reviews.csv')
print(data.head())

shape: (5, 6)
┌─────┬─────────────┬────────┬─────────────────────────────────┬─────┬───────┐
│     ┆ user        ┆ rating ┆ comment                         ┆ ID  ┆ name  │
│ --- ┆ ---         ┆ ---    ┆ ---                             ┆ --- ┆ ---   │
│ i64 ┆ str         ┆ f64    ┆ str                             ┆ i64 ┆ str   │
╞═════╪═════════════╪════════╪═════════════════════════════════╪═════╪═══════╡
│ 0   ┆ sidehacker  ┆ 10.0   ┆ null                            ┆ 13  ┆ CATAN │
│ 1   ┆ Varthlokkur ┆ 10.0   ┆ null                            ┆ 13  ┆ CATAN │
│ 2   ┆ dougthonus  ┆ 10.0   ┆ Currently, this sits on my lis… ┆ 13  ┆ CATAN │
│ 3   ┆ cypar7      ┆ 10.0   ┆ I know it says how many plays,… ┆ 13  ┆ CATAN │
│ 4   ┆ ssmooth     ┆ 10.0   ┆ null                            ┆ 13  ┆ CATAN │
└─────┴─────────────┴────────┴─────────────────────────────────┴─────┴───────┘


## Filtering

In [3]:
data = data.filter(pl.col('rating') >= 8)
print(data.head())

shape: (5, 6)
┌─────┬─────────────┬────────┬─────────────────────────────────┬─────┬───────┐
│     ┆ user        ┆ rating ┆ comment                         ┆ ID  ┆ name  │
│ --- ┆ ---         ┆ ---    ┆ ---                             ┆ --- ┆ ---   │
│ i64 ┆ str         ┆ f64    ┆ str                             ┆ i64 ┆ str   │
╞═════╪═════════════╪════════╪═════════════════════════════════╪═════╪═══════╡
│ 0   ┆ sidehacker  ┆ 10.0   ┆ null                            ┆ 13  ┆ CATAN │
│ 1   ┆ Varthlokkur ┆ 10.0   ┆ null                            ┆ 13  ┆ CATAN │
│ 2   ┆ dougthonus  ┆ 10.0   ┆ Currently, this sits on my lis… ┆ 13  ┆ CATAN │
│ 3   ┆ cypar7      ┆ 10.0   ┆ I know it says how many plays,… ┆ 13  ┆ CATAN │
│ 4   ┆ ssmooth     ┆ 10.0   ┆ null                            ┆ 13  ┆ CATAN │
└─────┴─────────────┴────────┴─────────────────────────────────┴─────┴───────┘


## Grouping into baskets

In [4]:
baskets = (
    data
    .group_by(['user'])
    .agg(
        pl.col('name').alias('games')
    )
)

### Training and test split

In [5]:
# might be worth trying a 30/70 split and compare the performance
baskets_train = baskets.head(int(0.95 * len(baskets)))
baskets_test = baskets.tail(len(baskets) - int(0.95 * len(baskets)))
baskets_test

user,games
str,list[str]
"""MReddi""","[""Terraforming Mars""]"
"""Cybajoe27""","[""Pandemic"", ""Puerto Rico"", … ""Wallenstein (Second Edition)""]"
"""zephyr_warbler""","[""Ticket to Ride"", ""Agricola"", … ""Cathedral""]"
"""chubs25991""","[""Patchwork"", ""Scythe"", … ""Monopoly Free Parking Mini Game""]"
"""MauriceDelTaco""","[""Carcassonne"", ""Azul"", … ""Silver Bullet""]"
…,…
"""qbie""","[""Codenames"", ""Takenoko"", … ""Grove""]"
"""Rumples""","[""Spyfall""]"
"""Jmcontre8""","[""Terraforming Mars"", ""Blood Rage"", … ""Betrayal: Deck of Lost Souls""]"
"""jacxel""","[""Cosmic Encounter""]"


In [6]:
baskets_test = baskets_test.filter(pl.col('games').list.len() >= 3)
baskets_test = baskets_test.to_pandas()

baskets_test['test'] = baskets_test['games'].apply(lambda x: x[0:int(np.ceil(0.3*len(x)))])
baskets_test['games'] = baskets_test['games'].apply(lambda x: x[int(np.ceil(0.3*len(x))):])

In [7]:
baskets_test_training = baskets_test[['user', 'games']]

In [8]:
baskets_train = baskets_train.to_pandas()
baskets_train = pd.concat([baskets_train, baskets_test_training], ignore_index=True)

In [9]:
baskets_train.shape

(528626, 2)

## Getting frequent itemsets

In [10]:
baskets_train_as_tuples = [tuple(row) for row in baskets_train['games']]
itemsets, rules = apriori_efficient(baskets_train_as_tuples, min_support=0.01, min_confidence=0.7)

In [11]:
pickle.dump((itemsets, rules), open(generated_data_path / 'freq_itemsets_approach2_0.005_0.8.pkl', 'wb'))

In [12]:
itemsets, rules = pickle.load(open(generated_data_path / 'freq_itemsets_approach2_0.005_0.8.pkl', 'rb'))

In [13]:
len_transactions = len(baskets_train_as_tuples)
itemsets_flattened = []
supports = []
for i in itemsets.keys():
    itemsets_flattened.extend([item[0] for item in itemsets[i].items()])
    supports.extend([item[1]/len_transactions for item in itemsets[i].items()])

In [14]:
itemsets_dict = {
    'support': supports,
    'itemsets': itemsets_flattened
}
itemsets_df = pd.DataFrame(itemsets_dict)
itemsets_df.head()

Unnamed: 0,support,itemsets
0,0.083216,"(Agricola,)"
1,0.020491,"(It's a Wonderful World,)"
2,0.0137,"(Legends of Andor,)"
3,0.072189,"(Ticket to Ride,)"
4,0.085792,"(CATAN,)"


## Association rules

In [15]:
rules_df = fp.association_rules(itemsets_df, metric="confidence", min_threshold=0.6)
rules_df.tail()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
428,"(Wingspan, Lost Ruins of Arnak, Scythe)",(Terraforming Mars),0.014521,0.142696,0.010594,0.729547,5.112581,1.0,0.008521,3.169876,0.816257,0.07225,0.68453,0.401892
429,"(Wingspan, The Castles of Burgundy, Scythe)",(Terraforming Mars),0.014,0.142696,0.010629,0.759222,5.320541,1.0,0.008632,3.560552,0.82358,0.072771,0.719145,0.416856
430,"(Terraforming Mars, Viticulture Essential Edit...",(Scythe),0.01858,0.110014,0.011829,0.636632,5.786854,1.0,0.009785,2.449271,0.842855,0.101304,0.591715,0.372077
431,"(Terraforming Mars, Viticulture Essential Edit...",(Wingspan),0.018959,0.121619,0.011829,0.623927,5.130177,1.0,0.009523,2.335668,0.820633,0.091875,0.571857,0.360594
432,"(Viticulture Essential Edition, Wingspan, Scythe)",(Terraforming Mars),0.016834,0.142696,0.011829,0.702663,4.924185,1.0,0.009427,2.883275,0.810566,0.080086,0.653172,0.392779


In [66]:
rules_df.to_csv(generated_data_path / "rules.csv", index=True)

## Recommendation function

In [16]:
def recommender_association(rules_df, product_list, N=1):
    candidate_rules = rules_df[rules_df['antecedents'].apply(lambda x: x.issubset(product_list))]
    candidate_rules = candidate_rules.sort_values("confidence")
    recommendation_list = []
    for i in range(len(candidate_rules)):
        for item in candidate_rules.iloc[i]['consequents']:
            if item not in product_list and item not in recommendation_list:
                recommendation_list.append(item)
            if len(recommendation_list) >= N:
                break
        if len(recommendation_list) >= N:
            break

    return recommendation_list

## Evaluation

In [17]:
predictions = {
    'user': [],
    'recommended_items': [],
    'true_items': []
}

for index, row in baskets_test.iterrows():
    user = row['user']
    train_items = row['games']
    test_items = row['test']
    recommended_items = recommender_association(rules_df, train_items, N=5)

    predictions['user'].append(user)
    predictions['recommended_items'].append(recommended_items)
    predictions['true_items'].append(test_items)
    
predictions_df = pd.DataFrame(predictions)
predictions_df.head()

Unnamed: 0,user,recommended_items,true_items
0,Cybajoe27,[],"[Pandemic, Puerto Rico, Stone Age, Terra Mystica]"
1,zephyr_warbler,[],"[Ticket to Ride, Agricola]"
2,chubs25991,[],"[Patchwork, Scythe, Ticket to Ride: Europe, Te..."
3,MauriceDelTaco,[],"[Carcassonne, Azul, Dominion, Scythe, The Cast..."
4,dkoons927,[],"[Advanced Squad Leader, Strat-O-Matic Baseball]"


In [18]:
predictions_df.to_csv(generated_data_path / 'freq_itemsets_pred_df.csv')

#pickle.dump(predictions_df, open('data/freq_itemsets_pred_df.pkl', 'wb'))

In [19]:
predictions_df = pd.read_csv(generated_data_path / 'freq_itemsets_pred_df.csv')

#predictions_df = pickle.load(open('data/freq_itemsets_pred_df.pkl', 'rb'))

In [20]:
predictions_df.sample(20)

Unnamed: 0,user,recommended_items,true_items
2981,captainrobthepirate,[],"[Terraforming Mars, Exploding Kittens]"
18610,KLittle,[],"[Wingspan, Root]"
6785,Gaming w Sidekicks,[],[Blank Slate]
4515,wignall,[],"[King of Tokyo, Love Letter, Ticket to Ride, C..."
18478,bobxx21,[],"[7 Wonders Duel, Scythe, Pandemic Legacy: Seas..."
12875,huanpishi,[],"[Scythe, Twilight Struggle]"
12026,d925580,[],"[Puerto Rico, Small World, Splendor, Patchwork..."
1489,R3x21,[],"[Azul, Carcassonne, 7 Wonders Duel, Splendor, ..."
18598,flimsy25,[Terraforming Mars],"[King of Tokyo, 7 Wonders Duel, Carcassonne, T..."
1622,CIE6E,[Dominion],"[Scythe, Codenames, Dominion, Pandemic, Root, ..."


In [21]:
# instead of jaccard similarity, we'll use similiary measure which we talked about -- this will in turn, give a more generous evaluation of the recommender

## Precision
def precision_at_k(recommended_items, true_items, k):
    recommended_at_k = recommended_items if len(recommended_items) < k else recommended_items[:k]
    true_positives = len(set(recommended_at_k) & set(true_items))
    precision = true_positives / (len(recommended_at_k) if len(recommended_at_k) > 0 else 1)
    return precision

precisions = []
for i in range(len(predictions_df)):
    precisions.append(precision_at_k(predictions_df['recommended_items'][i], predictions_df['true_items'][i], k=5))

In [22]:
np.mean(precisions)

0.13295509415741188

In [23]:
## Recall
def recall_at_k(recommended_items, true_items, k):
    recommended_at_k = recommended_items if len(recommended_items) < k else recommended_items[:k]
    true_positives = len(set(recommended_at_k) & set(true_items))
    recall = true_positives / (len(true_items) if len(true_items) > 0 else 1)
    return recall

recalls = []
for i in range(len(predictions_df)):
    recalls.append(recall_at_k(predictions_df['recommended_items'][i], predictions_df['true_items'][i], k=5))

In [24]:
np.mean(recalls)

0.02594969010703004

## Tuning

In [25]:
def get_rules_df(baskets_tuples, support, confidence):
    itemsets, rules = apriori_efficient(baskets_tuples, min_support=support, min_confidence=confidence)

    len_transactions = len(baskets_tuples)
    itemsets_flattened = []
    itemsets_supports = []
    for i in itemsets.keys():
        itemsets_flattened.extend([item[0] for item in itemsets[i].items()])
        itemsets_supports.extend([item[1]/len_transactions for item in itemsets[i].items()])
    
    itemsets_dict = {
        'support': itemsets_supports,
        'itemsets': itemsets_flattened
    }
    itemsets_df = pd.DataFrame(itemsets_dict)

    rules = fp.association_rules(itemsets_df, metric="confidence", min_threshold=confidence)
    
    return rules

In [26]:
def process_row(row):
    user = row['user']
    train_items = row['games']
    test_items = row['test']
    recommended_items = recommender_association(rules, train_items, N=10)
    return user, recommended_items, test_items

def get_predictions_from_rules(rules_df, baskets_test):
    predictions = {
        'user': [],
        'recommended_items': [],
        'true_items': []
    }

    results = Parallel(n_jobs=-1, backend='loky')(
        delayed(process_row)(row) for _, row in baskets_test.iterrows()
    )   

    for user, recommended_items, true_items in results:
        predictions['user'].append(user)
        predictions['recommended_items'].append(recommended_items)
        predictions['true_items'].append(true_items)
        
    predictions_df = pd.DataFrame(predictions)
    return predictions_df

In [27]:
def evaluate_model(predictions_df, k):
    precisions = []
    recalls = []
    for i in range(len(predictions_df)):
        precisions.append(precision_at_k(predictions_df['recommended_items'][i], predictions_df['true_items'][i], k=k))
        recalls.append(recall_at_k(predictions_df['recommended_items'][i], predictions_df['true_items'][i], k=k))

    return np.mean(precisions), np.mean(recalls)

In [28]:
# grid search here is fine, but I do wonder if we could do better with a more systematic approach (would have to think about it)
# baskets_train_as_tuples = [tuple(row) for row in baskets_train['games']]
# supports = [0.01, 0.005]
# confidences = [0.6, 0.7, 0.8]
# ks = np.linspace(1, 10, 10, dtype=int)

# evaluation_results = {
#     'support': [],
#     'confidence': [],
#     'k': [],
#     'precision': [],
#     'recall': []
# }

# for support in supports:
#     for confidence in confidences:
#         rules_df = get_rules_df(baskets_train_as_tuples, support, confidence)
#         pickle.dump(rules_df, open(f'data/freq_itemsets_rules_s{support}_c{confidence}.pkl', 'wb'))
#         predictions_df = get_predictions_from_rules(rules_df, baskets_test)
#         pickle.dump(predictions_df, open(f'data/freq_itemsets_pred_s{support}_c{confidence}.pkl', 'wb'))
#         for k in ks:
#             precision, recall = evaluate_model(predictions_df, k)
#             print(f"Support: {support}, Confidence: {confidence}, K: {k} => Precision: {precision}, Recall: {recall}")
#             evaluation_results['support'].append(support)
#             evaluation_results['confidence'].append(confidence)
#             evaluation_results['k'].append(k)
#             evaluation_results['precision'].append(precision)
#             evaluation_results['recall'].append(recall)
           
# evaluation_results_df = pd.DataFrame(evaluation_results)
# evaluation_results_df

## Cross-validation

In [33]:
def get_test_and_train_set(baskets_test_before, baskets_train_before):
    baskets_test = baskets_test_before[baskets_test_before['games'].apply(len) >= 3]

    baskets_test['test'] = baskets_test['games'].apply(lambda x: x[0:int(np.ceil(0.3*len(x)))])
    baskets_test['games'] = baskets_test['games'].apply(lambda x: x[int(np.ceil(0.3*len(x))):])

    baskets_test_training = baskets_test[['user', 'games']]
    baskets_train = pd.concat([baskets_train_before, baskets_test_training], ignore_index=True)

    return baskets_test, baskets_train

baskets_df = baskets.to_pandas()
K=10
folds = KFold(n_splits=K, shuffle=True, random_state=42)
support = 0.01
confidence = 0.7
ks = np.linspace(1, 10, 5, dtype=int)

CV_results = {
    'support': [],
    'confidence': [],
    'k': [],
    'precision': [],
    'recall': []
}

for train_index, test_index in folds.split(baskets_df):
    baskets_train = baskets_df.iloc[train_index]
    baskets_test = baskets_df.iloc[test_index]
    baskets_test, baskets_train = get_test_and_train_set(baskets_test, baskets_train)

    baskets_train_as_tuples = [tuple(row) for row in baskets_train['games']]

    rules_df = get_rules_df(baskets_train_as_tuples, support, confidence)
    predictions_df = get_predictions_from_rules(rules_df, baskets_test)
    for k in ks:
        precision, recall = evaluate_model(predictions_df, k)
        print(f"Support: {support}, Confidence: {confidence}, K: {k} => Precision: {precision}, Recall: {recall}")
        CV_results['support'].append(support)
        CV_results['confidence'].append(confidence)
        CV_results['k'].append(k)
        CV_results['precision'].append(precision)
        CV_results['recall'].append(recall)

CV_results_df = pd.DataFrame(CV_results)
CV_results_df
    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['test'] = baskets_test['games'].apply(lambda x: x[0:int(np.ceil(0.3*len(x)))])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['games'] = baskets_test['games'].apply(lambda x: x[int(np.ceil(0.3*len(x))):])


Support: 0.01, Confidence: 0.7, K: 1 => Precision: 0.12505376344086022, Recall: 0.016644794168239788
Support: 0.01, Confidence: 0.7, K: 3 => Precision: 0.12905017921146952, Recall: 0.024489373635125686
Support: 0.01, Confidence: 0.7, K: 5 => Precision: 0.12928539426523297, Recall: 0.025144017958537303
Support: 0.01, Confidence: 0.7, K: 7 => Precision: 0.12930677163338453, Recall: 0.02518249795886589
Support: 0.01, Confidence: 0.7, K: 10 => Precision: 0.12930773169482848, Recall: 0.025183991387778677


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['test'] = baskets_test['games'].apply(lambda x: x[0:int(np.ceil(0.3*len(x)))])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['games'] = baskets_test['games'].apply(lambda x: x[int(np.ceil(0.3*len(x))):])


Support: 0.01, Confidence: 0.7, K: 1 => Precision: 0.12578803539056793, Recall: 0.016926388001897535
Support: 0.01, Confidence: 0.7, K: 3 => Precision: 0.12875077788901215, Recall: 0.024563116454971733
Support: 0.01, Confidence: 0.7, K: 5 => Precision: 0.12911559656195604, Recall: 0.025329859509256984
Support: 0.01, Confidence: 0.7, K: 7 => Precision: 0.12913260382483036, Recall: 0.025358312018502575
Support: 0.01, Confidence: 0.7, K: 10 => Precision: 0.12913260382483036, Recall: 0.025358312018502575


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['test'] = baskets_test['games'].apply(lambda x: x[0:int(np.ceil(0.3*len(x)))])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['games'] = baskets_test['games'].apply(lambda x: x[int(np.ceil(0.3*len(x))):])


Support: 0.01, Confidence: 0.7, K: 1 => Precision: 0.12401664652973554, Recall: 0.01631186007571569
Support: 0.01, Confidence: 0.7, K: 3 => Precision: 0.12737727659193626, Recall: 0.02384592447259017
Support: 0.01, Confidence: 0.7, K: 5 => Precision: 0.12760594263212063, Recall: 0.024546553525503792
Support: 0.01, Confidence: 0.7, K: 7 => Precision: 0.1276092668239265, Recall: 0.024572583640211965
Support: 0.01, Confidence: 0.7, K: 10 => Precision: 0.1276092668239265, Recall: 0.024572583640211965


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['test'] = baskets_test['games'].apply(lambda x: x[0:int(np.ceil(0.3*len(x)))])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['games'] = baskets_test['games'].apply(lambda x: x[int(np.ceil(0.3*len(x))):])


Support: 0.01, Confidence: 0.7, K: 1 => Precision: 0.1264791308089501, Recall: 0.016431153958315344
Support: 0.01, Confidence: 0.7, K: 3 => Precision: 0.12974218301778542, Recall: 0.02401375813519233
Support: 0.01, Confidence: 0.7, K: 5 => Precision: 0.1300299411933448, Recall: 0.024720666138911086
Support: 0.01, Confidence: 0.7, K: 7 => Precision: 0.1300433878370625, Recall: 0.024751149319469333
Support: 0.01, Confidence: 0.7, K: 10 => Precision: 0.1300433878370625, Recall: 0.024753218033887445


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['test'] = baskets_test['games'].apply(lambda x: x[0:int(np.ceil(0.3*len(x)))])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['games'] = baskets_test['games'].apply(lambda x: x[int(np.ceil(0.3*len(x))):])


Support: 0.01, Confidence: 0.7, K: 1 => Precision: 0.12556366759716556, Recall: 0.016770639539493582
Support: 0.01, Confidence: 0.7, K: 3 => Precision: 0.1283954262400687, Recall: 0.02444149578562514
Support: 0.01, Confidence: 0.7, K: 5 => Precision: 0.12864639252737814, Recall: 0.02512343389585524
Support: 0.01, Confidence: 0.7, K: 7 => Precision: 0.1286477985009765, Recall: 0.025155178948872037
Support: 0.01, Confidence: 0.7, K: 10 => Precision: 0.12864827781015775, Recall: 0.025157619068340287


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['test'] = baskets_test['games'].apply(lambda x: x[0:int(np.ceil(0.3*len(x)))])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['games'] = baskets_test['games'].apply(lambda x: x[int(np.ceil(0.3*len(x))):])


Support: 0.01, Confidence: 0.7, K: 1 => Precision: 0.12391876645355397, Recall: 0.01631438231230953
Support: 0.01, Confidence: 0.7, K: 3 => Precision: 0.12746019806944967, Recall: 0.024078544623088144
Support: 0.01, Confidence: 0.7, K: 5 => Precision: 0.12758645391214027, Recall: 0.024681262521503315
Support: 0.01, Confidence: 0.7, K: 7 => Precision: 0.12760525797381755, Recall: 0.024708694993329228
Support: 0.01, Confidence: 0.7, K: 10 => Precision: 0.12760525797381755, Recall: 0.024708694993329228


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['test'] = baskets_test['games'].apply(lambda x: x[0:int(np.ceil(0.3*len(x)))])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['games'] = baskets_test['games'].apply(lambda x: x[int(np.ceil(0.3*len(x))):])


Support: 0.01, Confidence: 0.7, K: 1 => Precision: 0.12639124693454065, Recall: 0.016962656107726272
Support: 0.01, Confidence: 0.7, K: 3 => Precision: 0.1295757314433036, Recall: 0.02471335652896856
Support: 0.01, Confidence: 0.7, K: 5 => Precision: 0.1298515104966718, Recall: 0.025404967617281754
Support: 0.01, Confidence: 0.7, K: 7 => Precision: 0.12985189548371698, Recall: 0.025415946651795862
Support: 0.01, Confidence: 0.7, K: 10 => Precision: 0.12985189548371698, Recall: 0.025415946651795862


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['test'] = baskets_test['games'].apply(lambda x: x[0:int(np.ceil(0.3*len(x)))])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['games'] = baskets_test['games'].apply(lambda x: x[int(np.ceil(0.3*len(x))):])


Support: 0.01, Confidence: 0.7, K: 1 => Precision: 0.12438077386530995, Recall: 0.016489750027002925
Support: 0.01, Confidence: 0.7, K: 3 => Precision: 0.12882581336189583, Recall: 0.02460939548769188
Support: 0.01, Confidence: 0.7, K: 5 => Precision: 0.1290801981523631, Recall: 0.02522347977137074
Support: 0.01, Confidence: 0.7, K: 7 => Precision: 0.12909116411325544, Recall: 0.025247441730922675
Support: 0.01, Confidence: 0.7, K: 10 => Precision: 0.12909116411325544, Recall: 0.025247441730922675


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['test'] = baskets_test['games'].apply(lambda x: x[0:int(np.ceil(0.3*len(x)))])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['games'] = baskets_test['games'].apply(lambda x: x[int(np.ceil(0.3*len(x))):])


Support: 0.01, Confidence: 0.7, K: 1 => Precision: 0.1276280722534794, Recall: 0.016705667452153874
Support: 0.01, Confidence: 0.7, K: 3 => Precision: 0.1303335397205696, Recall: 0.024309996512759443
Support: 0.01, Confidence: 0.7, K: 5 => Precision: 0.130452885383297, Recall: 0.02493542348008114
Support: 0.01, Confidence: 0.7, K: 7 => Precision: 0.13046993476368662, Recall: 0.02496724661141787
Support: 0.01, Confidence: 0.7, K: 10 => Precision: 0.13046993476368662, Recall: 0.02496724661141787


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['test'] = baskets_test['games'].apply(lambda x: x[0:int(np.ceil(0.3*len(x)))])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baskets_test['games'] = baskets_test['games'].apply(lambda x: x[int(np.ceil(0.3*len(x))):])


Support: 0.01, Confidence: 0.7, K: 1 => Precision: 0.12452535480569843, Recall: 0.01653629097411894
Support: 0.01, Confidence: 0.7, K: 3 => Precision: 0.1282731442831623, Recall: 0.024226936620371394
Support: 0.01, Confidence: 0.7, K: 5 => Precision: 0.1285648883742224, Recall: 0.024968903954694664
Support: 0.01, Confidence: 0.7, K: 7 => Precision: 0.12858450896584095, Recall: 0.02500868887521236
Support: 0.01, Confidence: 0.7, K: 10 => Precision: 0.12858450896584095, Recall: 0.02500868887521236


Unnamed: 0,support,confidence,k,precision,recall
0,0.01,0.7,1,0.125054,0.016645
1,0.01,0.7,3,0.12905,0.024489
2,0.01,0.7,5,0.129285,0.025144
3,0.01,0.7,7,0.129307,0.025182
4,0.01,0.7,10,0.129308,0.025184
5,0.01,0.7,1,0.125788,0.016926
6,0.01,0.7,3,0.128751,0.024563
7,0.01,0.7,5,0.129116,0.02533
8,0.01,0.7,7,0.129133,0.025358
9,0.01,0.7,10,0.129133,0.025358
