In [None]:
import os
from pprint import pprint
from itertools import accumulate

from dotenv import load_dotenv
from itables import show
import pymongo

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox

from PIL import Image

from maths import compute_ppmi

from mlxtend.preprocessing import TransactionEncoder

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [None]:
# Populate os.environ from a local .env file (if any) before reading the settings below.
load_dotenv()

# for the actual crawler check https://github.com/TheRaphael0000/league_crawler_mongodb
# Connection settings are read from the environment so no credentials live in the notebook.
mongo_connection = {
    "host": os.getenv("MONGO_HOST"),
    # int(None) / int("") would raise when MONGO_PORT is unset or empty,
    # so fall back to MongoDB's standard port instead.
    "port": int(os.getenv("MONGO_PORT") or 27017),
    "username": os.getenv("MONGO_USERNAME"),
    "password": os.getenv("MONGO_PASSWORD"),
}

client = pymongo.MongoClient(**mongo_connection)
db = client.league
matches = db.matches

# MongoDB regex filter selecting game versions that start with "16.1."
# (i.e. patch 16.1; "16.10.x" and later do not match because of the escaped dot after the 1).
PATH_REG = {"$regex": r"^16\.1\..*$"}

In [None]:
# Number of stored matches for the selected patch on the EUW1 platform.
euw_patch_filter = {"info.gameVersion": PATH_REG, "info.platformId": "EUW1"}
print("matches count:", matches.count_documents(euw_patch_filter))

In [None]:
# One entry per participant: the list of non-zero item ids found in their item slots.
dataset = []

# Dotted paths of the seven item slots (item0 .. item6) of every participant.
itemkey = [f"info.participants.item{i}" for i in range(0, 7)]

# Use the same patch + platform filter as the "matches count" query, so the
# item sets analysed below come from exactly the matches that were counted
# (and that the figure caption describes as EUW).
match_filter = {"info.gameVersion": PATH_REG, "info.platformId": "EUW1"}

# Project only the item slots to keep the transferred documents small.
matches_ = matches.find(match_filter, {key: 1 for key in itemkey}).to_list()
for match in matches_:
    # Tolerate documents without participants instead of failing on None.
    participants = (match.get("info") or {}).get("participants") or []
    for slots in participants:
        # A slot value of 0 is dropped (presumably an empty slot -- confirm against the API schema).
        owned_items = [item_id for item_id in slots.values() if item_id != 0]

        # Ignore empty item sets to get a better estimate of the item set count.
        if owned_items:
            dataset.append(owned_items)

print("item sets:", len(dataset))

In [None]:
# One-hot encode the item sets: one row per item set, one column per distinct item id.
te = TransactionEncoder()
te.fit(dataset)
te_ary = te.transform(dataset)

df = pd.DataFrame(te_ary, columns=te.columns_).astype(float)

# (items x item sets) @ (item sets x items): entry (a, b) counts the item sets
# containing both a and b; the diagonal holds each item's own frequency.
co_occurrence_similarity = df.T @ df

# PPMI re-weights the raw counts so that frequently bought items do not dominate
# (compute_ppmi comes from the local `maths` module).
ppmi_matrix = compute_ppmi(co_occurrence_similarity)

print(te.columns_)
print(ppmi_matrix.shape)

In [None]:
def compute_PCA(ppmi_matrix):
    """Standardise a matrix and project its rows onto their principal components.

    Parameters
    ----------
    ppmi_matrix : pandas.DataFrame
        Matrix to decompose. Each row is one observation; the row index is
        reused to label the rows of the returned projection.

    Returns
    -------
    pca : sklearn.decomposition.PCA
        The fitted PCA model (e.g. for ``explained_variance_ratio_``).
    pca_df : pandas.DataFrame
        Projected rows, indexed like ``ppmi_matrix``; columns are the integer
        component numbers starting at 0.
    """
    # Bring every column to zero mean / unit variance before the decomposition.
    scaled_data = StandardScaler().fit_transform(ppmi_matrix)

    # PCA accepts at most min(n_samples, n_features) components. For the square
    # matrices used in this notebook this equals shape[0]; taking the minimum
    # also keeps the function usable on non-square input.
    pca = PCA(n_components=min(ppmi_matrix.shape))
    pca_results = pca.fit_transform(scaled_data)
    pca_df = pd.DataFrame(data=pca_results, index=ppmi_matrix.index)

    return pca, pca_df

def plot_scree_plot(pca):
    """Show the scree plot and the cumulative explained variance of a fitted PCA.

    Two figures are displayed (per-component explained variance ratio, then its
    running sum), followed by one printed line per component.

    Parameters
    ----------
    pca : fitted PCA model
        Only its ``explained_variance_ratio_`` attribute is read.
    """
    ratios = pca.explained_variance_ratio_
    component_numbers = range(len(ratios))

    plt.figure()
    plt.title("Scree plot")
    plt.ylabel("Explained Variance Ratio")
    plt.xlabel("Component number")
    plt.plot(component_numbers, ratios)
    plt.show()

    plt.figure()
    plt.title("Cumulative Explained Variance")
    plt.ylabel("Cumulative Explained Variance")
    plt.xlabel("Component number")
    plt.plot(component_numbers, list(accumulate(ratios)))
    plt.show()

    # Components are numbered from 0, matching the column labels of the projection.
    for i, v in enumerate(ratios):
        print(f"PC{i} ({v:.2%} variance)")

def plot_PCA(pca_df, title, figsize, icon_dir="item", icon_size=64, icon_alpha=170):
    """Draw the first two principal components, using each row's icon as its marker.

    A new matplotlib figure is created and left current so the caller can
    ``plt.savefig`` / ``plt.show`` it.

    Parameters
    ----------
    pca_df : pandas.DataFrame
        Projection with columns ``0`` and ``1`` (first two components). Each
        index label names an icon file ``<icon_dir>/<label>.png``.
    title : str
        Figure title.
    figsize : tuple
        Figure size passed to ``plt.figure``.
    icon_dir : str, optional
        Directory containing the icon PNG files.
    icon_size : int, optional
        Icons are resized to ``icon_size`` x ``icon_size`` pixels.
    icon_alpha : int, optional
        Alpha value (0-255) applied to the whole icon so overlapping icons
        stay visible.

    Raises
    ------
    FileNotFoundError
        If the icon file of any row is missing.
    """
    plt.figure(figsize=figsize)
    plt.title(title)
    # Zero-sized markers: nothing is drawn, but the scatter still sets the axis
    # limits around the points (the icon artists added below are not relied on for that).
    plt.scatter(pca_df[0], pca_df[1], s=0)

    ax = plt.gca()

    for item_id, x0, y0 in zip(pca_df.index, pca_df[0], pca_df[1]):
        # Close the file handle as soon as the resized copy exists.
        with Image.open(f"{icon_dir}/{item_id}.png") as source:
            icon = source.resize((icon_size, icon_size))
        icon.putalpha(icon_alpha)

        imagebox = OffsetImage(np.asarray(icon), zoom=1)
        ax.add_artist(AnnotationBbox(imagebox, (x0, y0), frameon=False))
    plt.tight_layout()


In [None]:
# "scraped" from lolshop.gg
# js query for each tab: console.log(String(Array(...document.querySelector(".Grid_grid__IyzWn").children).map(a => a.dataset.itemId)))

# Role name -> item ids listed under that role's tab.
# The same id can appear under several roles (e.g. 3026 is listed for Fighter, Marksman and Assassin).
items_by_role = {
    "Fighter": [3078, 2501, 3074, 3748, 6333, 6631, 3026, 3053, 3153, 3156, 3161, 6609, 6610, 2517, 3071, 3073, 3181, 3302, 6694, 3004, 3742, 6692, 3091],
    "Marksman": [3031, 3072, 3036, 3026, 3139, 3153, 3032, 3156, 3033, 3124, 3302, 6672, 6673, 6676, 6694, 3004, 3115, 3508, 2523, 3091, 3087, 2512, 3046, 3085, 3094, 6675],
    "Assassin": [3074, 2520, 3026, 3156, 6609, 6610, 3071, 3146, 3814, 6676, 6694, 6697, 6699, 3004, 6698, 3142, 3179, 6696, 6701, 6695],
    "Mage": [3089, 3157, 4645, 2510, 2522, 4633, 3102, 3135, 3137, 3146, 4629, 6653, 3003, 3100, 3115, 3165, 2503, 4646, 6655, 3118, 4628, 3152, 3116, 6657, 3041, 3869, 3870, 3871, 3876, 3877],
    "Tank": [2501, 3748, 3053, 6665, 3083, 3084, 2504, 3742, 6662, 2502, 4401, 6664, 3065, 3068, 3143, 8020, 2525, 3110, 3075, 3002, 3119, 3121, 3109, 3050, 3190],
    "Support": [3165, 4401, 8020, 3110, 6621, 3075, 3002, 3107, 3109, 3222, 2526, 2530, 4005, 6616, 2065, 3050, 3190, 3504, 6617, 6620, 2524, 3869, 3870, 3871, 3876, 3877]
}

# Caption lines combined into the title of every generated figure.
title_str = "PCA on items co-occurrence"
# NOTE(review): the game count and data description are hard-coded text, not derived from the query -- keep in sync manually.
data_str = "Data: Patch 16.1.*, ~93k games, Ranked Solo/Duo Queue, EUW, Randomwalk"
source_str = "Data processing / graphics source code: https://github.com/TheRaphael0000/league_playground/blob/main/src/items_pca.ipynb"

# plt.savefig does not create missing directories; make sure the target exists
# so the notebook also runs on a fresh checkout.
os.makedirs("output", exist_ok=True)

# One figure per role, computed on the PPMI sub-matrix restricted to that role's items.
for role, role_items in items_by_role.items():

    # Keep only the role's items on both axes (raises KeyError if an id is absent from the matrix).
    filtered_ppmi_matrix = ppmi_matrix.loc[role_items, role_items]

    pca, pca_df = compute_PCA(filtered_ppmi_matrix)
    # plot_scree_plot(pca)  # uncomment to inspect the explained variance per component
    plot_PCA(pca_df, f"{title_str}\n{role} items\n{data_str}\n{source_str}", (15, 12))
    plt.savefig(f"output/{role}.png")

# Overview figure over every item; a much larger canvas keeps the icons from overlapping too heavily.
pca, pca_df = compute_PCA(ppmi_matrix)
plot_PCA(pca_df, f"{title_str}\nAll items\n{data_str}\n{source_str}", (50, 40))
plt.savefig("output/all.png")