In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import module_mma
import pandas as pd
import numpy as np
import networkx as nx
from node2vec import Node2Vec
from gensim.models import KeyedVectors

# viz
import umap
import holoviews as hv
import bokeh
hv.extension('bokeh')
from bokeh.io import export_png

In [139]:
# importing fight data
# Forward slash keeps the path portable and avoids the invalid "\d" escape sequence.
df_fights = pd.read_csv("data/data.csv")

# One row per (fight, corner): red and blue fighters end up in a single "fighter" column,
# each carrying the title_bout flag and weight_class of that fight.
fighter_appearances = df_fights.melt(
    id_vars=["title_bout", "weight_class"],
    value_vars=["R_fighter", "B_fighter"],
    value_name="fighter",
).drop(columns="variable")

# creating df_fighters with title_bout information:
# a fighter is flagged as soon as any of their fights was a title bout
df_fighters = (
    fighter_appearances.groupby("fighter")["title_bout"]
    .any()
    .rename("had_title_bout")
    .reset_index()
)

# add weight-class information:
# most frequent weight class per fighter; ties are joined with "-" so the
# aggregation always yields a single string
weight_classes = (
    fighter_appearances.groupby("fighter")["weight_class"]
    .agg(lambda classes: "-".join(classes.mode()))
    .reset_index()
    # keep the original "value" key column so the layout of weight_classes is unchanged
    .rename(columns={"fighter": "value"})
)
df_fighters = df_fighters.merge(weight_classes.set_index("value"), how="left", left_on="fighter", right_index=True)

In [4]:
# Fight graph: one node per fighter, an edge between two fighters who met in a bout
G = nx.from_edgelist(df_fights[["R_fighter","B_fighter"]].values)

# Build the attribute mapping from the two relevant columns only: df_fighters also
# carries weight_class, and dict() over three-column rows raises ValueError.
# bool() stores plain Python booleans on the nodes.
had_title_bout_by_fighter = {
    fighter: bool(flag)
    for fighter, flag in zip(df_fighters["fighter"], df_fighters["had_title_bout"])
}
nx.set_node_attributes(G, had_title_bout_by_fighter, "had_title_bout")

In [38]:
# Write the fight graph (nodes, edges and node attributes) to a GEXF file
gexf_path = "data/mma_fromnetworkx.gexf"
nx.write_gexf(G, gexf_path)

## Node2Vec

In [190]:
# parameters
# These two values are interpolated into the file name of the exported embedding plot.
# NOTE(review): presumably DIM is the node2vec embedding size and WINDOW the
# Word2Vec context window — confirm the training cell actually consumes them.
DIM = 10
WINDOW = 16

In [191]:
# Train node2vec on the fight graph with the DIM / WINDOW parameters, so that the
# embeddings really correspond to the values written into the exported image name
# (previously dimensions=20 / window=4 were hard-coded and the constants were ignored).
node2vec = Node2Vec(G, dimensions=DIM, walk_length=20, num_walks=50, workers=4)
model = node2vec.fit(window=WINDOW, min_count=1)
# Keep only the keyed vectors (node label -> embedding); the full model is not needed below
node_embeddings = model.wv
# Optional cache: uncomment to persist / reload the embeddings instead of retraining
# node_embeddings.save("node2vec.embeddings")
# node_embeddings = KeyedVectors.load("node2vec.embeddings")

Computing transition probabilities:   0%|          | 0/2139 [00:00<?, ?it/s]

In [192]:
# Project the node embeddings to 2-D for plotting.
# A fixed random_state makes the UMAP layout reproducible across runs
# (the seed value itself is arbitrary).
reducer = umap.UMAP(random_state=42)
embeddings_2d = reducer.fit_transform(node_embeddings.vectors)

In [193]:
# 2-D coordinates of every embedded node, labelled with the node key
coords = pd.DataFrame(
    {
        "x": embeddings_2d[:, 0],
        "y": embeddings_2d[:, 1],
        "name": node_embeddings.index_to_key,
    }
)

# Left-join the per-fighter attributes onto the coordinates by fighter name
fighter_info = df_fighters.set_index("fighter")
df = coords.merge(fighter_info, how="left", left_on="name", right_index=True)

In [194]:
# Scatter plot of the 2-D embedding: one point per fighter, coloured by weight class
points = hv.Points(
    df,
    kdims=["x", "y"],
    vdims=["had_title_bout", "name", "weight_class"],
)

# Hover tool showing the fighter name and attributes
tooltips = [
    ("Name", "@name"),
    ("had_title_bout", "@had_title_bout"),
    ("weight_class", "@weight_class"),
]
hover = bokeh.models.HoverTool(tooltips=tooltips)

plot_options = dict(
    color="weight_class",
    cmap="Category20",
    tools=[hover],
    width=600,
    height=600,
    show_legend=False,
)
plot = points.opts(**plot_options)
plot

In [195]:
# Render the HoloViews plot to a Bokeh figure and save it as a PNG named after DIM / WINDOW
png_path = f"images/node2vec_embeddings_weight_class/DIM{DIM}_WINDOW{WINDOW}.png"
export_png(hv.render(plot), filename=png_path)

'c:\\Users\\leroy\\OneDrive\\Bureau\\K\\Apprentissage\\mma-graph\\images\\node2vec_embeddings_weight_class\\DIM10_WINDOW16.png'

It seems that node2vec embeddings will not help with node classification here: title contenders and other fighters are intermingled in the embedding space rather than separated. This is probably because node2vec captures the dominant structure of the graph, which in this case is the weight classes.

# <>