In [13]:
import polars as pl
import matplotlib.pyplot as plt
import networkx as nx
from pyvis.network import Network

In [69]:
# Load the challenges table, keep the rows flagged as successful, and reduce
# them to the distinct (challenge_play_id, challenge_on_id) pairs.
challenge_df = (
    pl.read_csv("./data/challenges.csv")
    .filter(pl.col("challenge_is_successful").eq(True))
    .select(['challenge_play_id', 'challenge_on_id'])
    .unique()
)
challenge_df

challenge_play_id,challenge_on_id
i64,i64
33,29
127,126
154,146
229,228
253,251
…,…
828,824
870,864
1002,1001
1156,1155


In [70]:
# Load the plays table and keep only the id, name and category columns.
# NOTE(review): ignore_errors=True lets the read carry on past values it
# cannot parse -- confirm that is acceptable for this file.
play_df = (
    pl.read_csv("./data/plays.csv", ignore_errors=True)
    .select(['play_id', 'play_name', 'play_category'])
)
play_df

play_id,play_name,play_category
i64,str,str
1,"""Donna Summer""","""Seasons"""
2,"""Richard E. Gra…","""Verbs as surna…"
3,"""Gary Lineker""","""Difficult to s…"
4,"""Rod Hull""","""Long and thin …"
5,"""Neil Armstrong…","""Americans"""
…,…,…
1198,"""Eileen Drewery…","""Shown with Eng…"
1199,"""Kerem Topuz""","""Have met / are…"
1200,"""June Sarpong""","""Forenames shor…"
1201,"""Margaret Atwoo…","""Been to Canada…"


In [72]:
# Step 1: look up the play referenced by challenge_play_id and keep its name
# as "challenged_name"; that play's category and the id itself are discarded.
challenges_named = (
    challenge_df
    .join(play_df, left_on="challenge_play_id", right_on="play_id", how="left")
    .rename({"play_name": "challenged_name"})
    .drop(["challenge_play_id", "play_category"])
)

# Step 2: look up the play referenced by challenge_on_id and keep its name
# ("category_name") and its category ("challenge_category"), then discard
# the remaining id column.
join_df = (
    challenges_named
    .join(play_df, left_on="challenge_on_id", right_on="play_id", how="left")
    .drop(["challenge_on_id"])
    .rename({"play_name": "category_name", "play_category": "challenge_category"})
)

join_df

challenged_name,category_name,challenge_category
str,str,str
"""Rob Lee""","""Steve Oram""","""Surnames conta…"
"""Darth Vader""","""James Bond""","""People in film…"
"""Steve Cram""","""Mark Watson""","""Left handers"""
"""Michaela Coel""","""Angela Merkel""","""Definite visit…"
"""Clive James""","""Tim Key""","""Russian speake…"
…,…,…
"""Carol Vorderma…","""Noel Gallagher…","""Christmas-rela…"
"""Cherie Blair""","""Hamburglar""","""Appearances in…"
"""Mike Bubbins""","""Hal Robson-Kan…","""Shown in red s…"
"""Sadio Mané""","""Mert Aksac""","""Initials share…"


In [73]:
# Build an interactive directed graph from join_df and write it to nx.html.
# Every row becomes one edge drawn from the "category_name" play to the
# "challenged_name" play, with "challenge_category" as the edge's hover title.

# Shared node appearance, kept in one place so both endpoints stay in sync.
NODE_STYLE = {"color": "#ACDBC9", "borderWidth": 0}

# join_df is produced by left joins, so a play id with no match in play_df
# leaves a null name. pyvis only accepts str/int node ids, so rows with a
# missing endpoint are skipped instead of aborting the build part-way through.
edges_df = join_df.drop_nulls(subset=["category_name", "challenged_name"])

nt = Network('1000px', '100%', directed=True, cdn_resources="in_line")

edge_rows = zip(
    edges_df["category_name"],
    edges_df["challenged_name"],
    edges_df["challenge_category"],
)

for src, dst, cat in edge_rows:
    # add_node is issued for both endpoints of every edge.
    # NOTE(review): this relies on pyvis ignoring an id it already holds -- confirm.
    nt.add_node(src, **NODE_STYLE)
    nt.add_node(dst, **NODE_STYLE)
    nt.add_edge(src, to=dst, title=cat, arrows={"to": {"enabled": True}})


nt.show('nx.html', notebook=False)

nx.html
