In [1]:
import pandas as pd
import numpy as np
import functions as f
import networkx as nx
import matplotlib.pyplot as plt

from bokeh.io import show, output_file
from bokeh.models import Plot, Range1d, MultiLine, Circle, HoverTool, TapTool, BoxSelectTool, BoxZoomTool, ResetTool
from bokeh.models.graphs import from_networkx, NodesAndLinkedEdges, EdgesAndLinkedNodes
from bokeh.palettes import Spectral4
from bokeh.plotting import gridplot

### Getting data from website (residentevildatabase.com)

In [3]:
df = f.get_all_personas()

### Transforming data to transacional dataset

In [16]:
def sort_names( row ):
    'Method to drop same relationship, i.e., persona_1 -> persona_2, persona_2 -> persona_1'
    names = [ row["persona_1"], row["persona_2"] ]
    names.sort()
    names = ", ".join(names)
    return names

def fix_persona( person_name ):
    name = person_name.split("-")
    name = " ".join( [i[0].upper() + i[1:] for i in name  ] )
    return name

def plot_df(df, persona=None):
    G = nx.from_pandas_edgelist(df=df, source="persona_1", target="persona_2",
                                edge_attr=["persona_1", "persona_2", "first_game", "last_game", 'weight'],
                                create_using=nx.MultiGraph())

    plot = Plot(plot_width=1300, plot_height=1300,
                x_range=Range1d(-1.1,1.1), y_range=Range1d(-1.1,1.1))

    if persona:
        plot.title.text = "Resident Evil Graph - " + persona
    else:
        plot.title.text = "Resident Evil Graph"
    
    plot.add_tools(HoverTool(tooltips=[("Persona 1", "@persona_1"),
                                       ("Persona 2", "@persona_2"),
                                       ("First Game", "@first_game"),
                                       ("Last Game", "@last_game"),
                                       ("Interactions", "@weight")]), TapTool(), BoxSelectTool(), BoxZoomTool(), ResetTool())

    graph_renderer = from_networkx(G, nx.spring_layout, scale=1, center=(0,0))

    graph_renderer.node_renderer.glyph = Circle(size=15, fill_color=Spectral4[0])
    graph_renderer.node_renderer.selection_glyph = Circle(size=15, fill_color=Spectral4[2])
    graph_renderer.node_renderer.hover_glyph = Circle(size=15, fill_color=Spectral4[1])

    graph_renderer.edge_renderer.glyph = MultiLine(line_color="#CCCCCC", line_alpha=0.8, line_width=5)
    graph_renderer.edge_renderer.selection_glyph = MultiLine(line_color=Spectral4[2], line_width=5)
    graph_renderer.edge_renderer.hover_glyph = MultiLine(line_color=Spectral4[1], line_width=5)

    graph_renderer.selection_policy = NodesAndLinkedEdges()
    graph_renderer.inspection_policy = EdgesAndLinkedNodes()

    plot.renderers.append( graph_renderer )

    if persona:
        output_file( persona + "network.html" )
    else:
        output_file( "graph.html" )
    return G,plot

def plot_persona_network(df, persona):
    personas = np.append( df["persona_2"][ df["persona_1"] == persona ].unique() , df["persona_1"][ df["persona_2"] == persona ].unique() )
    personas = np.unique( personas )
    df_new = df[ (df[ 'persona_1' ].isin( personas )) & (df[ 'persona_2' ].isin( personas )) ].copy()
    
    G, plot = plot_df(df_new, persona)
    
    return G, plot

In [22]:
df_filter = df[df["apparition"]!='Biohazard - Resident Evil: Operation Raccoon City']

df_analysis = pd.merge( left=df_filter, right=df_filter,
                        how="left", on="apparition",
                        suffixes=("_1", "_2") )

df_analysis = df_analysis[ df_analysis["persona_1"] != df_analysis["persona_2"] ]

df_analysis["names"] = df_analysis.apply(sort_names, axis=1)
df_analysis = df_analysis.drop_duplicates(subset=["names", "apparition"])

df_analysis = df_analysis[["persona_1", "persona_2", "apparition"]]

df_analysis["persona_1"] = df_analysis["persona_1"].apply( fix_persona )
df_analysis["persona_2"] = df_analysis["persona_2"].apply( fix_persona )

group = df_analysis.groupby(by=["persona_1", "persona_2"])

df_1 = group["apparition"].count().reset_index()
df_1.rename(columns={"apparition":"weight"}, inplace=True)

df_2 = group["apparition"].apply( lambda x: [i for i in x][0]  ).reset_index()
df_2.rename(columns={"apparition":"first_game"}, inplace=True)

df_3 = group["apparition"].apply( lambda x: [i for i in x][-1]  ).reset_index()
df_3.rename(columns={"apparition":"last_game"}, inplace=True)

df_analysis = pd.merge(left=df_1, right=df_2, how='inner')
df_analysis = pd.merge(left=df_analysis, right=df_3, how='inner')

df_analysis.head()

Unnamed: 0,persona_1,persona_2,weight,first_game,last_game
0,Ada Wong,Adam Benford,1,Biohazard - Resident Evil 6,Biohazard - Resident Evil 6
1,Ada Wong,Albert Wesker,1,Biohazard - Resident Evil 4,Biohazard - Resident Evil 4
2,Ada Wong,Alexander Ashford,1,Biohazard - Resident Evil: The Darkside Chroni...,Biohazard - Resident Evil: The Darkside Chroni...
3,Ada Wong,Alexander Buddy Kozachenko,1,Biohazard - Resident Evil: Damnation,Biohazard - Resident Evil: Damnation
4,Ada Wong,Alexia Ashford,1,Biohazard - Resident Evil: The Darkside Chroni...,Biohazard - Resident Evil: The Darkside Chroni...


In [23]:
best_persons = ['Jill Valentine', 'Ada Wong', 'Leon S Kennedy', 'Claire Redfield', 'Chris Redfield', 'Albert Wesker']

plots = [ plot_persona_network(df_analysis, i)[-1] for i in best_persons ]

p = gridplot( [ [plot_df(df_analysis)[-1]] + plots] )
show(p)