# The Fairy Network!

In [99]:
import pandas as pd
import numpy as np
import itertools
from pyvis import network as net
from pyvis.network import Network
import networkx as nx
import igraph

Fortunately, because of the way the transcripts are written, every dialogue is written in a single line. Therefore, we can read the file line by line to get the dialogues. However, it is important to note that a single line may not be a dialogue but a scene description, or in other cases dialogues may have scene descrptions inside of them, meaning that just because a name is present in a line doesn't mean that somebody is referring to that character. (maybe strip those away?)

In [6]:
Transcripts = {}
for i in range(1,15):
    with open(f"Transcripts/Episode {i}.txt", "r", encoding= "ISO-8859-1") as file:
        try:
            Transcripts["Episode " + f"{i}"] = file.readlines()
        except UnicodeDecodeError:
            print(f"Decoding error in Episode {i}")


# Note: Episodes 13 and 14 couldn't be read due to decoding issues 
# under the standard encoding UTF-8. Encoding ISO-8859-1 which is also
# common seemed to work properly.
 

In [7]:
Transcripts["Episode 1"][0:5]

['(Bugle playing Reveille is heard; Timmy is asleep, snoring; camera points to Cosmo and Wanda in fish form)\n',
 'Wanda: Ready, Cosmo?\n',
 'Cosmo: Ready, Wanda.\n',
 'Cosmo and Wanda: 1, (turn to their fairy form) 2, 3!\n',
 'Cosmo: (flies near Timmy) Wakey-wakey, Timmy!\n']

## Creating the Network

### Nodes
First we need to find all the potential nodes for the network, and to do so we have to keep the following in mind:

 - There will be dialogues of unimportant characters which we don't want to take into account.
 - Some characters may have more than one way to refer to them (e.g. Mr. Turner is called "Dad" by Timmy)
 - Coupled with the previous one, there may be inconsistencies in the way characters are referred to 
by the writers between episodes.

In order to get all the possible candidates for nodes, notice that every line that corresponds to a dialogue starts with the name of the character that speaks, followed by a colon and then the actual dialogue. We will use this to find all characters that speak.

In [None]:
# Find the first instance of a colon (":") and everything that comes before that may be considered a potential node.

def Retrieve_speaker(dialogue):
    colon = dialogue.find(":")
    if colon != -1:
        character = dialogue[:colon]
    else:
        character = None
    return character

def Characters_in_episode(episode_num):
    episode = Transcripts["Episode " + str(episode_num)]
    characters = []
    for line in episode:
        speaker = Retrieve_speaker(line)
        if speaker not in characters and speaker!= None:
            characters.append(speaker)
    return characters

Characters_in_episode(11)
# We may be missing Chompy in this episode as well as Phillip in other ones.



In [12]:
def Possible_Characters():
    characters = []
    for i in range(1,15):
        characters += Characters_in_episode(i)
    characters = list(set(characters)) # drop duplicate instances of characters
    return characters
characters_clean = Possible_Characters()
len(characters_clean)

165

### Edges

The graph will be directed, and an edge will be considered from node A to node B when character A names character B in a dialogue.

In [94]:
#edges = []
edges_clean = pd.DataFrame({"Character A": [], "Character B": []})
for i in range(1, 15):
    edges_clean[f"Weight Ep. {i}"] = []



def Single_Edge(dialogue):
    char_A = Retrieve_speaker(dialogue)
    char_B = [] # There may be more than one character referenced to in the dialogue
    for character in characters_clean:
        if dialogue.find(character) != -1:
            char_B.append(character)

    try:
        char_B.remove(char_A)  # Self reference will not be considered
    except ValueError:
        pass

    if len(char_B) > 0 and char_A != None:
        char_B = list(itertools.product([char_A], char_B)) # make all directed pairs
    else:
        char_B = []
    return char_B


def Edges_episode(episode_num):
    edges_ep = []
    for dialogue in Transcripts[f"Episode {episode_num}"]:
        edges_ep += Single_Edge(dialogue)
    return edges_ep

for i in range(1,15):
    for edge in Edges_episode(i):
        # First, check using a boolean mask whether a particular connection 
        # is already on the edges dataframe
        mask = (edges_clean["Character A"] == edge[0]) & (edges_clean["Character B"] == edge[1])
        if not edges_clean[mask].any(axis = None):
            # If it isn't, then add it to the df
            new_row = pd.DataFrame([{ "Character A": edge[0], "Character B": edge[1],
                                      **{f"Weight Ep. {j}": 1 if j == i else 0 for j in range(1, 15)}}])
            edges_clean = pd.concat([edges_clean, new_row], ignore_index=True)
        else:
            # If it is, add to the weight of the particular episode
            edges_clean.loc[mask, f"Weight Ep. {i}"] += 1    


# def Edges():
#     edges = []
#     for i in range(1,15):
#         edges += Edges_episode(Transcripts[f"Episode {i}"])
#     return edges

In [95]:
edges_clean

Unnamed: 0,Character A,Character B,Weight Ep. 1,Weight Ep. 2,Weight Ep. 3,Weight Ep. 4,Weight Ep. 5,Weight Ep. 6,Weight Ep. 7,Weight Ep. 8,Weight Ep. 9,Weight Ep. 10,Weight Ep. 11,Weight Ep. 12,Weight Ep. 13,Weight Ep. 14
0,Wanda,Cosmo,3.0,5.0,2.0,1.0,0.0,0.0,0.0,1.0,1.0,10.0,2.0,2.0,0.0,0.0
1,Cosmo,Wanda,4.0,1.0,1.0,0.0,3.0,3.0,1.0,2.0,4.0,8.0,3.0,2.0,1.0,0.0
2,Cosmo and Wanda,Cosmo,4.0,1.0,0.0,0.0,1.0,2.0,3.0,3.0,0.0,1.0,0.0,0.0,1.0,1.0
3,Cosmo and Wanda,Wanda,4.0,1.0,0.0,0.0,1.0,2.0,3.0,3.0,0.0,1.0,0.0,0.0,1.0,1.0
4,Cosmo,Timmy,4.0,1.0,4.0,2.0,2.0,10.0,4.0,4.0,1.0,2.0,5.0,6.0,2.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,"[At the sunset, the golden text reads",Cosmo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
361,"[At the sunset, the golden text reads",The April Fool,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
362,"[At the sunset, the golden text reads",Baby New Year,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
363,"[At the sunset, the golden text reads",Timmy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [102]:
G = nx.from_pandas_edgelist(edges_clean, "Character A", "Character B", "Weight Ep. 1")
nt = net.Network(notebook= True, cdn_resources='remote')
nt.from_nx(G)

nt.show("Fairy Network ep 1.html")

Fairy Network ep 1.html
