In [1]:
import pandas as pd

In [2]:
data_dict = pd.read_csv('Harry_Potter_Movies/Data_Dictionary.csv')
data_dict

Unnamed: 0,Table,Field,Description
0,Movies,Movie ID,Unique identifier for each movie
1,,Movie Title,Full movie name
2,,Release Year,Year the movie was released in theaters
3,,Runtime,Length of the movie in minutes
4,,Budget,Budget for the movie is US Dollars
5,,Box Office,Box office revenue for the movie in US Dollars
6,Chapters,Chapter ID,Unique identifier for each chapter
7,,Chapter Name,Name of the chapter in the movie script
8,,Movie ID,Foreign key to match with Movies table
9,,Movie Chapter,Chapter number within each movie script


## Location Significance
This section explores the importance of various places / settings where the scenes are filmed/stage

**Data Needed**:

    - Places
    - Dialogue

In [3]:
places = pd.read_csv('Harry_Potter_Movies/Places.csv')
places_cat = list(set(places['Place Category']))

dialogue = pd.read_csv('Harry_Potter_Movies/Dialogue.csv', encoding='unicode_escape')
dialogue

Unnamed: 0,Dialogue ID,Chapter ID,Place ID,Character ID,Dialogue
0,1,1,8,4,I should have known that you would be here...P...
1,2,1,8,7,"Good evening, Professor Dumbledore. Are the ru..."
2,3,1,8,4,"I'm afraid so, Professor. The good, and the bad."
3,4,1,8,7,And the boy?
4,5,1,8,4,Hagrid is bringing him.
...,...,...,...,...,...
7439,7440,234,71,1,Then Slytherin House will have gained a wonder...
7440,7441,234,71,84,Really?
7441,7442,234,71,1,Really.
7442,7443,234,71,1,Ready?


In [4]:
# Merge on Place ID
dialogue_place = pd.merge(places, dialogue, on="Place ID")
dialogue_place

Unnamed: 0,Place ID,Place Name,Place Category,Dialogue ID,Chapter ID,Character ID,Dialogue
0,1,Flourish & Blotts,Diagon Alley,1040,39,5,"Yer a mess, Harry. Skulkin' 'round Knockturn A..."
1,1,Flourish & Blotts,Diagon Alley,1041,39,1,"I was lost, I- hang on. What were you doing do..."
2,1,Flourish & Blotts,Diagon Alley,1042,39,5,"Me? Oh, I was um I was lookin' for Flesh-Eatin..."
3,1,Flourish & Blotts,Diagon Alley,1043,39,3,Harry! Hagrid!
4,1,Flourish & Blotts,Diagon Alley,1044,39,5,"Hello, Hermione!"
...,...,...,...,...,...,...,...
7439,74,Unknown,Other Magical Locations,6930,214,2,'Fraid he's right. One problem: Snape's Headma...
7440,74,Unknown,Other Magical Locations,6931,215,1,We'll go to Hogsmeade. To Honeydukes. Take the...
7441,74,Unknown,Other Magical Locations,6932,215,1,"There's something wrong with him. In the past,..."
7442,74,Unknown,Other Magical Locations,6933,215,2,Maybe it's because of the Horcruxes. Maybe he'...


In [5]:
# Find the number of times a place name was featured, as well as the place category
placeNameFreq = dialogue_place.groupby(['Place Name'])['Dialogue'].count().reset_index()
placeNameFreq.rename({'Dialogue': 'Frequency'}, axis=1, inplace=True)
placeNameFreq.sort_values(['Frequency'], ascending=False)

Unnamed: 0,Place Name,Frequency
19,Great Hall,557
1,4 Privet Drive,347
22,Griffindor Common Room,343
37,Ministry of Magic,327
24,Hagrid's Hut,313
...,...,...
39,Ollivanders,10
31,Knockturn Alley,8
11,Detention Room,7
49,Restricted Section,7


## Character Relation
This section explores the importance and relationship between characters

Dataset Needed:

    - relations.csv
    - characters.csv

In [6]:
relations = pd.read_csv('Harry_Potter_Universe/relations.csv')
characters = pd.read_csv('Harry_Potter_Universe/characters.csv')

In [7]:
# Merge relations with character
character_dict = {}
for index, row in characters.iterrows():
    character_dict[row['id']] = row['name']
    
connections = relations.copy()
connections['source_name'] = connections['source'].apply(lambda x: character_dict[x])
connections['target_name'] = connections['target'].apply(lambda x: character_dict[x])
connections

Unnamed: 0,source,target,type,source_name,target_name
0,0,1,-,Regulus Arcturus Black,Sirius Black
1,0,25,-,Regulus Arcturus Black,Bellatrix Lestrange
2,0,45,-,Regulus Arcturus Black,Lord Voldemort
3,1,0,-,Sirius Black,Regulus Arcturus Black
4,1,11,+,Sirius Black,Albus Dumbledore
...,...,...,...,...,...
508,63,58,-,Aragog,Ron Weasley
509,64,21,+,Grawp,Hermione Granger
510,64,22,+,Grawp,Rubeus Hagrid
511,64,39,+,Grawp,Harry Potter


In [8]:
character_links = connections.groupby(['source_name'])['target_name'].apply(lambda x: ','.join(set(x))).reset_index()

# All characters per line
character_links['All'] = character_links['source_name'] + ',' + character_links['target_name']
character_links

Unnamed: 0,source_name,target_name,All
0,"Alastor ""Mad-Eye"" Moody","Molly Weasley,Bartemius ""Barty"" Crouch Jr.,Alb...","Alastor ""Mad-Eye"" Moody,Molly Weasley,Bartemiu..."
1,Alberforth Dumbledore,"Albus Dumbledore,Dobby,Ron Weasley,Hermione Gr...","Alberforth Dumbledore,Albus Dumbledore,Dobby,R..."
2,Albus Dumbledore,"Lily Potter,Sirius Black,Ginny Weasley,Bill We...","Albus Dumbledore,Lily Potter,Sirius Black,Ginn..."
3,Alice Longbottom,"Frank Longbottom,Neville Longbottom","Alice Longbottom,Frank Longbottom,Neville Long..."
4,Aragog,"Rubeus Hagrid,Ron Weasley,Hermione Granger,Har...","Aragog,Rubeus Hagrid,Ron Weasley,Hermione Gran..."
...,...,...,...
60,Vernon Dursley,"Albus Dumbledore,Dudley Dursley,Harry Potter,P...","Vernon Dursley,Albus Dumbledore,Dudley Dursley..."
61,Viktor Krum,"Igor Karkaroff,Fleur Delacour,Cedric Diggory,H...","Viktor Krum,Igor Karkaroff,Fleur Delacour,Cedr..."
62,Vincent Crabbe,"Dolores Janes Umbridge,Gregory Goyle,Draco Mal...","Vincent Crabbe,Dolores Janes Umbridge,Gregory ..."
63,Vincent Crabbe Sr.,"Bellatrix Lestrange,Lucius Malfoy,Severus Snap...","Vincent Crabbe Sr.,Bellatrix Lestrange,Lucius ..."


In [9]:
# Write to textfile
links = list(character_links['All'])
with open('characters_link.txt', 'w') as f:
    for line in links:
        f.write(line)
        f.write('\n')

In [10]:
import pandas as pd
import seaborn as sns
import networkprocessing as np

In [11]:
# Read in the text file and process the network
np.make_network("characters_link.txt")

Processing characters_link.txt


In [12]:
# Examine the dataset
networkinfo = pd.read_excel('characters_link_analysis/networkInfo.xlsx')
nodeInfo = pd.read_excel('characters_link_analysis/nodeInfo.xlsx')

In [13]:
networkinfo

Unnamed: 0,network_id,nodes,edges,avg path length,avg degree,avg weighted degree,diameter,radius,density
0,characters_link,65,962,1.566827,29.6,90.923077,3,2,0.4625


In [14]:
nodeInfo

Unnamed: 0.1,Unnamed: 0,label,degree,weighted_degree,betweenness,normalized_betweenness,eccentricity,closeness,network
0,0,"Alastor ""Mad-Eye"" Moody",46,143,18.843783,0.009347,2,0.780488,characters_link
1,1,Molly Weasley,43,168,8.069038,0.004002,2,0.752941,characters_link
2,2,"Bartemius ""Barty"" Crouch Jr.",15,20,1.532490,0.000760,3,0.561404,characters_link
3,3,Albus Dumbledore,49,180,99.274102,0.049243,2,0.810127,characters_link
4,4,Lord Voldemort,57,322,213.804573,0.106054,2,0.901408,characters_link
...,...,...,...,...,...,...,...,...,...
60,60,Moaning Myrtle,1,1,0.000000,0.000000,3,0.481203,characters_link
61,61,Nicolas Flamel,1,1,0.000000,0.000000,3,0.450704,characters_link
62,62,Olympe Maxime,14,16,0.756818,0.000375,3,0.556522,characters_link
63,63,Regulus Arcturus Black,24,32,3.219636,0.001597,3,0.609524,characters_link
