<a href="https://colab.research.google.com/github/Guimol/Star-Wars-Characters-Relations/blob/main/Star_Wars_Characters_Relations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing Libraries

In [None]:
from io import open
import requests

In [None]:
import re

# Obtaining the files
* Path for the data file (external link): [Star Wars Movie Scripts](https://www.kaggle.com/datasets/xvivancos/star-wars-movie-scripts)
* GitHub repository: [Star Wars Characters Relations](https://github.com/Guimol/Star-Wars-Characters-Relations)

In [None]:
# Remote location of each movie script, keyed by the title used throughout the notebook
movie_files = {
  "movieIV": "https://raw.githubusercontent.com/Guimol/Star-Wars-Characters-Relations/main/datasets/SW_EpisodeIV.txt",
  "movieV": "https://raw.githubusercontent.com/Guimol/Star-Wars-Characters-Relations/main/datasets/SW_EpisodeV.txt",
  "movieVI": "https://raw.githubusercontent.com/Guimol/Star-Wars-Characters-Relations/main/datasets/SW_EpisodeVI.txt",
}

# Initializing movie dictionaries
movies = dict()

# Creating local files for the corpus and opening them
for title, link in movie_files.items():
  local_path = title + '.txt'

  # Access a link; the timeout keeps the cell from hanging on a stalled connection
  response = requests.get(link, allow_redirects=True, timeout=30)

  # Fail loudly on an HTTP error instead of saving an error page as the script
  response.raise_for_status()

  # Store the downloaded bytes locally (the context manager closes the handle)
  with open(local_path, 'wb') as local_file:
    local_file.write(response.content)

  # Fill the dictionary with the lines read back from the local file
  with open(local_path, 'r') as local_file:
    movies[title] = {"raw": local_file.readlines()}

# Text Preprocessing

Convert the dialogs to lower case.

In [None]:
# Store a lower-cased copy of every raw line under the 'lower' key of each movie
for title, movie in movies.items():
  lowered = []
  for raw_line in movie['raw']:
    lowered.append(raw_line.lower())
  movie['lower'] = lowered

# Characters Identification

The `Character` class stores all information regarding a character:
* Name
* Dialogs
* Connections

In [30]:
class Character:
  """A character together with its dialog lines and its relations.

  Attributes:
    name: The character's name; it is the identity used for equality and hashing.
    dialogs: Maps a line id to the dialog text stored with `add_line`.
    relation: Maps another character's name to a dict of counters created
      by `init_relation`.
  """

  def __init__(self, name: str):
    self.name = name
    self.dialogs = dict()
    self.relation = dict()

  def __str__(self):
    return f"{self.name}"

  def __repr__(self):
    return f"Class Character(name={self.name})"

  def __eq__(self, comparison):
    # Two characters are equal when their names match; any other object
    # (e.g. a plain string) is compared against the name directly
    if isinstance(comparison, Character):
      return self.name == comparison.name
    else:
      return self.name == comparison

  def __hash__(self):
    # Defining __eq__ alone makes instances unhashable. Hashing the name keeps
    # the hash consistent with __eq__, including equality with a plain string
    return hash(self.name)

  def init_relation(self, character: str):
    """Create (or reset) the relation counters towards `character`, both starting at 0."""
    # NOTE(review): the 'negattive' key is misspelled; it is kept as-is because
    # code outside this class may read it -- confirm before renaming
    self.relation[character] = {'positive': 0, 'negattive': 0}

  def add_line(self, line_id: int, line: str):
    """Store `line` under `line_id`, replacing any line already stored with that id."""
    self.dialogs[line_id] = line

  def clear_dialogs(self):
    """Remove every stored dialog line."""
    self.dialogs.clear()

Iterate over a movie and add each dialog line to the `Character` object of the character who speaks it.

In [31]:
character_list = list()

# Matches one double-quoted field; compiled once instead of on every line
quoted_field = re.compile(r'"(.*?)"')

# Name -> Character lookup so that finding a speaker does not rescan character_list
characters_by_name = dict()

for line in movies['movieIV']['lower']:

  # RegEx to split text: "text" "other text" "third text" -> [text, other text, third text]
  text = quoted_field.split(line.strip())

  # Remove the empty / single-space separators left between the fields
  text = [x for x in text if x not in ('', ' ')]

  # Only handle lines in the pattern: "<line_number>" "<character_name>" "<dialog>"
  if len(text) != 3:
    continue

  # Remove any " still present in the preprocessed fields
  line_id, character_name, dialog = (x.replace('"', '') for x in text)

  # Look for an already created Character, creating it on its first appearance
  character = characters_by_name.get(character_name)
  if character is None:
    character = Character(character_name)
    characters_by_name[character_name] = character
    character_list.append(character)

  # Store the dialog for newly created characters too; previously the line
  # that introduced a character was dropped
  character.add_line(int(line_id), dialog)

# Initialize relations

For each character, initialize a dict that holds a positive and a negative relation counter (both starting at 0) for every other character.

In [32]:
# Give every character a fresh relation entry for each of the other characters
for current in character_list:
  # Everyone except the character itself, in list order
  others = [candidate for candidate in character_list if not current == candidate]
  for other in others:
    current.init_relation(other.name)

In [None]:
# Display the names for which the first character in the list has a relation entry
character_list[0].relation.keys()

dict_keys(['luke', 'imperial officer', 'vader', 'rebel officer', 'trooper', 'chief pilot', 'captain', 'woman', 'fixer', 'camie', 'biggs', 'deak', 'leia', 'commander', 'second officer', 'first trooper', 'second trooper', 'beru', 'owen', 'aunt beru', 'ben', 'tagge', 'motti', 'tarkin', 'bartender', 'creature', 'human', 'han', 'greedo', 'jabba', 'officer cass', 'voice over death star intercom', 'officer', 'voice', 'gantry officer', 'intercom voice', 'trooper voice', 'first officer', 'willard', 'death star intercom voice', 'dodonna', 'gold leader', 'wedge', 'man', 'red leader', 'chief', 'massassi intercom voice', 'red ten', 'red seven', 'porkins', 'red nine', 'red eleven', 'astro-officer', 'control officer', 'gold five', 'gold two', 'wingman', 'base voice', 'technician'])