# Load dependencies and lexicons

In [1]:
import pandas as pd
import os
import pathlib
import re
import itertools

import contractions
import nltk
import inflect
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from collections import Counter

lemmatizer = WordNetLemmatizer()

actorList = ['MATT', 'MARISHA', 'TRAVIS', 'LAURA', 'SAM', 'LIAM', 'ASHLEY', 'TALIESIN', 'ORION']

characterDict = {'MATT': '\s?(dm|matt|matthew|mercer)\s?'
                 , 'MARISHA': '\s?(kiki|keykey|keyleth|antlers)\s?'
                , 'TRAVIS': '\s?(grog|bigman|big man|jumbo|greg)\s?'
                , 'LAURA': "\s?(vex|vex'ahlia|stubby|little elf girl)\s?"
                , 'SAM': '\s?(scanlan|scanman|taryon|tary jumbo|goldie)\s?'
                , 'LIAM': "\s?(vax|vaxy|vax'ildan|scrawny|little elf boy|string bean)\s?"
                , 'ASHLEY': '\s?(pike|clanky|pickle)\s?'
                , 'TALIESIN': '\s?(percy|whitey|freddy|foureyes|four eyes)\s?'
                , 'ORION': '\s?(tiberius|tibby|tibsy)\s?'}

emotionsList = ['anger', 'anticipation', 'disgust', 'fear',
                'joy', 'sadness', 'surprise', 'trust']

In [2]:
#SENTIMENT ANALYSIS OF EMOTIONS ON THEIR VOCABULARIES

#Load up sentiment lexicons

filepath = ('data/'
            'NRC Lexicons/'
            'NRC Emotion Intensity Lexicon/'
            'NRC-Emotion-Intensity-Lexicon-v1.txt')

lexiconDf = pd.read_csv(filepath,
                        names=["word", "emotion", "association"],
                        sep='\t')

lexiconDf.drop(0, inplace = True)

lexiconDf['association'] = lexiconDf['association'].astype(float)

lexiconPivot = pd.pivot_table(lexiconDf,
               index = 'word',
               columns = 'emotion',
               values = 'association').reset_index()

# Define functions to treat the texts

In [3]:
def remove_between_parentheses(text):
    return re.sub('\([^()]*\)', '', text)

def remove_dice_rolls(text):
    return re.sub('\s?[0-9]+d[0-9]+\s?', '', text)

def remove_numbers(text):
    return re.sub('[0-9]+', text)

def replace_numbers(words):
    """Replace all interger occurrences 
    in list of tokenized words with textual representation"""
    p = inflect.engine()
    new_words = []
    for word in words:
        if word.isdigit():
            new_word = p.number_to_words(word)
            new_words.append(new_word)
        else:
            new_words.append(word)
    return new_words

def remove_stopwords(words):
    """Remove stop words from list of tokenized words"""
    new_words = []
    for word in words:
        if word not in stopwords.words('english'):
            new_words.append(word)
    return new_words

# Remove punctuation, convert to lowercase
Saves the files to a separate directory of folders named 'ACTOR TREATED'

In [4]:
for actor in actorList:
    #Check if folder has been created
    folderPath = 'data/ACTOR TREATED/{actor}'.format(actor = actor)

    try:  
        # Create directory
        os.mkdir(folderPath)
        print("Directory '{folderPath}' was created.".format(folderPath = folderPath))


    except FileExistsError:
        print("Directory '{folderPath}' already exists.".format(folderPath = folderPath))


for actor in actorList:
    folder = 'data/ACTOR TRANSCRIPTS/{actor}'.format(actor = actor)
    
    #For each file in the file directory,
    for path in pathlib.Path(folder).iterdir():
        if path.is_file():
            print("\n{path}".format(path = path))
            
            #Open the file
            current_file = open(path, "r", encoding = 'utf8')
        
            #Read the file
            content = current_file.read()
            current_file.close()
        
            #Remove punctuation and different symbols
            noSymbols = re.sub(r'[!#?,.:";]',r'',content)
        
            #Convert upper case to lower case
            noSymbolsLower = noSymbols.lower()
        
            #Save to another folder
            save_file = open('data/ACTOR TREATED/{actor}/{filename}_treated.txt'.format(actor = actor, filename = path.stem), 'w+b')
            save_file.write(noSymbolsLower.encode('utf8'))
            save_file.close()        
        
        print("Saved treated transcript to {actor}/{filename}_treated.txt!".format(actor = actor, filename = path.stem))


Directory 'data/ACTOR TREATED/MATT' already exists.
Directory 'data/ACTOR TREATED/MARISHA' already exists.
Directory 'data/ACTOR TREATED/TRAVIS' already exists.
Directory 'data/ACTOR TREATED/LAURA' already exists.
Directory 'data/ACTOR TREATED/SAM' already exists.
Directory 'data/ACTOR TREATED/LIAM' already exists.
Directory 'data/ACTOR TREATED/ASHLEY' already exists.
Directory 'data/ACTOR TREATED/TALIESIN' already exists.
Directory 'data/ACTOR TREATED/ORION' already exists.

data\ACTOR TRANSCRIPTS\MATT\C1E001_FINAL_V2_MATT.txt
Saved treated transcript to MATT/C1E001_FINAL_V2_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C1E002_FINAL_MATT.txt
Saved treated transcript to MATT/C1E002_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C1E003_FINAL_MATT.txt
Saved treated transcript to MATT/C1E003_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C1E004_FINAL_MATT.txt
Saved treated transcript to MATT/C1E004_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C1E005_FINAL_MATT.txt
Sav

Saved treated transcript to MATT/C1E067_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C1E068_FINAL_MATT.txt
Saved treated transcript to MATT/C1E068_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C1E069_FINAL_MATT.txt
Saved treated transcript to MATT/C1E069_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C1E070_FINAL_MATT.txt
Saved treated transcript to MATT/C1E070_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C1E071_FINAL_MATT.txt
Saved treated transcript to MATT/C1E071_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C1E072_FINAL_MATT.txt
Saved treated transcript to MATT/C1E072_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C1E073_FINAL_MATT.txt
Saved treated transcript to MATT/C1E073_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C1E074_FINAL_MATT.txt
Saved treated transcript to MATT/C1E074_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C1E075_FINAL_MATT.txt
Saved treated transcript to MATT/C1E075_FINAL_MATT_treated.txt!

data\ACTOR TRAN

Saved treated transcript to MATT/C2E035_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C2E036_FINAL_MATT.txt
Saved treated transcript to MATT/C2E036_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C2E037_REVISED_MATT.txt
Saved treated transcript to MATT/C2E037_REVISED_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C2E038_FINAL_MATT.txt
Saved treated transcript to MATT/C2E038_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C2E039_FINAL_MATT.txt
Saved treated transcript to MATT/C2E039_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C2E040_FINAL_MATT.txt
Saved treated transcript to MATT/C2E040_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C2E041_FINAL_MATT.txt
Saved treated transcript to MATT/C2E041_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C2E042_FINAL_MATT.txt
Saved treated transcript to MATT/C2E042_FINAL_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\C2E043_FINAL_MATT.txt
Saved treated transcript to MATT/C2E043_FINAL_MATT_treated.txt!

data\ACTOR 

Saved treated transcript to MATT/CR-Scope_Matthew Mercer  Marisha Ray - The Vestiges of Critmas Unpacking  QA  Home Spoilers E51.en_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\CR-Scope_momocon_MATT.txt
Saved treated transcript to MATT/CR-Scope_momocon_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\CR-Scope_panel w Matt Marisha Laura  Travis  AnimeMilwaukee 2017 Spoilers E86.en_MATT.txt
Saved treated transcript to MATT/CR-Scope_panel w Matt Marisha Laura  Travis  AnimeMilwaukee 2017 Spoilers E86.en_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\CR-Scope_Panel with Matt  Taliesin  Another Anime Con 2016 Manchester NH Spoilers E71.en_MATT.txt
Saved treated transcript to MATT/CR-Scope_Panel with Matt  Taliesin  Another Anime Con 2016 Manchester NH Spoilers E71.en_MATT_treated.txt!

data\ACTOR TRANSCRIPTS\MATT\CR-Scope_Part 2 of Matthew Mercer  Marisha Rays Post-Critmas Mini Unboxing  Gen thank you Spoilers E41.en_MATT.txt
Saved treated transcript to MATT/CR-Scope_Part 2 of Matthew Merce


data\ACTOR TRANSCRIPTS\MARISHA\C1E055_FINAL_MARISHA.txt
Saved treated transcript to MARISHA/C1E055_FINAL_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\C1E056_FINAL_MARISHA.txt
Saved treated transcript to MARISHA/C1E056_FINAL_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\C1E057_FINAL_MARISHA.txt
Saved treated transcript to MARISHA/C1E057_FINAL_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\C1E058_FINAL_MARISHA.txt
Saved treated transcript to MARISHA/C1E058_FINAL_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\C1E059_FINAL_MARISHA.txt
Saved treated transcript to MARISHA/C1E059_FINAL_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\C1E060_FINAL_MARISHA.txt
Saved treated transcript to MARISHA/C1E060_FINAL_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\C1E061_FINAL_MARISHA.txt
Saved treated transcript to MARISHA/C1E061_FINAL_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\C1E062_FINAL_MARISHA.txt
Saved treated transcript to MARISHA/C1E062_FINAL_MARIS

Saved treated transcript to MARISHA/C2E013_FINAL_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\C2E014_FINAL_MARISHA.txt
Saved treated transcript to MARISHA/C2E014_FINAL_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\C2E015_FINAL_MARISHA.txt
Saved treated transcript to MARISHA/C2E015_FINAL_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\C2E016_FINAL_MARISHA.txt
Saved treated transcript to MARISHA/C2E016_FINAL_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\C2E017_FINAL_MARISHA.txt
Saved treated transcript to MARISHA/C2E017_FINAL_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\C2E018_FINAL_MARISHA.txt
Saved treated transcript to MARISHA/C2E018_FINAL_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\C2E019_FINAL_MARISHA.txt
Saved treated transcript to MARISHA/C2E019_FINAL_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\C2E020_FINAL_MARISHA.txt
Saved treated transcript to MARISHA/C2E020_FINAL_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\C2E021_FI

Saved treated transcript to MARISHA/CR-Scope_Critical Role panel w Matt Mercer  Marisha Ray  HAVEN Mackay Australia 2016-07-02 Spoilers E58.en_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\CR-Scope_Critical Role panel with Matt Laura Travis  Marisha  GenCon 2016 Spoilers E61.en_MARISHA.txt
Saved treated transcript to MARISHA/CR-Scope_Critical Role panel with Matt Laura Travis  Marisha  GenCon 2016 Spoilers E61.en_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\CR-Scope_Critical Role Talks Machina style w Matt Taliesin Laura Sam  GenCon2017 SPOILERS E108.en_MARISHA.txt
Saved treated transcript to MARISHA/CR-Scope_Critical Role Talks Machina style w Matt Taliesin Laura Sam  GenCon2017 SPOILERS E108.en_MARISHA_treated.txt!

data\ACTOR TRANSCRIPTS\MARISHA\CR-Scope_Critical Role Talks Machina Style with Matt Marisha Taliesin  Brian  MomoCon 2017 Spoilers E98.en_MARISHA.txt
Saved treated transcript to MARISHA/CR-Scope_Critical Role Talks Machina Style with Matt Marisha Taliesin  

Saved treated transcript to TRAVIS/C1E033-1_FINAL_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\C1E033-2_FINAL_TRAVIS.txt
Saved treated transcript to TRAVIS/C1E033-2_FINAL_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\C1E034_FINAL_TRAVIS.txt
Saved treated transcript to TRAVIS/C1E034_FINAL_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\C1E035-1_FINAL_TRAVIS.txt
Saved treated transcript to TRAVIS/C1E035-1_FINAL_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\C1E035-2_FINAL_TRAVIS.txt
Saved treated transcript to TRAVIS/C1E035-2_FINAL_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\C1E036_FINAL_TRAVIS.txt
Saved treated transcript to TRAVIS/C1E036_FINAL_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\C1E037_FINAL_TRAVIS.txt
Saved treated transcript to TRAVIS/C1E037_FINAL_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\C1E038_FINAL_TRAVIS.txt
Saved treated transcript to TRAVIS/C1E038_FINAL_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\C1E039_FINAL_TRAVIS.txt
Sa

Saved treated transcript to TRAVIS/C1E110_FINAL_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\C1E111_FINAL_TRAVIS.txt
Saved treated transcript to TRAVIS/C1E111_FINAL_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\C1E112_FINAL_TRAVIS.txt
Saved treated transcript to TRAVIS/C1E112_FINAL_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\C1E113-1_FINAL_TRAVIS.txt
Saved treated transcript to TRAVIS/C1E113-1_FINAL_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\C1E113-2_FINAL_TRAVIS.txt
Saved treated transcript to TRAVIS/C1E113-2_FINAL_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\C1E113_FINAL_TRAVIS.txt
Saved treated transcript to TRAVIS/C1E113_FINAL_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\C1E114_FINAL_TRAVIS.txt
Saved treated transcript to TRAVIS/C1E114_FINAL_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\C1E115-GROG_FINAL_TRAVIS.txt
Saved treated transcript to TRAVIS/C1E115-GROG_FINAL_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\C1E115-VAMP1_FINAL_TRA


data\ACTOR TRANSCRIPTS\TRAVIS\CR-Scope_Critical Roles Impromptu QA during technical difficulties at GeekSundry Spoilers E50.en_TRAVIS.txt
Saved treated transcript to TRAVIS/CR-Scope_Critical Roles Impromptu QA during technical difficulties at GeekSundry Spoilers E50.en_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\CR-Scope_Liam  Travis secret plan to win the fight in E52 Spoilers E52.en_TRAVIS.txt
Saved treated transcript to TRAVIS/CR-Scope_Liam  Travis secret plan to win the fight in E52 Spoilers E52.en_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\CR-Scope_panel w Matt Marisha Laura  Travis  AnimeMilwaukee 2017 Spoilers E86.en_TRAVIS.txt
Saved treated transcript to TRAVIS/CR-Scope_panel w Matt Marisha Laura  Travis  AnimeMilwaukee 2017 Spoilers E86.en_TRAVIS_treated.txt!

data\ACTOR TRANSCRIPTS\TRAVIS\CR-Scope_Short Periscope before the start of Critical Role E54 Spoilers E54.en_TRAVIS.txt
Saved treated transcript to TRAVIS/CR-Scope_Short Periscope before the start of Crit

Saved treated transcript to LAURA/C1E065_FINAL_LAURA_treated.txt!

data\ACTOR TRANSCRIPTS\LAURA\C1E066_FINAL_LAURA.txt
Saved treated transcript to LAURA/C1E066_FINAL_LAURA_treated.txt!

data\ACTOR TRANSCRIPTS\LAURA\C1E067_FINAL_LAURA.txt
Saved treated transcript to LAURA/C1E067_FINAL_LAURA_treated.txt!

data\ACTOR TRANSCRIPTS\LAURA\C1E068_FINAL_LAURA.txt
Saved treated transcript to LAURA/C1E068_FINAL_LAURA_treated.txt!

data\ACTOR TRANSCRIPTS\LAURA\C1E069_FINAL_LAURA.txt
Saved treated transcript to LAURA/C1E069_FINAL_LAURA_treated.txt!

data\ACTOR TRANSCRIPTS\LAURA\C1E070_FINAL_LAURA.txt
Saved treated transcript to LAURA/C1E070_FINAL_LAURA_treated.txt!

data\ACTOR TRANSCRIPTS\LAURA\C1E071_FINAL_LAURA.txt
Saved treated transcript to LAURA/C1E071_FINAL_LAURA_treated.txt!

data\ACTOR TRANSCRIPTS\LAURA\C1E072_FINAL_LAURA.txt
Saved treated transcript to LAURA/C1E072_FINAL_LAURA_treated.txt!

data\ACTOR TRANSCRIPTS\LAURA\C1E073_FINAL_LAURA.txt
Saved treated transcript to LAURA/C1E073_FINAL_L

Saved treated transcript to LAURA/C2E032_FINAL_LAURA_treated.txt!

data\ACTOR TRANSCRIPTS\LAURA\C2E033_FINAL_LAURA.txt
Saved treated transcript to LAURA/C2E033_FINAL_LAURA_treated.txt!

data\ACTOR TRANSCRIPTS\LAURA\C2E034_FINAL_LAURA.txt
Saved treated transcript to LAURA/C2E034_FINAL_LAURA_treated.txt!

data\ACTOR TRANSCRIPTS\LAURA\C2E035_FINAL_LAURA.txt
Saved treated transcript to LAURA/C2E035_FINAL_LAURA_treated.txt!

data\ACTOR TRANSCRIPTS\LAURA\C2E036_FINAL_LAURA.txt
Saved treated transcript to LAURA/C2E036_FINAL_LAURA_treated.txt!

data\ACTOR TRANSCRIPTS\LAURA\C2E037_REVISED_LAURA.txt
Saved treated transcript to LAURA/C2E037_REVISED_LAURA_treated.txt!

data\ACTOR TRANSCRIPTS\LAURA\C2E038_FINAL_LAURA.txt
Saved treated transcript to LAURA/C2E038_FINAL_LAURA_treated.txt!

data\ACTOR TRANSCRIPTS\LAURA\C2E039_FINAL_LAURA.txt
Saved treated transcript to LAURA/C2E039_FINAL_LAURA_treated.txt!

data\ACTOR TRANSCRIPTS\LAURA\C2E041_FINAL_LAURA.txt
Saved treated transcript to LAURA/C2E041_FIN


data\ACTOR TRANSCRIPTS\SAM\C1E015_FINAL_SAM.txt
Saved treated transcript to SAM/C1E015_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\C1E016_FINAL_SAM.txt
Saved treated transcript to SAM/C1E016_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\C1E017_FINAL_SAM.txt
Saved treated transcript to SAM/C1E017_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\C1E018_FINAL_SAM.txt
Saved treated transcript to SAM/C1E018_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\C1E019_FINAL_SAM.txt
Saved treated transcript to SAM/C1E019_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\C1E023_FINAL_SAM.txt
Saved treated transcript to SAM/C1E023_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\C1E024_FINAL_SAM.txt
Saved treated transcript to SAM/C1E024_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\C1E025_FINAL_SAM.txt
Saved treated transcript to SAM/C1E025_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\C1E026_FINAL_SAM.txt
Saved treated transcript to SAM/C1E026_FINAL_SAM_treated.txt!



Saved treated transcript to SAM/C1E098-5_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\C1E098_FINAL_SAM.txt
Saved treated transcript to SAM/C1E098_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\C1E099_FINAL_SAM.txt
Saved treated transcript to SAM/C1E099_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\C1E100_FINAL_SAM.txt
Saved treated transcript to SAM/C1E100_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\C1E101_FINAL_SAM.txt
Saved treated transcript to SAM/C1E101_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\C1E102_FINAL_SAM.txt
Saved treated transcript to SAM/C1E102_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\C1E103_FINAL_SAM.txt
Saved treated transcript to SAM/C1E103_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\C1E104_FINAL_SAM.txt
Saved treated transcript to SAM/C1E104_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\C1E105_FINAL_SAM.txt
Saved treated transcript to SAM/C1E105_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\C1E106_FINAL_SAM.txt


data\ACTOR TRANSCRIPTS\SAM\zcharacter-backgrounds_FINAL_SAM.txt
Saved treated transcript to SAM/zcharacter-backgrounds_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\SAM\zEP125_FINAL_SAM.txt
Saved treated transcript to SAM/zEP125_FINAL_SAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C1E001_FINAL_V2_LIAM.txt
Saved treated transcript to LIAM/C1E001_FINAL_V2_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C1E002_FINAL_LIAM.txt
Saved treated transcript to LIAM/C1E002_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C1E003_FINAL_LIAM.txt
Saved treated transcript to LIAM/C1E003_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C1E004_FINAL_LIAM.txt
Saved treated transcript to LIAM/C1E004_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C1E005_FINAL_LIAM.txt
Saved treated transcript to LIAM/C1E005_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C1E006_FINAL_LIAM.txt
Saved treated transcript to LIAM/C1E006_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C1E007_FINAL_LIAM.txt

Saved treated transcript to LIAM/C1E071_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C1E072_FINAL_LIAM.txt
Saved treated transcript to LIAM/C1E072_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C1E073_FINAL_LIAM.txt
Saved treated transcript to LIAM/C1E073_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C1E074_FINAL_LIAM.txt
Saved treated transcript to LIAM/C1E074_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C1E075_FINAL_LIAM.txt
Saved treated transcript to LIAM/C1E075_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C1E076_FINAL_LIAM.txt
Saved treated transcript to LIAM/C1E076_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C1E077_FINAL_LIAM.txt
Saved treated transcript to LIAM/C1E077_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C1E078_FINAL_LIAM.txt
Saved treated transcript to LIAM/C1E078_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C1E079_FINAL_LIAM.txt
Saved treated transcript to LIAM/C1E079_FINAL_LIAM_treated.txt!

data\ACTOR TRAN

Saved treated transcript to LIAM/C2E030_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C2E031_FINAL_LIAM.txt
Saved treated transcript to LIAM/C2E031_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C2E032_FINAL_LIAM.txt
Saved treated transcript to LIAM/C2E032_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C2E033_FINAL_LIAM.txt
Saved treated transcript to LIAM/C2E033_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C2E034_FINAL_LIAM.txt
Saved treated transcript to LIAM/C2E034_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C2E035_FINAL_LIAM.txt
Saved treated transcript to LIAM/C2E035_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C2E036_FINAL_LIAM.txt
Saved treated transcript to LIAM/C2E036_FINAL_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C2E037_REVISED_LIAM.txt
Saved treated transcript to LIAM/C2E037_REVISED_LIAM_treated.txt!

data\ACTOR TRANSCRIPTS\LIAM\C2E038_FINAL_LIAM.txt
Saved treated transcript to LIAM/C2E038_FINAL_LIAM_treated.txt!

data\ACTOR 

Saved treated transcript to ASHLEY/C1E041_FINAL_ASHLEY_treated.txt!

data\ACTOR TRANSCRIPTS\ASHLEY\C1E048_FINAL_ASHLEY.txt
Saved treated transcript to ASHLEY/C1E048_FINAL_ASHLEY_treated.txt!

data\ACTOR TRANSCRIPTS\ASHLEY\C1E050_FINAL_ASHLEY.txt
Saved treated transcript to ASHLEY/C1E050_FINAL_ASHLEY_treated.txt!

data\ACTOR TRANSCRIPTS\ASHLEY\C1E051_FINAL_ASHLEY.txt
Saved treated transcript to ASHLEY/C1E051_FINAL_ASHLEY_treated.txt!

data\ACTOR TRANSCRIPTS\ASHLEY\C1E052_FINAL_ASHLEY.txt
Saved treated transcript to ASHLEY/C1E052_FINAL_ASHLEY_treated.txt!

data\ACTOR TRANSCRIPTS\ASHLEY\C1E053_FINAL_ASHLEY.txt
Saved treated transcript to ASHLEY/C1E053_FINAL_ASHLEY_treated.txt!

data\ACTOR TRANSCRIPTS\ASHLEY\C1E054_FINAL_ASHLEY.txt
Saved treated transcript to ASHLEY/C1E054_FINAL_ASHLEY_treated.txt!

data\ACTOR TRANSCRIPTS\ASHLEY\C1E055_FINAL_ASHLEY.txt
Saved treated transcript to ASHLEY/C1E055_FINAL_ASHLEY_treated.txt!

data\ACTOR TRANSCRIPTS\ASHLEY\C1E056_FINAL_ASHLEY.txt
Saved treated tr

Saved treated transcript to TALIESIN/C1E004_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C1E005_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C1E005_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C1E006_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C1E006_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C1E007_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C1E007_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C1E008_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C1E008_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C1E009_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C1E009_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C1E010_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C1E010_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C1E011_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C1E011_FINAL_TALIESIN_treated.txt!

data\ACTOR


data\ACTOR TRANSCRIPTS\TALIESIN\C1E072_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C1E072_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C1E073_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C1E073_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C1E074_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C1E074_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C1E075_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C1E075_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C1E076_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C1E076_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C1E077_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C1E077_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C1E078_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C1E078_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C1E079_FINAL_TALIESIN.txt
Saved treated transcript

Saved treated transcript to TALIESIN/C2E026_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C2E027_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C2E027_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C2E028_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C2E028_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C2E029_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C2E029_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C2E030_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C2E030_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C2E031_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C2E031_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C2E032_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C2E032_FINAL_TALIESIN_treated.txt!

data\ACTOR TRANSCRIPTS\TALIESIN\C2E033_FINAL_TALIESIN.txt
Saved treated transcript to TALIESIN/C2E033_FINAL_TALIESIN_treated.txt!

data\ACTOR

Saved treated transcript to ORION/C1E003_FINAL_ORION_treated.txt!

data\ACTOR TRANSCRIPTS\ORION\C1E004_FINAL_ORION.txt
Saved treated transcript to ORION/C1E004_FINAL_ORION_treated.txt!

data\ACTOR TRANSCRIPTS\ORION\C1E005_FINAL_ORION.txt
Saved treated transcript to ORION/C1E005_FINAL_ORION_treated.txt!

data\ACTOR TRANSCRIPTS\ORION\C1E006_FINAL_ORION.txt
Saved treated transcript to ORION/C1E006_FINAL_ORION_treated.txt!

data\ACTOR TRANSCRIPTS\ORION\C1E007_FINAL_ORION.txt
Saved treated transcript to ORION/C1E007_FINAL_ORION_treated.txt!

data\ACTOR TRANSCRIPTS\ORION\C1E008_FINAL_ORION.txt
Saved treated transcript to ORION/C1E008_FINAL_ORION_treated.txt!

data\ACTOR TRANSCRIPTS\ORION\C1E009_FINAL_ORION.txt
Saved treated transcript to ORION/C1E009_FINAL_ORION_treated.txt!

data\ACTOR TRANSCRIPTS\ORION\C1E010_FINAL_ORION.txt
Saved treated transcript to ORION/C1E010_FINAL_ORION_treated.txt!

data\ACTOR TRANSCRIPTS\ORION\C1E011_FINAL_ORION.txt
Saved treated transcript to ORION/C1E011_FINAL_O

In [5]:
completeDf = pd.DataFrame()

#For each actor,
for act in actorList:
    
#Open the folder
    folderPath = 'data/ACTOR TREATED/{actor}'.format(actor = act)
     
    #For each file in the folder,
    for path in pathlib.Path(folderPath).iterdir():
        
        if path.is_file():
            print("\n{path}".format(path = path))
            
            #Open the file
            current_file = open(path, "r", encoding = 'utf8')
            
            #Read the file
            content = current_file.read()
            current_file.close()
            
            
            #Remove parentheses
            content_noActions = remove_between_parentheses(content)

            #Remove dicerolls
            content_noDice = remove_dice_rolls(content_noActions)

            #Remove double hyphens
            content_noHyphens = content_noDice.replace('--', '')

            #Replace contractions
            content_full = contractions.fix(content_noHyphens)

            #Tokenize the content
            tokens = nltk.word_tokenize(content_full)

            #Remove stopwords
            tokens_noStops = []
            for word in tokens:
                if word not in stopwords.words('english'):
                    tokens_noStops.append(word)


            #Lemmatize each token in the string
            lemmaTokens = []

            for t in tokens_noStops:
                temp_t = lemmatizer.lemmatize(t)
                lemmaTokens.append(temp_t)

            #Convert integers to numbers
            tokens_final = replace_numbers(lemmaTokens)

    
            #Number of words spoken - len(lemmaTokens)
            totalWords = len(tokens)

            #Vocabulary without stopwords
            vocabulary = len(set(tokens_final))

            # ----- CREATE DICTIONARY ----- #
            dataDict = dict()
            dataDict['file'] = os.path.basename(path)
            dataDict['actor'] = act
            dataDict['word_count'] = totalWords
            dataDict['vocabulary'] = vocabulary

            vocabDf = pd.DataFrame.from_dict(dataDict, orient = 'index').T

            
            # ----- WHEN ARE ACTORS MENTIONED ----- #
            
            characterMatches = dict()
            
            #For each other actor,
            for actor in actorList:
                
                #Find how many times they mentioned that specific actor
                numMatches = len(re.findall(characterDict[actor], content_full))
                
                #Put into a dictionary
                characterMatches[actor] = numMatches

            #Create dataframe of character matches that episode
            characterMatchDf = pd.DataFrame.from_dict(characterMatches, orient = 'index').T
            characterMatchDf.columns = [str(col).lower() + '_cnt' for col in characterMatchDf.columns]
            
            # ----- SENTIMENT / EMOTION ANALYSIS ----- #
            
            #Find number of times each word appears
            wordCount = Counter(tokens_final)
            
            try:
                #Put that into a dataframe
                wordsDf = pd.DataFrame.from_dict(wordCount, orient = 'index')
                wordsDf.reset_index(inplace = True)
                wordsDf.columns = ['word', 'count']
            
                #Merge with the NRC emotion intensity lexicon
                wordsMerge = pd.merge(wordsDf, lexiconPivot, how = 'left')
                wordsMerge = wordsMerge.dropna(how = 'all', subset = ['anger', 'anticipation', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'trust'])

                wordsMerge['norm'] = wordsMerge['count'] / wordsMerge['count'].sum()

                for emo in emotionsList:
                    wordsMerge['{emo}_norm'.format(emo = emo)] = wordsMerge['{emo}'.format(emo = emo)] * wordsMerge['norm']


                wordsTotal = pd.DataFrame(
                            wordsMerge[['anger_norm', 'anticipation_norm', 'disgust_norm',
                                   'fear_norm', 'joy_norm', 'sadness_norm',
                                   'surprise_norm', 'trust_norm']].sum()).T

                emotion_total = wordsTotal.sum(axis = 1)

                for emo in emotionsList:
                    wordsTotal['{emo}_norm'.format(emo = emo)] = wordsTotal['{emo}_norm'.format(emo = emo)] / emotion_total
                
            except ValueError:
                wordsTotal = pd.DataFrame.from_dict({'anger_norm': 0,
                       'anticipation_norm': 0,
                       'disgust_norm': 0,
                       'fear_norm': 0,
                       'joy_norm': 0,
                       'sadness_norm': 0,
                       'surprise_norm': 0,
                       'trust_norm': 0},
                       orient = 'index').T
                
            fullTempDf = vocabDf.join([wordsTotal, characterMatchDf])
            
            completeDf = pd.concat([completeDf, fullTempDf], axis=0)


data\ACTOR TREATED\MATT\C1E001_FINAL_V2_MATT_treated.txt

data\ACTOR TREATED\MATT\C1E002_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C1E003_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C1E004_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C1E005_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C1E006_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C1E007_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C1E008_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C1E009_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C1E010_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C1E011_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C1E012_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C1E013_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C1E014_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C1E015-FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C1E015_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C1E016_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C1E017_FINAL_MATT_treated.txt

data\A


data\ACTOR TREATED\MATT\C2E024_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C2E025_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C2E026_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C2E027_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C2E028_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C2E029_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C2E030_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C2E031_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C2E032_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C2E033_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C2E034_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C2E035_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C2E036_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C2E037_REVISED_MATT_treated.txt

data\ACTOR TREATED\MATT\C2E038_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C2E039_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C2E040_FINAL_MATT_treated.txt

data\ACTOR TREATED\MATT\C2E041_FINAL_MATT_treated.txt

data\AC


data\ACTOR TREATED\MARISHA\C1E033-2_FINAL_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C1E034_FINAL_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C1E035-1_FINAL_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C1E035-2_FINAL_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C1E036_FINAL_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C1E037_FINAL_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C1E038_FINAL_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C1E039_FINAL_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C1E040_FINAL_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C1E041_FINAL_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C1E042_FINAL_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C1E043_FINAL_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C1E044_FINAL_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C1E045_FINAL_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C1E046_FINAL_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C1E047_FINAL_MARISHA_treated.txt

data\ACTOR TREATE


data\ACTOR TREATED\MARISHA\C2E046_FINAL_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C2E047.en_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C2E048.en_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C2E049.en_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\C2E050.en_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\CR-Extra_Critical Role Live at Comic-Con 2015_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\CR-Extra_Critical Role Q_A Feb 2016_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\CR-Extra_Critical Trolls for Extra Life.en_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\CR-Extra_Deadlands One-Shot for MDA Charity.en_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\CR-Extra_Episode 4 12 Dance Party_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\CR-Extra_Honey Heist 2 Electric Beargaloo.en_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\CR-Extra_One-Shot Epic Level Battle Royale_MARISHA_treated.txt

data\ACTOR TREATED\MARISHA\CR-Extra_Pants Optional Critmas_MARISHA_treated.t


data\ACTOR TREATED\TRAVIS\C1E061_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\C1E062_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\C1E063_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\C1E064_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\C1E065-5_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\C1E065_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\C1E066_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\C1E067_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\C1E068_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\C1E069_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\C1E070_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\C1E071_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\C1E072_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\C1E073_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\C1E074_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\C1E075_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\C1E076_FINAL_TRAVIS_treated


data\ACTOR TREATED\TRAVIS\CR-Scope_Short Periscope before the start of Critical Role E54 Spoilers E54.en_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\CR-Scope_UPDATED Critical Role Panel  WonderCon LA Full Length SPOILERS E46.en_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\CR-Scope_Wizard World Portland Day 1.en_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\zcharacter-backgrounds_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\TRAVIS\zEP125_FINAL_TRAVIS_treated.txt

data\ACTOR TREATED\LAURA\C1E001_FINAL_V2_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C1E002_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C1E003_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C1E004_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C1E005_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C1E006_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C1E007_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C1E008_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C1E009_FINAL_LAURA_treated.txt

data\ACTOR TR


data\ACTOR TREATED\LAURA\C2E010_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C2E011_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C2E012_FINAL.en_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C2E013_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C2E015_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C2E016_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C2E017_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C2E018_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C2E019_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C2E020_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C2E021_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C2E022_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C2E023_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C2E024_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C2E029_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C2E030_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C2E031_FINAL_LAURA_treated.txt

data\ACTOR TREATED\LAURA\C2


data\ACTOR TREATED\SAM\C1E071_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E072_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E073_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E074_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E075_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E076_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E077_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E078_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E079_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E080_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E081_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E083_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E084_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E085_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E086_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E087_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E088_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E089_FINAL_SAM_treated.txt

data\ACTOR TREATED\SAM\C1E090_FINAL_SAM_treat


data\ACTOR TREATED\LIAM\C1E026_FINAL_LIAM_treated.txt

data\ACTOR TREATED\LIAM\C1E027_FINAL_LIAM_treated.txt

data\ACTOR TREATED\LIAM\C1E028_FINAL_LIAM_treated.txt

data\ACTOR TREATED\LIAM\C1E029_FINAL_LIAM_treated.txt

data\ACTOR TREATED\LIAM\C1E030_FINAL_LIAM_treated.txt

data\ACTOR TREATED\LIAM\C1E031-1_FINAL_LIAM_treated.txt

data\ACTOR TREATED\LIAM\C1E031-2_FINAL_LIAM_treated.txt

data\ACTOR TREATED\LIAM\C1E032_FINAL_LIAM_treated.txt

data\ACTOR TREATED\LIAM\C1E033-1_FINAL_LIAM_treated.txt

data\ACTOR TREATED\LIAM\C1E033-2_FINAL_LIAM_treated.txt

data\ACTOR TREATED\LIAM\C1E034_FINAL_LIAM_treated.txt

data\ACTOR TREATED\LIAM\C1E035-1_FINAL_LIAM_treated.txt

data\ACTOR TREATED\LIAM\C1E035-2_FINAL_LIAM_treated.txt

data\ACTOR TREATED\LIAM\C1E036_FINAL_LIAM_treated.txt

data\ACTOR TREATED\LIAM\C1E037_FINAL_LIAM_treated.txt

data\ACTOR TREATED\LIAM\C1E038_FINAL_LIAM_treated.txt

data\ACTOR TREATED\LIAM\C1E039_FINAL_LIAM_treated.txt

data\ACTOR TREATED\LIAM\C1E040_FINAL_LIAM_treated.tx


data\ACTOR TREATED\LIAM\C2E050.en_LIAM_treated.txt

data\ACTOR TREATED\LIAM\CR-Extra_ClubOfMisfits.en_LIAM_treated.txt

data\ACTOR TREATED\LIAM\CR-Extra_Critical Role Live at Comic-Con 2015_LIAM_treated.txt

data\ACTOR TREATED\LIAM\CR-Extra_Critical Trolls for Extra Life.en_LIAM_treated.txt

data\ACTOR TREATED\LIAM\CR-Extra_Deadlands One-Shot for MDA Charity.en_LIAM_treated.txt

data\ACTOR TREATED\LIAM\CR-Extra_Episode 4 12 Dance Party_LIAM_treated.txt

data\ACTOR TREATED\LIAM\CR-Extra_Hearthstone One-Shot_LIAM_treated.txt

data\ACTOR TREATED\LIAM\CR-Extra_Honey Heist 2 Electric Beargaloo.en_LIAM_treated.txt

data\ACTOR TREATED\LIAM\CR-Extra_One-Shot Epic Level Battle Royale_LIAM_treated.txt

data\ACTOR TREATED\LIAM\CR-Extra_Pants Optional Critmas_LIAM_treated.txt

data\ACTOR TREATED\LIAM\CR-Extra_RPG Show QA and Battle Royale.en_LIAM_treated.txt

data\ACTOR TREATED\LIAM\CR-Extra_songofthelorelei_LIAM_treated.txt

data\ACTOR TREATED\LIAM\CR-Extra_Talks Machina Campaign Wrap-up_LIAM_tr


data\ACTOR TREATED\ASHLEY\zEP125_FINAL_ASHLEY_treated.txt

data\ACTOR TREATED\TALIESIN\C1E001_FINAL_V2_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C1E002_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C1E003_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C1E004_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C1E005_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C1E006_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C1E007_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C1E008_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C1E009_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C1E010_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C1E011_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C1E012_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C1E013_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C1E014_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C1E015-FINAL_TALIESIN_treat


data\ACTOR TREATED\TALIESIN\C2E005_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C2E006_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C2E007_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C2E008_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C2E009_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C2E010_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C2E011_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C2E012_FINAL.en_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C2E013_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C2E014_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C2E015_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C2E016_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C2E017_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C2E018_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C2E019_FINAL_TALIESIN_treated.txt

data\ACTOR TREATED\TALIESIN\C2E020_FINAL_TALIESIN_t

In [12]:
def get_campaign(x):
    if len(re.findall('^C[0-9]+', x)) >= 1:
        return "".join(re.findall('^C[0-9]+', x))
    
    else:
        return "Others"
    
def get_episode(x):
    if len(re.findall('E[0-9][0-9][0-9]_', x)) >= 1:
        rawString = "".join(re.findall('E[0-9][0-9][0-9]_', x))
        removeE = rawString.replace('E', '')
        removeUScore = removeE.replace('_', '')
        
        return int(removeUScore)
    
    else:
        return "Others"    

In [13]:
completeDf['campaign'] = completeDf['file'].apply(get_campaign)
completeDf['episode'] = completeDf['file'].apply(get_episode)

In [14]:
completeDfSorted = completeDf[['file', 'campaign', 'episode'
                              , 'actor', 'word_count', 'vocabulary'
                              , 'anger_norm', 'anticipation_norm'
                              , 'disgust_norm', 'fear_norm'
                              , 'joy_norm', 'sadness_norm'
                              , 'surprise_norm', 'trust_norm'
                              , 'matt_cnt', 'marisha_cnt'
                              , 'travis_cnt', 'laura_cnt'
                              , 'sam_cnt', 'liam_cnt'
                              , 'ashley_cnt', 'taliesin_cnt'
                              , 'orion_cnt']]

In [15]:
completeDfSorted.head()

Unnamed: 0,file,campaign,episode,actor,word_count,vocabulary,anger_norm,anticipation_norm,disgust_norm,fear_norm,...,trust_norm,matt_cnt,marisha_cnt,travis_cnt,laura_cnt,sam_cnt,liam_cnt,ashley_cnt,taliesin_cnt,orion_cnt
0,C1E001_FINAL_V2_MATT_treated.txt,C1,1,MATT,16201,2067,0.16238,0.109077,0.087347,0.174167,...,0.183918,6,9,11,4,7,3,8,4,14
0,C1E002_FINAL_MATT_treated.txt,C1,2,MATT,12690,1742,0.183639,0.098666,0.073454,0.153885,...,0.176107,2,13,40,6,14,6,2,9,13
0,C1E003_FINAL_MATT_treated.txt,C1,3,MATT,12197,1708,0.119209,0.097274,0.071381,0.183727,...,0.206906,3,2,13,3,6,3,2,2,10
0,C1E004_FINAL_MATT_treated.txt,C1,4,MATT,18461,2343,0.166411,0.106248,0.071372,0.171692,...,0.181523,12,5,24,7,24,5,19,12,9
0,C1E005_FINAL_MATT_treated.txt,C1,5,MATT,14968,1864,0.181923,0.086642,0.088653,0.190115,...,0.143972,2,13,39,6,24,8,20,10,12


In [16]:
completeDfSorted.to_csv(r'data/complete.csv', index = False)

In [12]:
completeDf.columns.tolist()

['file',
 'actor',
 'word_count',
 'vocabulary',
 'anger_norm',
 'anticipation_norm',
 'disgust_norm',
 'fear_norm',
 'joy_norm',
 'sadness_norm',
 'surprise_norm',
 'trust_norm',
 'matt_cnt',
 'marisha_cnt',
 'travis_cnt',
 'laura_cnt',
 'sam_cnt',
 'liam_cnt',
 'ashley_cnt',
 'taliesin_cnt',
 'orion_cnt',
 'campaign',
 'episode']

In [17]:
completeDf.head(100)

Unnamed: 0,file,actor,word_count,vocabulary,anger_norm,anticipation_norm,disgust_norm,fear_norm,joy_norm,sadness_norm,...,marisha_cnt,travis_cnt,laura_cnt,sam_cnt,liam_cnt,ashley_cnt,taliesin_cnt,orion_cnt,campaign,episode
0,C1E001_FINAL_V2_MATT_treated.txt,MATT,16201,2067,0.162380,0.109077,0.087347,0.174167,0.133800,0.088004,...,9,11,4,7,3,8,4,14,C1,1
0,C1E002_FINAL_MATT_treated.txt,MATT,12690,1742,0.183639,0.098666,0.073454,0.153885,0.148652,0.096897,...,13,40,6,14,6,2,9,13,C1,2
0,C1E003_FINAL_MATT_treated.txt,MATT,12197,1708,0.119209,0.097274,0.071381,0.183727,0.156596,0.099521,...,2,13,3,6,3,2,2,10,C1,3
0,C1E004_FINAL_MATT_treated.txt,MATT,18461,2343,0.166411,0.106248,0.071372,0.171692,0.137979,0.100250,...,5,24,7,24,5,19,12,9,C1,4
0,C1E005_FINAL_MATT_treated.txt,MATT,14968,1864,0.181923,0.086642,0.088653,0.190115,0.110744,0.122465,...,13,39,6,24,8,20,10,12,C1,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,C1E091_FINAL_MATT_treated.txt,MATT,14450,2119,0.107409,0.132883,0.083185,0.148562,0.147518,0.102788,...,12,5,0,11,1,3,2,0,C1,91
0,C1E092_FINAL_MATT_treated.txt,MATT,14322,1968,0.148333,0.117574,0.097088,0.163151,0.127951,0.100868,...,7,21,8,14,8,3,1,0,C1,92
0,C1E093_FINAL_MATT_treated.txt,MATT,19742,1988,0.237692,0.088824,0.110497,0.206333,0.069023,0.130444,...,52,53,22,33,32,1,14,0,C1,93
0,C1E094.5_FINAL_MATT_treated.txt,MATT,4342,832,0.119413,0.139438,0.075888,0.131709,0.156738,0.111246,...,0,1,0,0,0,0,0,0,C1,Others
