# Load Dependencies

In [8]:
import pandas as pds
import os
import pathlib
import re
import itertools

# Speaker labels (as they appear, upper-cased, in the transcripts) to extract text for.
actorList = [
    'MATT',
    'MARISHA',
    'TRAVIS',
    'LAURA',
    'SAM',
    'LIAM',
    'ASHLEY',
    'TALIESIN',
    'ORION',
]

# Get Character Intros

In [28]:
introPath = 'FINAL TEXT FILES/zcharacter-backgrounds_FINAL.txt'
folderPath = 'data/INTROS/'

# Create the output directory, including a missing parent 'data/' folder.
# Only "already exists" is treated as benign; any other OS error (permissions,
# invalid path, ...) now surfaces instead of being reported as "already exists".
try:
    os.makedirs('data/INTROS')
    print("Directory 'data/INTROS' was created.")

except FileExistsError:
    print("Directory 'data/INTROS' already exists")

# Read the whole intro file; the context manager closes it even if reading fails.
with open(introPath, "r", encoding = 'utf8') as current_file:
    introContent = current_file.read()

# Replace \n with a space, then collapse each pair of spaces into one
# (single pass: runs of three or more spaces are shortened but not fully collapsed).
introContent_noBreaks = introContent.replace("\n", " ")
introContent_noBreaks = introContent_noBreaks.replace("  ", " ")

for actor in actorList:

    if actor == 'LAURA':
        # Greedy variant: takes everything from the first "LAURA:" label to the end
        # of the text instead of stopping at the next speaker label.
        # NOTE(review): presumably LAURA's intro is the last one in the file and has
        # no following label to terminate on -- confirm against the source file.
        searchRegex = r"({actor}((, [A-Z]+)?):\s*.*)".format(actor = actor)

    else:
        # Actor label (optionally followed by ", NAME"), then lazily everything up to
        # and including the next "<uppercase letter or '+'>:".
        # NOTE(review): "[A-Z+]" is a character class (one uppercase letter or '+');
        # "[A-Z]+" may have been intended. Left unchanged to keep output identical.
        searchRegex = r"({actor}((, [A-Z]+)?):\s*.*?[A-Z+]:)".format(actor = actor)

    pattern = re.compile(searchRegex, re.MULTILINE)
    matches = re.findall(pattern, introContent_noBreaks)

    # Nothing found for this actor: report and move on.
    if len(matches) == 0:
        print('No matches for {actor}'.format(actor = actor))
        continue

    # findall returns one 3-tuple per match (one entry per capture group);
    # flatten the tuples and drop the empty group captures.
    # NOTE(review): when the optional ", NAME" suffix is present, groups 2 and 3 are
    # non-empty and therefore also end up in the saved text.
    flatMatches = [piece for piece in itertools.chain.from_iterable(matches) if piece != '']

    # Strip the speaker labels and the trailing whitespace of every piece.
    # NOTE(review): "[A-Z]*:" also matches a bare ':' so every colon inside the
    # spoken text is removed as well.
    finalMatches = [re.sub(r"[A-Z]*:", "", piece).rstrip() for piece in flatMatches]

    # Join everything into one long string of text for that actor
    tempStr = "".join(finalMatches).strip()

    tempStr = tempStr.replace('Impression', 'impression')

    # Written as UTF-8 bytes; the context manager guarantees the handle is closed.
    with open('data/INTROS/{actor}_intro.txt'.format(actor = actor), 'w+b') as save_file:
        save_file.write(tempStr.encode('utf8'))

    print("Saved matches to data/INTROS/{actor}_intro.txt!".format(actor = actor))

Directory 'data/INTROS' already exists
No matches for MATT
Saved matches to data/INTROS/MARISHA_intro.txt!
Saved matches to data/INTROS/TRAVIS_intro.txt!
Saved matches to data/INTROS/LAURA_intro.txt!
Saved matches to data/INTROS/SAM_intro.txt!
Saved matches to data/INTROS/LIAM_intro.txt!
Saved matches to data/INTROS/ASHLEY_intro.txt!
Saved matches to data/INTROS/TALIESIN_intro.txt!
Saved matches to data/INTROS/ORION_intro.txt!


# Transcripts to Corpora

In [44]:
folder = 'FINAL TEXT FILES'

# Make sure every actor has an output directory.
for actor in actorList:
    folderPath = 'data/ACTOR TRANSCRIPTS/{actor}'.format(actor = actor)

    try:
        # makedirs also creates missing parents ('data', 'data/ACTOR TRANSCRIPTS');
        # plain os.mkdir raised FileNotFoundError on a fresh checkout.
        os.makedirs(folderPath)
        print("Directory '{folderPath}' was created.".format(folderPath = folderPath))

    except FileExistsError:
        print("Directory '{folderPath}' already exists.".format(folderPath = folderPath))

# For each file in the folder,
for path in pathlib.Path(folder).iterdir():

    # Sub-directories are ignored.
    if not path.is_file():
        continue

    print("\n{path}".format(path = path))

    # Read the whole transcript; the context manager closes it even if reading fails.
    with open(path, "r", encoding = 'utf8') as current_file:
        content = current_file.read()

    # Replace \n with a space, then collapse each pair of spaces into one
    # (single pass: runs of three or more spaces are shortened but not fully collapsed).
    content_noBreaks = content.replace("\n", " ")
    content_noBreaks = content_noBreaks.replace("  ", " ")

    # For each actor,
    for actor in actorList:

        # Actor label (optionally followed by ", NAME"), then lazily everything up to
        # and including the next "<uppercase letter or '+'>:".
        # NOTE(review): "[A-Z+]" is a character class (one uppercase letter or '+');
        # "[A-Z]+" may have been intended. Because the terminating label is consumed,
        # a final turn with no following label is never matched.
        searchRegex = r"({actor}((, [A-Z]+)?):\s*.*?[A-Z+]:)".format(actor = actor)
        pattern = re.compile(searchRegex, re.MULTILINE)
        matches = re.findall(pattern, content_noBreaks)

        # Nothing found for this actor in this file: report and move on.
        if len(matches) == 0:
            print('No matches for {actor}'.format(actor = actor))
            continue

        # findall returns one 3-tuple per match (one entry per capture group);
        # flatten the tuples and drop the empty group captures.
        # NOTE(review): when the optional ", NAME" suffix is present, groups 2 and 3
        # are non-empty and therefore also end up in the saved text.
        flatMatches = [piece for piece in itertools.chain.from_iterable(matches) if piece != '']

        # Strip the speaker labels and the trailing whitespace of every piece.
        # NOTE(review): "[A-Z]*:" also matches a bare ':' so every colon inside the
        # spoken text is removed as well.
        finalMatches = [re.sub(r"[A-Z]*:", "", piece).rstrip() for piece in flatMatches]

        # Join everything into one long string of text for that actor
        tempStr = "".join(finalMatches).strip()

        # Written as UTF-8 bytes; the context manager guarantees the handle is closed.
        with open('data/ACTOR TRANSCRIPTS/{actor}/{filename}_{actor}.txt'.format(actor = actor, filename = path.stem), 'w+b') as save_file:
            save_file.write(tempStr.encode('utf8'))

        print("Saved matches to {actor}/{filename}_{actor}.txt!".format(actor = actor, filename = path.stem))

Directory 'data/ACTOR TRANSCRIPTS/MATT' already exists.
Directory 'data/ACTOR TRANSCRIPTS/MARISHA' already exists.
Directory 'data/ACTOR TRANSCRIPTS/TRAVIS' already exists.
Directory 'data/ACTOR TRANSCRIPTS/LAURA' already exists.
Directory 'data/ACTOR TRANSCRIPTS/SAM' already exists.
Directory 'data/ACTOR TRANSCRIPTS/LIAM' already exists.
Directory 'data/ACTOR TRANSCRIPTS/ASHLEY' already exists.
Directory 'data/ACTOR TRANSCRIPTS/TALIESIN' already exists.
Directory 'data/ACTOR TRANSCRIPTS/ORION' already exists.

FINAL TEXT FILES\C1E001_FINAL.txt
No matches for MATT
No matches for MARISHA
No matches for TRAVIS
No matches for LAURA
No matches for SAM
No matches for LIAM
No matches for ASHLEY
No matches for TALIESIN
No matches for ORION

FINAL TEXT FILES\C1E001_FINAL_V2.txt
Saved matches to MATT/C1E001_FINAL_V2_MATT.txt!
Saved matches to MARISHA/C1E001_FINAL_V2_MARISHA.txt!
Saved matches to TRAVIS/C1E001_FINAL_V2_TRAVIS.txt!
Saved matches to LAURA/C1E001_FINAL_V2_LAURA.txt!
Saved matches t

Saved matches to MATT/C1E020_FINAL_MATT.txt!
Saved matches to MARISHA/C1E020_FINAL_MARISHA.txt!
No matches for TRAVIS
No matches for LAURA
No matches for SAM
Saved matches to LIAM/C1E020_FINAL_LIAM.txt!
No matches for ASHLEY
No matches for TALIESIN
Saved matches to ORION/C1E020_FINAL_ORION.txt!

FINAL TEXT FILES\C1E021_FINAL.txt
Saved matches to MATT/C1E021_FINAL_MATT.txt!
Saved matches to MARISHA/C1E021_FINAL_MARISHA.txt!
No matches for TRAVIS
No matches for LAURA
No matches for SAM
Saved matches to LIAM/C1E021_FINAL_LIAM.txt!
No matches for ASHLEY
Saved matches to TALIESIN/C1E021_FINAL_TALIESIN.txt!
Saved matches to ORION/C1E021_FINAL_ORION.txt!

FINAL TEXT FILES\C1E022_FINAL.txt
Saved matches to MATT/C1E022_FINAL_MATT.txt!
Saved matches to MARISHA/C1E022_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C1E022_FINAL_TRAVIS.txt!
Saved matches to LAURA/C1E022_FINAL_LAURA.txt!
No matches for SAM
Saved matches to LIAM/C1E022_FINAL_LIAM.txt!
Saved matches to ASHLEY/C1E022_FINAL_ASHLEY.txt!
Save

Saved matches to MARISHA/C1E040_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C1E040_FINAL_TRAVIS.txt!
Saved matches to LAURA/C1E040_FINAL_LAURA.txt!
No matches for SAM
Saved matches to LIAM/C1E040_FINAL_LIAM.txt!
Saved matches to ASHLEY/C1E040_FINAL_ASHLEY.txt!
Saved matches to TALIESIN/C1E040_FINAL_TALIESIN.txt!
No matches for ORION

FINAL TEXT FILES\C1E041_FINAL.txt
Saved matches to MATT/C1E041_FINAL_MATT.txt!
Saved matches to MARISHA/C1E041_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C1E041_FINAL_TRAVIS.txt!
Saved matches to LAURA/C1E041_FINAL_LAURA.txt!
Saved matches to SAM/C1E041_FINAL_SAM.txt!
Saved matches to LIAM/C1E041_FINAL_LIAM.txt!
Saved matches to ASHLEY/C1E041_FINAL_ASHLEY.txt!
Saved matches to TALIESIN/C1E041_FINAL_TALIESIN.txt!
No matches for ORION

FINAL TEXT FILES\C1E042_FINAL.txt
Saved matches to MATT/C1E042_FINAL_MATT.txt!
Saved matches to MARISHA/C1E042_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C1E042_FINAL_TRAVIS.txt!
Saved matches to LAURA/C1E042_FINAL_LAURA.txt!
S

Saved matches to MATT/C1E061_FINAL_MATT.txt!
Saved matches to MARISHA/C1E061_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C1E061_FINAL_TRAVIS.txt!
Saved matches to LAURA/C1E061_FINAL_LAURA.txt!
Saved matches to SAM/C1E061_FINAL_SAM.txt!
Saved matches to LIAM/C1E061_FINAL_LIAM.txt!
No matches for ASHLEY
Saved matches to TALIESIN/C1E061_FINAL_TALIESIN.txt!
No matches for ORION

FINAL TEXT FILES\C1E062_FINAL.txt
Saved matches to MATT/C1E062_FINAL_MATT.txt!
Saved matches to MARISHA/C1E062_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C1E062_FINAL_TRAVIS.txt!
Saved matches to LAURA/C1E062_FINAL_LAURA.txt!
Saved matches to SAM/C1E062_FINAL_SAM.txt!
No matches for LIAM
Saved matches to ASHLEY/C1E062_FINAL_ASHLEY.txt!
Saved matches to TALIESIN/C1E062_FINAL_TALIESIN.txt!
No matches for ORION

FINAL TEXT FILES\C1E063_FINAL.txt
Saved matches to MATT/C1E063_FINAL_MATT.txt!
Saved matches to MARISHA/C1E063_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C1E063_FINAL_TRAVIS.txt!
Saved matches to LAURA/C1E063_F

Saved matches to MATT/C1E080_FINAL_MATT.txt!
Saved matches to MARISHA/C1E080_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C1E080_FINAL_TRAVIS.txt!
Saved matches to LAURA/C1E080_FINAL_LAURA.txt!
Saved matches to SAM/C1E080_FINAL_SAM.txt!
Saved matches to LIAM/C1E080_FINAL_LIAM.txt!
No matches for ASHLEY
Saved matches to TALIESIN/C1E080_FINAL_TALIESIN.txt!
No matches for ORION

FINAL TEXT FILES\C1E081_FINAL.txt
Saved matches to MATT/C1E081_FINAL_MATT.txt!
Saved matches to MARISHA/C1E081_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C1E081_FINAL_TRAVIS.txt!
Saved matches to LAURA/C1E081_FINAL_LAURA.txt!
Saved matches to SAM/C1E081_FINAL_SAM.txt!
Saved matches to LIAM/C1E081_FINAL_LIAM.txt!
No matches for ASHLEY
Saved matches to TALIESIN/C1E081_FINAL_TALIESIN.txt!
No matches for ORION

FINAL TEXT FILES\C1E082_FINAL.txt
Saved matches to MATT/C1E082_FINAL_MATT.txt!
Saved matches to MARISHA/C1E082_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C1E082_FINAL_TRAVIS.txt!
Saved matches to LAURA/C1E082_FIN

Saved matches to SAM/C1E102_FINAL_SAM.txt!
Saved matches to LIAM/C1E102_FINAL_LIAM.txt!
Saved matches to ASHLEY/C1E102_FINAL_ASHLEY.txt!
Saved matches to TALIESIN/C1E102_FINAL_TALIESIN.txt!
No matches for ORION

FINAL TEXT FILES\C1E103_FINAL.txt
Saved matches to MATT/C1E103_FINAL_MATT.txt!
Saved matches to MARISHA/C1E103_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C1E103_FINAL_TRAVIS.txt!
Saved matches to LAURA/C1E103_FINAL_LAURA.txt!
Saved matches to SAM/C1E103_FINAL_SAM.txt!
Saved matches to LIAM/C1E103_FINAL_LIAM.txt!
Saved matches to ASHLEY/C1E103_FINAL_ASHLEY.txt!
Saved matches to TALIESIN/C1E103_FINAL_TALIESIN.txt!
No matches for ORION

FINAL TEXT FILES\C1E104_FINAL.txt
Saved matches to MATT/C1E104_FINAL_MATT.txt!
Saved matches to MARISHA/C1E104_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C1E104_FINAL_TRAVIS.txt!
Saved matches to LAURA/C1E104_FINAL_LAURA.txt!
Saved matches to SAM/C1E104_FINAL_SAM.txt!
Saved matches to LIAM/C1E104_FINAL_LIAM.txt!
Saved matches to ASHLEY/C1E104_FINAL

Saved matches to MATT/C2E004_FINAL_MATT.txt!
Saved matches to MARISHA/C2E004_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C2E004_FINAL_TRAVIS.txt!
Saved matches to LAURA/C2E004_FINAL_LAURA.txt!
Saved matches to SAM/C2E004_FINAL_SAM.txt!
Saved matches to LIAM/C2E004_FINAL_LIAM.txt!
Saved matches to ASHLEY/C2E004_FINAL_ASHLEY.txt!
Saved matches to TALIESIN/C2E004_FINAL_TALIESIN.txt!
No matches for ORION

FINAL TEXT FILES\C2E005_FINAL.txt
Saved matches to MATT/C2E005_FINAL_MATT.txt!
Saved matches to MARISHA/C2E005_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C2E005_FINAL_TRAVIS.txt!
Saved matches to LAURA/C2E005_FINAL_LAURA.txt!
Saved matches to SAM/C2E005_FINAL_SAM.txt!
Saved matches to LIAM/C2E005_FINAL_LIAM.txt!
No matches for ASHLEY
Saved matches to TALIESIN/C2E005_FINAL_TALIESIN.txt!
No matches for ORION

FINAL TEXT FILES\C2E006_FINAL.txt
Saved matches to MATT/C2E006_FINAL_MATT.txt!
Saved matches to MARISHA/C2E006_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C2E006_FINAL_TRAVIS.txt!
Saved 

Saved matches to TALIESIN/C2E026_FINAL_TALIESIN.txt!
No matches for ORION

FINAL TEXT FILES\C2E027_FINAL.txt
Saved matches to MATT/C2E027_FINAL_MATT.txt!
Saved matches to MARISHA/C2E027_FINAL_MARISHA.txt!
No matches for TRAVIS
No matches for LAURA
Saved matches to SAM/C2E027_FINAL_SAM.txt!
Saved matches to LIAM/C2E027_FINAL_LIAM.txt!
No matches for ASHLEY
Saved matches to TALIESIN/C2E027_FINAL_TALIESIN.txt!
No matches for ORION

FINAL TEXT FILES\C2E028_FINAL.txt
Saved matches to MATT/C2E028_FINAL_MATT.txt!
Saved matches to MARISHA/C2E028_FINAL_MARISHA.txt!
No matches for TRAVIS
No matches for LAURA
Saved matches to SAM/C2E028_FINAL_SAM.txt!
Saved matches to LIAM/C2E028_FINAL_LIAM.txt!
No matches for ASHLEY
Saved matches to TALIESIN/C2E028_FINAL_TALIESIN.txt!
No matches for ORION

FINAL TEXT FILES\C2E029_FINAL.txt
Saved matches to MATT/C2E029_FINAL_MATT.txt!
Saved matches to MARISHA/C2E029_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C2E029_FINAL_TRAVIS.txt!
Saved matches to LAURA/C2E029_

Saved matches to TALIESIN/C2E044_FINAL_TALIESIN.txt!
No matches for ORION

FINAL TEXT FILES\C2E045_FINAL.txt
Saved matches to MATT/C2E045_FINAL_MATT.txt!
Saved matches to MARISHA/C2E045_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C2E045_FINAL_TRAVIS.txt!
Saved matches to LAURA/C2E045_FINAL_LAURA.txt!
Saved matches to SAM/C2E045_FINAL_SAM.txt!
Saved matches to LIAM/C2E045_FINAL_LIAM.txt!
Saved matches to ASHLEY/C2E045_FINAL_ASHLEY.txt!
Saved matches to TALIESIN/C2E045_FINAL_TALIESIN.txt!
No matches for ORION

FINAL TEXT FILES\C2E046_FINAL.txt
Saved matches to MATT/C2E046_FINAL_MATT.txt!
Saved matches to MARISHA/C2E046_FINAL_MARISHA.txt!
Saved matches to TRAVIS/C2E046_FINAL_TRAVIS.txt!
Saved matches to LAURA/C2E046_FINAL_LAURA.txt!
Saved matches to SAM/C2E046_FINAL_SAM.txt!
Saved matches to LIAM/C2E046_FINAL_LIAM.txt!
Saved matches to ASHLEY/C2E046_FINAL_ASHLEY.txt!
Saved matches to TALIESIN/C2E046_FINAL_TALIESIN.txt!
No matches for ORION

FINAL TEXT FILES\C2E047.en.txt
Saved matches to M

Saved matches to MATT/CR-Extra_Pants Optional Critmas_MATT.txt!
Saved matches to MARISHA/CR-Extra_Pants Optional Critmas_MARISHA.txt!
No matches for TRAVIS
Saved matches to LAURA/CR-Extra_Pants Optional Critmas_LAURA.txt!
No matches for SAM
Saved matches to LIAM/CR-Extra_Pants Optional Critmas_LIAM.txt!
Saved matches to ASHLEY/CR-Extra_Pants Optional Critmas_ASHLEY.txt!
Saved matches to TALIESIN/CR-Extra_Pants Optional Critmas_TALIESIN.txt!
No matches for ORION

FINAL TEXT FILES\CR-Extra_perfume.en.txt
No matches for MATT
Saved matches to MARISHA/CR-Extra_perfume.en_MARISHA.txt!
No matches for TRAVIS
No matches for LAURA
No matches for SAM
No matches for LIAM
No matches for ASHLEY
No matches for TALIESIN
No matches for ORION

FINAL TEXT FILES\CR-Extra_RPG Show QA and Battle Royale.en.txt
Saved matches to MATT/CR-Extra_RPG Show QA and Battle Royale.en_MATT.txt!
Saved matches to MARISHA/CR-Extra_RPG Show QA and Battle Royale.en_MARISHA.txt!
No matches for TRAVIS
No matches for LAURA
No m

Saved matches to MARISHA/Honey Heist 3 Tovas Honeys.en_MARISHA.txt!
No matches for TRAVIS
Saved matches to LAURA/Honey Heist 3 Tovas Honeys.en_LAURA.txt!
No matches for SAM
No matches for LIAM
No matches for ASHLEY
No matches for TALIESIN
No matches for ORION

FINAL TEXT FILES\zcharacter-backgrounds_FINAL.txt
No matches for MATT
Saved matches to MARISHA/zcharacter-backgrounds_FINAL_MARISHA.txt!
Saved matches to TRAVIS/zcharacter-backgrounds_FINAL_TRAVIS.txt!
No matches for LAURA
Saved matches to SAM/zcharacter-backgrounds_FINAL_SAM.txt!
Saved matches to LIAM/zcharacter-backgrounds_FINAL_LIAM.txt!
Saved matches to ASHLEY/zcharacter-backgrounds_FINAL_ASHLEY.txt!
Saved matches to TALIESIN/zcharacter-backgrounds_FINAL_TALIESIN.txt!
Saved matches to ORION/zcharacter-backgrounds_FINAL_ORION.txt!

FINAL TEXT FILES\zEP125_FINAL.txt
Saved matches to MATT/zEP125_FINAL_MATT.txt!
Saved matches to MARISHA/zEP125_FINAL_MARISHA.txt!
Saved matches to TRAVIS/zEP125_FINAL_TRAVIS.txt!
Saved matches to LA

In [85]:
folder = 'FINAL TEXT FILES'

# Load one episode for manual inspection of the regex output.
# The path is built with os.path.join instead of a literal backslash: "\C" is an
# invalid escape sequence in a normal string literal and the backslash separator
# only resolves on Windows.
with open(os.path.join(folder, 'C1E065_FINAL.txt'), "r", encoding = 'utf8') as current_file:
    content = current_file.read()

# Replace \n with a space, then collapse each pair of spaces into one.
content_noBreaks = content.replace("\n", " ")
content_noBreaks = content_noBreaks.replace("  ", " ")

# NOTE(review): `pattern` is not defined in this cell -- it is whatever compiled
# regex an earlier-run cell left in the kernel, so the result depends on run order.
matches = re.findall(pattern, content_noBreaks)
matches

[("MATT: Hello, everyone, and welcome to tonight's episode of Critical Role, where Taliesin jumps the gun and says his phrase too late. TRAVIS:",
  '',
  ''),
 ("MATT: Welcome. Happy to be back. Very emotional episode last week. We're going to kick it into the next step of Vox Machina's adventure this week, and then, me, Marisha, and Taliesin will be gone next week, so there will not be progression in this current storyline. However, Liam O'Brien over there has been cracking up an awesome one-shot adventure for our remaining players and a few guests. Liam, if you want to talk a little about it? LIAM:",
  '',
  ''),
 ("MATT: Yeah, it's going to be fun. This is Liam's brainchild, and I'm excited to see them play through it. LIAM:",
  '',
  ''),
 ("MATT: I think you'll be fine. If the last game was any indication, I think you'll be fine. All right, other announcements: once again, we have our wonderful sponsor with a new theme kicking in. Sam, you want to take this away? SAM:",
  '',
  ''

In [39]:
# Write the transcript table to disk without the index column.
df.to_csv(
    r'data/critical_role_transcripts.csv',
    index=False,
)

In [33]:
# Turn the {file stem: transcript text} mapping into a two-column frame.
# pandas is imported as `pds` in this notebook, so the `pd` alias used before
# raised NameError on a fresh kernel.
df = (
    pds.DataFrame
    .from_dict(transcriptDict, orient = 'index', columns = ['transcript'])
    .reset_index()  # move the dict keys out of the index into a regular column
)
df.columns = ['file', 'transcript']

In [111]:
# Dump the raw text currently held in `content` (original line breaks included).
# NOTE(review): `content` is assigned in other cells, so what is shown depends on
# which of them ran last.
print(content)

TALIESIN: Pissblossom.

MATT: Hello, everyone, and welcome to tonight's
episode of Critical Role, where Taliesin jumps the

gun and says his phrase too late.

TRAVIS: Way too late, man! Way too late!

MATT: Welcome. Happy to be back. Very emotional
episode last week. We're going to kick it into the

next step of Vox Machina's adventure this week,
and then, me, Marisha, and Taliesin will be gone

next week, so there will not be progression in
this current storyline. However, Liam O'Brien over

there has been cracking up an awesome one-shot
adventure for our remaining players and a few

guests. Liam, if you want to talk a little about
it?

LIAM: Yeah, I'll tell you who's coming on by.
Sure, I'll say who's coming on by. Mary Elizabeth

McGlynn and Ashly Burch. And I don't want to spoil
it, but Matt knows. It's different.

MATT: Yeah, it's going to be fun. This is Liam's
brainchild, and I'm excited to see them play

through it.

LIAM: It could blow up in my face.

MATT: I think you'll be f

In [127]:
# Rebuild the per-actor speaker regex for every name in actorList.
# NOTE(review): the visible body only rebinds `searchRegex`; nothing here compiles
# or applies it, so after the loop it holds the pattern for the last actor.
for actor in actorList:
            
    #Pattern: the actor's label (optionally ", NAME"), then lazily everything up to
    #and including the next "<uppercase letter or '+'>:"
    searchRegex = r"({actor}((, [A-Z]+)?):\s*.*?[A-Z+]:)".format(actor = actor)
#     print(searchRegex)

[("MATT: Hello, everyone, and welcome to tonight's episode of Critical Role, where Taliesin jumps the gun and says his phrase too late. TRAVIS:", '', ''), ("MATT: Welcome. Happy to be back. Very emotional episode last week. We're going to kick it into the next step of Vox Machina's adventure this week, and then, me, Marisha, and Taliesin will be gone next week, so there will not be progression in this current storyline. However, Liam O'Brien over there has been cracking up an awesome one-shot adventure for our remaining players and a few guests. Liam, if you want to talk a little about it? LIAM:", '', ''), ("MATT: Yeah, it's going to be fun. This is Liam's brainchild, and I'm excited to see them play through it. LIAM:", '', ''), ("MATT: I think you'll be fine. If the last game was any indication, I think you'll be fine. All right, other announcements: once again, we have our wonderful sponsor with a new theme kicking in. Sam, you want to take this away? SAM:", '', ''), ('MATT: All righ

In [34]:
# Preview the first rows of the transcript table.
df.head()

Unnamed: 0,file,transcript
0,C1E001_FINAL,Matt: Hello everyone. My name is Matthew Merce...
1,C1E001_FINAL_V2,MATT: Hello everyone. My name is Matthew Merce...
2,C1E002_FINAL,"MATT: Hello everyone, and welcome\nto the seco..."
3,C1E003_FINAL,"MATT: Hey, everyone. Sorry about that little i..."
4,C1E004_FINAL,"MATT: Everyone, welcome to the new\nepisode of..."


In [22]:
folder = 'FINAL TEXT FILES'

# Maps each transcript's file stem (file name without extension) to its raw text.
transcriptDict = dict()

for path in pathlib.Path(folder).iterdir():
    # Sub-directories are ignored.
    if path.is_file():

        print(path.stem)

        # The context manager closes the handle even if reading/decoding fails.
        with open(path, "r", encoding = 'utf8') as current_file:
            transcriptDict[path.stem] = current_file.read()

C1E001_FINAL
C1E001_FINAL_V2
C1E002_FINAL
C1E003_FINAL
C1E004_FINAL
C1E005_FINAL
C1E006_FINAL
C1E007_FINAL
C1E008_FINAL
C1E009_FINAL
C1E010_FINAL
C1E011_FINAL
C1E012_FINAL
C1E013_FINAL
C1E014_FINAL
C1E015-FINAL
C1E015_FINAL
C1E016_FINAL
C1E017_FINAL
C1E018_FINAL
C1E019_FINAL
C1E020_FINAL
C1E021_FINAL
C1E022_FINAL
C1E023_FINAL
C1E024_FINAL
C1E025_FINAL
C1E026_FINAL
C1E027_FINAL
C1E028_FINAL
C1E029_FINAL
C1E030_FINAL
C1E031-1_FINAL
C1E031-2_FINAL
C1E032_FINAL
C1E033-1_FINAL
C1E033-2_FINAL
C1E034_FINAL
C1E035-1_FINAL
C1E035-2_FINAL
C1E036_FINAL
C1E037_FINAL
C1E038_FINAL
C1E039_FINAL
C1E040_FINAL
C1E041_FINAL
C1E042_FINAL
C1E043_FINAL
C1E044_FINAL
C1E045_FINAL
C1E046_FINAL
C1E047_FINAL
C1E048_FINAL
C1E049-5_FINAL
C1E049_FINAL
C1E050_FINAL
C1E051_FINAL
C1E052_FINAL
C1E053_FINAL
C1E054_FINAL
C1E055_FINAL
C1E056_FINAL
C1E057_FINAL
C1E058-5_FINAL
C1E058_FINAL
C1E059_FINAL
C1E060_FINAL
C1E061_FINAL
C1E062_FINAL
C1E063_FINAL
C1E064_FINAL
C1E065-5_FINAL
C1E065_FINAL
C1E066_FINAL
C1E067_FINAL
C1E0