In [32]:
import os
from urllib.parse import quote

import bs4
from bs4 import BeautifulSoup
import requests

# Root of the IMSDb site; relative links scraped from its pages are appended to it.
BASE_URL = 'http://www.imsdb.com'
# Directory that holds the corpus. The default is the original local path; set the
# IMSDB_CORPUS_PATH environment variable to relocate it without editing the notebook.
corpus_path = os.environ.get('IMSDB_CORPUS_PATH', 'C:/Users/Rik/Documents/corpus/')
# Sub-directory of corpus_path that receives one JSON file per parsed script.
scripts_path = 'imsdb_parsed'

In [33]:
def get_script_blocks_from_link(script_url, timeout=30):
    """Download a script page and return its text split into annotated blocks.

    The parsed document is walked in document order. Text nodes are collected
    only after a ``td`` element carrying the ``scrtext`` class has been seen,
    and collection stops again at the next ``table`` element encountered while
    reading. The contents of ``head`` and ``script`` elements are skipped.

    The walk uses an explicit stack instead of recursion, so deeply nested
    markup cannot exhaust the interpreter's recursion limit.

    Args:
        script_url: Absolute URL of the page to download.
        timeout: Seconds to wait for the server before ``requests`` gives up;
            without it a stalled connection would block forever.

    Returns:
        A list of dicts, one per collected text node, with the keys
        ``text`` (the node text without leading whitespace),
        ``leading_spaces`` (number of leading whitespace characters removed),
        ``is_bold`` (True when any ancestor is a ``b`` element) and
        ``is_empty`` (True when the node holds only whitespace).

    Raises:
        requests.RequestException: If the download fails or times out.
    """
    print("Retrieving script on", script_url)
    response = requests.get(script_url, timeout=timeout)
    script_soup = BeautifulSoup(response.text, "html.parser")

    found_blocks = []
    # Non-empty while we are inside the script text; used as a simple on/off flag.
    reading_state = []
    # Each entry is (node, inherited_bold). Children are pushed in reverse so
    # that popping visits them in document (pre-order) order.
    pending = [(script_soup, False)]
    while pending:
        node, is_bold = pending.pop()
        if isinstance(node, bs4.element.NavigableString) and len(reading_state) > 0:
            text = node.string
            stripped_text = text.lstrip()
            found_blocks.append({
                'text': stripped_text,
                'leading_spaces': len(text) - len(stripped_text),
                'is_bold': is_bold,
                'is_empty': not stripped_text.strip()
            })
        if isinstance(node, bs4.element.Tag):
            # Membership test rather than indexing [0]: does not assume the
            # class attribute holds at least one value.
            if node.name == 'td' and 'scrtext' in (node.get('class') or []):
                reading_state.append(True)
            if node.name == 'table' and len(reading_state) > 0:
                reading_state.pop()
            if node.name != 'head' and node.name != 'script':
                child_is_bold = node.name == 'b' or is_bold
                pending.extend(
                    (child, child_is_bold) for child in reversed(node.contents)
                )
    return found_blocks

In [34]:
def get_character_scene_leading_spaces(df_script_blocks):
    """Find the two most frequent indentation widths among non-empty bold blocks.

    Args:
        df_script_blocks: DataFrame with at least the columns
            ``leading_spaces``, ``is_bold`` and ``is_empty``.

    Returns:
        A ``(most_frequent, second_most_frequent)`` tuple of indentation
        widths, or ``(None, None)`` when there are no non-empty bold blocks or
        fewer than three distinct widths. The name of this function indicates
        the first value is meant as the character-cue indent and the second as
        the scene-heading indent.
    """
    is_bold = df_script_blocks['is_bold'].astype(bool)
    is_empty = df_script_blocks['is_empty'].astype(bool)
    bold_blocks = df_script_blocks[is_bold & ~is_empty]
    if len(bold_blocks) == 0:
        return None, None
    # Group by the column *name*, not a one-element list: with a list, recent
    # pandas versions yield 1-tuples as group keys, and a tuple never compares
    # equal to the integer indents of the individual blocks.
    count_by_leading = (
        bold_blocks.groupby('leading_spaces').size().sort_values(ascending=False)
    )
    # NOTE(review): at least three distinct indents are required although only
    # the top two are used; kept as-is so the set of accepted scripts does not
    # change — confirm whether `>= 2` was intended.
    if len(count_by_leading) > 2:
        return count_by_leading.index[0], count_by_leading.index[1]
    return None, None

In [35]:
import re

def clean_text(text):
    """Return ``text`` on a single line with whitespace runs collapsed.

    Carriage returns and newlines become spaces, every run of whitespace is
    reduced to one space, and surrounding whitespace is removed.
    """
    for line_break in ('\r', '\n'):
        text = text.replace(line_break, ' ')
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()

In [36]:
import pandas as pd
import numpy as np
def _store_pending_dialogue(scene, dialogue):
    """Append a pending dialogue to ``scene`` and return what is still pending.

    When both a scene and a dialogue exist, the dialogue text is stripped, the
    dialogue is appended to the scene body and ``None`` is returned. Otherwise
    nothing is stored and ``dialogue`` is returned unchanged.
    """
    if dialogue is not None and scene is not None:
        dialogue['text'] = dialogue['text'].strip()
        scene['body'].append(dialogue)
        return None
    return dialogue


def get_script_from_page(script_url):
    """Download a script page and structure it into intro, scenes and characters.

    Bold blocks indented like a scene heading open a new scene, bold blocks
    indented like a character cue open a new dialogue, and empty bold blocks
    close the running dialogue. Non-bold text before the first scene goes to
    the intro; afterwards it is added to the running dialogue if there is one,
    and stored as an action otherwise. A bold block reading ``END`` that
    matches neither indent stops the parse.

    Args:
        script_url: Absolute URL of the script page.

    Returns:
        ``None`` when the page cannot be retrieved, contains no text blocks or
        has no usable indentation pattern. Otherwise a dict with

        * ``intro``: text found before the first scene,
        * ``scenes``: list of ``{'heading': str, 'body': list}`` where each
          body entry is ``{'type': 'dialogue', 'character': str, 'text': str}``
          or ``{'type': 'action', 'text': str}``,
        * ``characters``: character names in order of first appearance.
    """
    try:
        script_blocks = get_script_blocks_from_link(script_url)
    except Exception as exc:
        # `Exception` rather than a bare except, so Ctrl-C / kernel interrupt
        # still stops a long scraping run.
        print("error getting blocks", repr(exc))
        return None
    if len(script_blocks) == 0:
        print("No blocks found")
        return None
    df_script_blocks = pd.DataFrame(script_blocks)
    character_leading_spaces, scene_leading_spaces = get_character_scene_leading_spaces(df_script_blocks)
    # Compare against None explicitly: an indent of 0 is a valid result and
    # must not be mistaken for "not found".
    if character_leading_spaces is None or scene_leading_spaces is None:
        print("Couldn't find correct leading spaces")
        return None

    intro = ""
    scenes = []
    current_scene = None
    characters = []
    current_dialogue = None
    # Iterate over the plain list of dicts; the DataFrame is only needed for
    # the indentation statistics above.
    for block in script_blocks:
        cleaned_text = clean_text(block['text'])
        if current_scene is None and not block['is_bold']:
            # No scene yet and not bold: still in the intro.
            intro += cleaned_text + " "
        elif block['is_bold']:
            if block['is_empty']:
                # Checked first so a whitespace-only bold block whose length
                # happens to equal one of the indents is never taken for an
                # empty-named scene or character. It ends the running dialogue.
                current_dialogue = _store_pending_dialogue(current_scene, current_dialogue)
            elif block['leading_spaces'] == scene_leading_spaces:
                # Keep a still-open dialogue with the scene it was spoken in.
                current_dialogue = _store_pending_dialogue(current_scene, current_dialogue)
                if current_scene is not None:
                    scenes.append(current_scene)
                current_scene = {'heading': cleaned_text, 'body': []}
            elif block['leading_spaces'] == character_leading_spaces:
                # Store the previous speaker's lines instead of overwriting them.
                current_dialogue = _store_pending_dialogue(current_scene, current_dialogue)
                character_name = cleaned_text
                if character_name not in characters:
                    characters.append(character_name)
                current_dialogue = {'type': 'dialogue', 'character': character_name, 'text': ''}
            elif current_scene is None:
                # Bold text before the first scene still belongs to the intro.
                intro += cleaned_text + " "
            elif cleaned_text == "END":
                # End of the script.
                break
        elif current_scene is not None:
            if current_dialogue is not None:
                current_dialogue['text'] += cleaned_text + " "
            else:
                current_scene['body'].append({'type': 'action', 'text': cleaned_text})

    # Flush whatever is still open when the blocks run out.
    current_dialogue = _store_pending_dialogue(current_scene, current_dialogue)
    if current_scene is not None:
        scenes.append(current_scene)

    intro = intro.strip()
    print("Found", len(scenes), "scenes and", len(characters), "characters")
    return {
        'intro': intro,
        'scenes': scenes,
        'characters': characters
    }

In [37]:
def get_script(anchor, timeout=30):
    """Resolve an index-page anchor to its script and parse it.

    The anchor's ``href`` is fetched relative to ``BASE_URL``; the first link
    inside the first centre-aligned paragraph of that page is taken as the
    script link. Only links ending in ``.html`` are parsed.

    Args:
        anchor: BeautifulSoup ``a`` tag taken from the script index page.
        timeout: Seconds to wait for the front page before giving up.

    Returns:
        ``(file_title, script)`` on success, where ``script`` is the dict from
        ``get_script_from_page`` extended with ``title`` and ``file_title``.
        ``(None, None)`` when the front page cannot be fetched, holds no script
        link, links to a non-HTML script, or the script cannot be parsed.
    """
    title = anchor.text
    print(title)
    relative_link = anchor['href']
    tail = relative_link.split('/')[-1]
    script_front_url = BASE_URL + quote(relative_link)
    try:
        front_page_response = requests.get(script_front_url, timeout=timeout)
    except requests.RequestException as exc:
        # One unreachable page should not abort the whole scraping run.
        print('%s could not be retrieved: %s' % (tail, exc))
        return None, None
    front_soup = BeautifulSoup(front_page_response.text, "html.parser")

    try:
        script_link = front_soup.find_all('p', align="center")[0].a['href']
    except (IndexError, TypeError, KeyError):
        # IndexError: no centred paragraph; TypeError: the paragraph has no
        # anchor (`.a` is None); KeyError: the anchor has no href.
        print('%s has no script :(' % tail)
        return None, None

    if script_link.endswith('.html'):
        file_title = script_link.split('/')[-1].split(' Script')[0]
        # Remove the extension as a suffix. str.strip('.html') removes any of
        # the characters . h t m l from both ends and mangles names such as
        # "Doubt.html" -> "Doub".
        if file_title.endswith('.html'):
            file_title = file_title[:-len('.html')]
        script_url = BASE_URL + script_link
        script = get_script_from_page(script_url)
        if script is None:
            return None, None
        script['title'] = title
        script['file_title'] = file_title
        return file_title, script
    else:
        print('%s is a pdf :(' % tail)
        return None, None

In [38]:
import json

# Download the index page listing every script, parse each linked script and
# write it as <file_title>.json below corpus_path/scripts_path.
response = requests.get(BASE_URL + '/all%20scripts/', timeout=30)
html = response.text

soup = BeautifulSoup(html, "html.parser")
paragraphs = soup.find_all('p')

# Make sure the target directory exists; otherwise every write would fail.
output_dir = os.path.join(corpus_path, scripts_path)
os.makedirs(output_dir, exist_ok=True)

correct_parsed = []  # hrefs whose script was parsed and written
no_script = []       # hrefs for which get_script returned nothing
error = []           # hrefs whose script could not be written

for p in paragraphs:
    anchor = p.a
    # Paragraphs without a link are not script entries.
    if anchor is None or not anchor.has_attr('href'):
        continue

    title, script = get_script(anchor)
    if not script:
        no_script.append(anchor['href'])
        continue
    try:
        # Serialise before opening the file so a failure leaves no empty file.
        payload = json.dumps(script)
        with open(os.path.join(output_dir, title + '.json'), 'w', encoding='utf-8') as outfile:
            outfile.write(payload)
        correct_parsed.append(anchor['href'])
    except Exception as exc:
        # `Exception` rather than a bare except, so an interrupt still stops the loop.
        print("error while writing", exc)
        error.append(anchor['href'])
print("Correctly parsed", len(correct_parsed))
print("No script", len(no_script))
print("Errors", len(error))
for error_url in error:
    print(error_url)

10 Things I Hate About You
Retrieving script on http://www.imsdb.com/scripts/10-Things-I-Hate-About-You.html
Found 141 scenes and 38 characters
12
Retrieving script on http://www.imsdb.com/scripts/12.html
Found 311 scenes and 58 characters
12 and Holding
Retrieving script on http://www.imsdb.com/scripts/12-and-Holding.html
Found 266 scenes and 35 characters
12 Monkeys
Retrieving script on http://www.imsdb.com/scripts/12-Monkeys.html
Found 201 scenes and 105 characters
12 Years a Slave
Retrieving script on http://www.imsdb.com/scripts/12-Years-a-Slave.html
Found 228 scenes and 136 characters
127 Hours
Retrieving script on http://www.imsdb.com/scripts/127-Hours.html
Found 262 scenes and 49 characters
1492: Conquest of Paradise
Retrieving script on http://www.imsdb.com/scripts/1492-Conquest-of-Paradise.html
Found 176 scenes and 54 characters
15 Minutes
Retrieving script on http://www.imsdb.com/scripts/15-Minutes.html
Found 111 scenes and 96 characters
17 Again
Retrieving script on http://

Found 238 scenes and 78 characters
American President, The
Retrieving script on http://www.imsdb.com/scripts/American-President,-The.html
Found 145 scenes and 88 characters
American Psycho
Retrieving script on http://www.imsdb.com/scripts/American-Psycho.html
Couldn't find correct leading spaces
American Shaolin: King of Kickboxers II
Retrieving script on http://www.imsdb.com/scripts/American-Shaolin-King-of-Kickboxers-II.html
Couldn't find correct leading spaces
American Sniper
Retrieving script on http://www.imsdb.com/scripts/American-Sniper.html
Found 293 scenes and 50 characters
American Splendor
Retrieving script on http://www.imsdb.com/scripts/American-Splendor.html
Found 247 scenes and 56 characters
American Werewolf in London
Retrieving script on http://www.imsdb.com/scripts/American-Werewolf-in-London.html
Couldn't find correct leading spaces
American, The
Retrieving script on http://www.imsdb.com/scripts/American,-The.html
Found 187 scenes and 13 characters
Amityville Asylum,

Couldn't find correct leading spaces
Basquiat
Retrieving script on http://www.imsdb.com/scripts/Basquiat.html
Found 164 scenes and 76 characters
Batman
Retrieving script on http://www.imsdb.com/scripts/Batman.html
Found 62 scenes and 86 characters
Batman 2
Retrieving script on http://www.imsdb.com/scripts/Batman-2.html
Found 356 scenes and 83 characters
Batman and Robin
Batman and Robin Script.html has no script :(
Batman Begins
Batman Begins Script.html has no script :(
Batman Forever
Batman Forever Script.html has no script :(
Batman Returns
Batman Returns Script.html has no script :(
Battle of Algiers, The
Retrieving script on http://www.imsdb.com/scripts/Battle-of-Algiers,-The.html
Found 145 scenes and 69 characters
Battle of Shaker Heights, The
Retrieving script on http://www.imsdb.com/scripts/Battle-of-Shaker-Heights,-The.html
Found 120 scenes and 39 characters
Battle: Los Angeles
Retrieving script on http://www.imsdb.com/scripts/Battle-Los-Angeles.html
Found 161 scenes and 297 c

Couldn't find correct leading spaces
Box, The
Retrieving script on http://www.imsdb.com/scripts/Box,-The.html
Found 393 scenes and 105 characters
Boxtrolls, The
Retrieving script on http://www.imsdb.com/scripts/Boxtrolls,-The.html
Found 214 scenes and 46 characters
Boyhood
Retrieving script on http://www.imsdb.com/scripts/Boyhood.html
Found 397 scenes and 105 characters
Braveheart
Retrieving script on http://www.imsdb.com/scripts/Braveheart.html
Found 198 scenes and 93 characters
Brazil
Retrieving script on http://www.imsdb.com/scripts/Brazil.html
Found 258 scenes and 2 characters
Break
Retrieving script on http://www.imsdb.com/scripts/Break.html
Couldn't find correct leading spaces
Breakdown
Retrieving script on http://www.imsdb.com/scripts/Breakdown.html
Couldn't find correct leading spaces
Breakfast Club, The
Retrieving script on http://www.imsdb.com/scripts/Breakfast-Club,-The.html
Found 75 scenes and 5 characters
Breaking Away
Retrieving script on http://www.imsdb.com/scripts/Brea

Retrieving script on http://www.imsdb.com/scripts/Color-of-Night.html
Found 120 scenes and 40 characters
Commando
Retrieving script on http://www.imsdb.com/scripts/Commando.html
Found 274 scenes and 270 characters
Conan the Barbarian
Retrieving script on http://www.imsdb.com/scripts/Conan-the-Barbarian.html
Found 161 scenes and 65 characters
Confessions of a Dangerous Mind
Retrieving script on http://www.imsdb.com/scripts/Confessions-of-a-Dangerous-Mind.html
Found 335 scenes and 99 characters
Confidence
Retrieving script on http://www.imsdb.com/scripts/Confidence.html
Found 285 scenes and 50 characters
Constantine
Retrieving script on http://www.imsdb.com/scripts/Constantine.html
Found 324 scenes and 40 characters
Contact
Contact Script.html has no script :(
Cooler, The
Retrieving script on http://www.imsdb.com/scripts/Cooler,-The.html
Found 120 scenes and 40 characters
Copycat
Retrieving script on http://www.imsdb.com/scripts/Copycat.html
Found 702 scenes and 70 characters
Coraline
Re

Found 319 scenes and 43 characters
Dog Day Afternoon
Retrieving script on http://www.imsdb.com/scripts/Dog-Day-Afternoon.html
Found 303 scenes and 70 characters
Dogma
Retrieving script on http://www.imsdb.com/scripts/Dogma.html
Found 305 scenes and 19 characters
Donnie Brasco
Retrieving script on http://www.imsdb.com/scripts/Donnie-Brasco.html
Found 227 scenes and 45 characters
Donnie Darko
Donnie Darko Script.html has no script :(
Doors, The
Retrieving script on http://www.imsdb.com/scripts/Doors,-The.html
Found 245 scenes and 118 characters
Double Indemnity
Retrieving script on http://www.imsdb.com/scripts/Double-Indemnity.html
Found 197 scenes and 30 characters
Drag Me to Hell
Retrieving script on http://www.imsdb.com/scripts/Drag-Me-to-Hell.html
Found 384 scenes and 282 characters
Dragonslayer
Retrieving script on http://www.imsdb.com/scripts/Dragonslayer.html
Found 148 scenes and 21 characters
Drive
Retrieving script on http://www.imsdb.com/scripts/Drive.html
Found 160 scenes and 

Couldn't find correct leading spaces
Flight
Retrieving script on http://www.imsdb.com/scripts/Flight.html
Found 384 scenes and 104 characters
Flintstones, The
Retrieving script on http://www.imsdb.com/scripts/Flintstones,-The.html
Couldn't find correct leading spaces
Forrest Gump
Retrieving script on http://www.imsdb.com/scripts/Forrest-Gump.html
Couldn't find correct leading spaces
Four Feathers
Retrieving script on http://www.imsdb.com/scripts/Four-Feathers.html
error getting blocks
Four Rooms
Retrieving script on http://www.imsdb.com/scripts/Four-Rooms.html
Found 92 scenes and 31 characters
Foxcatcher
Retrieving script on http://www.imsdb.com/scripts/Foxcatcher.html
Found 229 scenes and 84 characters
Fracture
Retrieving script on http://www.imsdb.com/scripts/Fracture.html
Found 153 scenes and 37 characters
Frances
Retrieving script on http://www.imsdb.com/scripts/Frances.html
Found 184 scenes and 94 characters
Frankenstein
Retrieving script on http://www.imsdb.com/scripts/Frankenste

Found 131 scenes and 62 characters
Grand Theft Parsons
Retrieving script on http://www.imsdb.com/scripts/Grand-Theft-Parsons.html
Found 249 scenes and 44 characters
Grapes of Wrath, The
Retrieving script on http://www.imsdb.com/scripts/Grapes-of-Wrath,-The.html
Found 8 scenes and 91 characters
Gravity
Retrieving script on http://www.imsdb.com/scripts/Gravity.html
Found 223 scenes and 44 characters
Great Gatsby, The
Retrieving script on http://www.imsdb.com/scripts/Great-Gatsby,-The.html
Found 218 scenes and 58 characters
Green Mile, The
Retrieving script on http://www.imsdb.com/scripts/Green-Mile,-The.html
Found 201 scenes and 45 characters
Gremlins
Retrieving script on http://www.imsdb.com/scripts/Gremlins.html
Found 458 scenes and 440 characters
Gremlins 2
Retrieving script on http://www.imsdb.com/scripts/Gremlins-2.html
Found 712 scenes and 608 characters
Grifters, The
Retrieving script on http://www.imsdb.com/scripts/Grifters,-The.html
Found 165 scenes and 51 characters
Grosse Poin

Found 151 scenes and 40 characters
How to Lose Friends & Alienate People
Retrieving script on http://www.imsdb.com/scripts/How-to-Lose-Friends-&-Alienate-People.html
Found 209 scenes and 176 characters
How to Train Your Dragon
Retrieving script on http://www.imsdb.com/scripts/How-to-Train-Your-Dragon.html
Found 356 scenes and 131 characters
How to Train Your Dragon 2
Retrieving script on http://www.imsdb.com/scripts/How-to-Train-Your-Dragon-2.html
Found 88 scenes and 79 characters
Hudson Hawk
Retrieving script on http://www.imsdb.com/scripts/Hudson-Hawk.html
Found 210 scenes and 40 characters
Hudsucker Proxy, The
Retrieving script on http://www.imsdb.com/scripts/Hudsucker-Proxy,-The.html
Found 279 scenes and 117 characters
Human Nature
Retrieving script on http://www.imsdb.com/scripts/Human-Nature.html
Found 163 scenes and 34 characters
Hunt for Red October, The
Retrieving script on http://www.imsdb.com/scripts/Hunt-for-Red-October,-The.html
Found 238 scenes and 418 characters
Hurt Loc

Found 278 scenes and 408 characters
Juno
Retrieving script on http://www.imsdb.com/scripts/Juno.html
Found 127 scenes and 32 characters
Jurassic Park
Retrieving script on http://www.imsdb.com/scripts/Jurassic-Park.html
Found 176 scenes and 57 characters
Jurassic Park III
Retrieving script on http://www.imsdb.com/scripts/Jurassic-Park-III.html
Found 128 scenes and 45 characters
Jurassic Park: The Lost World
Retrieving script on http://www.imsdb.com/scripts/Jurassic-Park-The-Lost-World.html
Found 238 scenes and 36 characters
Kafka
Retrieving script on http://www.imsdb.com/scripts/Kafka.html
Found 230 scenes and 37 characters
Kalifornia
Retrieving script on http://www.imsdb.com/scripts/Kalifornia.html
Found 372 scenes and 38 characters
Kate & Leopold
Retrieving script on http://www.imsdb.com/scripts/Kate-&-Leopold.html
Found 140 scenes and 38 characters
Kids
Retrieving script on http://www.imsdb.com/scripts/Kids.html
Couldn't find correct leading spaces
Kids Are All Right, The
Retrieving 

Retrieving script on http://www.imsdb.com/scripts/Lord-of-Illusions.html
Found 225 scenes and 64 characters
Lord of the Rings: Fellowship of the Ring, The
Retrieving script on http://www.imsdb.com/scripts/Lord-of-the-Rings-Fellowship-of-the-Ring,-The.html
Found 328 scenes and 62 characters
Lord of the Rings: Return of the King
Retrieving script on http://www.imsdb.com/scripts/Lord-of-the-Rings-Return-of-the-King.html
Couldn't find correct leading spaces
Lord of the Rings: The Two Towers
Retrieving script on http://www.imsdb.com/scripts/Lord-of-the-Rings-The-Two-Towers.html
Found 9 scenes and 69 characters
Lord of War
Retrieving script on http://www.imsdb.com/scripts/Lord-of-War.html
Found 182 scenes and 64 characters
Losers, The
Retrieving script on http://www.imsdb.com/scripts/Losers,-The.html
Found 177 scenes and 131 characters
Lost Highway
Retrieving script on http://www.imsdb.com/scripts/Lost-Highway.html
Found 197 scenes and 73 characters
Lost Horizon
Retrieving script on http://w

Retrieving script on http://www.imsdb.com/scripts/Mirrors.html
Couldn't find correct leading spaces
Misery
Retrieving script on http://www.imsdb.com/scripts/Misery.html
Found 423 scenes and 18 characters
Mission Impossible
Retrieving script on http://www.imsdb.com/scripts/Mission-Impossible.html
Found 272 scenes and 58 characters
Mission Impossible II
Retrieving script on http://www.imsdb.com/scripts/Mission-Impossible-II.html
Found 277 scenes and 45 characters
Mission to Mars
Retrieving script on http://www.imsdb.com/scripts/Mission-to-Mars.html
Found 467 scenes and 31 characters
Moneyball
Retrieving script on http://www.imsdb.com/scripts/Moneyball.html
Found 478 scenes and 191 characters
Monkeybone
Retrieving script on http://www.imsdb.com/scripts/Monkeybone.html
Found 180 scenes and 79 characters
Monster's Ball
Monster's Ball Script.html is a pdf :(
Monte Carlo
Retrieving script on http://www.imsdb.com/scripts/Monte-Carlo.html
Found 165 scenes and 243 characters
Moon
Retrieving scri

Retrieving script on http://www.imsdb.com/scripts/Orgy-of-the-Dead.html
Found 39 scenes and 11 characters
Orphan
Retrieving script on http://www.imsdb.com/scripts/Orphan.html
Found 502 scenes and 530 characters
Other Boleyn Girl, The
Retrieving script on http://www.imsdb.com/scripts/Other-Boleyn-Girl,-The.html
Found 267 scenes and 32 characters
Out of Sight
Retrieving script on http://www.imsdb.com/scripts/Out-of-Sight.html
Found 212 scenes and 92 characters
Outbreak
Outbreak Script.html has no script :(
Pacifier, The
Retrieving script on http://www.imsdb.com/scripts/Pacifier,-The.html
Found 265 scenes and 81 characters
Pandorum
Retrieving script on http://www.imsdb.com/scripts/Pandorum.html
Found 398 scenes and 151 characters
Panic Room
Retrieving script on http://www.imsdb.com/scripts/Panic-Room.html
Found 293 scenes and 18 characters
Papadopoulos & Sons
Retrieving script on http://www.imsdb.com/scripts/Papadopoulos-&-Sons.html
Found 134 scenes and 32 characters
ParaNorman
Retrieving

Found 346 scenes and 45 characters
Reader, The
Retrieving script on http://www.imsdb.com/scripts/Reader,-The.html
Found 201 scenes and 36 characters
Real Genius
Retrieving script on http://www.imsdb.com/scripts/Real-Genius.html
error getting blocks
Rear Window
Retrieving script on http://www.imsdb.com/scripts/Rear-Window.html
Found 487 scenes and 21 characters
Rebel Without A Cause
Retrieving script on http://www.imsdb.com/scripts/Rebel-Without-A-Cause.html
Found 17 scenes and 51 characters
Red Planet
Retrieving script on http://www.imsdb.com/scripts/Red-Planet.html
Couldn't find correct leading spaces
Red Riding Hood
Retrieving script on http://www.imsdb.com/scripts/Red-Riding-Hood.html
Found 374 scenes and 51 characters
Reindeer Games
Retrieving script on http://www.imsdb.com/scripts/Reindeer-Games.html
Found 224 scenes and 43 characters
Relic, The
Retrieving script on http://www.imsdb.com/scripts/Relic,-The.html
Found 163 scenes and 56 characters
Remember Me
Retrieving script on htt

Retrieving script on http://www.imsdb.com/scripts/Shame.html
Found 189 scenes and 29 characters
Shampoo
Retrieving script on http://www.imsdb.com/scripts/Shampoo.html
Found 140 scenes and 73 characters
Shawshank Redemption, The
Retrieving script on http://www.imsdb.com/scripts/Shawshank-Redemption,-The.html
Found 288 scenes and 62 characters
She's Out of My League
Retrieving script on http://www.imsdb.com/scripts/She's-Out-of-My-League.html
Found 90 scenes and 82 characters
Sherlock Holmes
Retrieving script on http://www.imsdb.com/scripts/Sherlock-Holmes.html
Found 320 scenes and 34 characters
Shifty
Retrieving script on http://www.imsdb.com/scripts/Shifty.html
Found 204 scenes and 29 characters
Shining, The
Retrieving script on http://www.imsdb.com/scripts/Shining,-The.html
Found 656 scenes and 60 characters
Shipping News, The
Retrieving script on http://www.imsdb.com/scripts/Shipping-News,-The.html
Found 201 scenes and 28 characters
Shivers
Retrieving script on http://www.imsdb.com/s

Found 110 scenes and 32 characters
Stepmom
Retrieving script on http://www.imsdb.com/scripts/Stepmom.html
Found 114 scenes and 32 characters
Sting, The
Retrieving script on http://www.imsdb.com/scripts/Sting,-The.html
Found 201 scenes and 52 characters
Stir of Echoes
Retrieving script on http://www.imsdb.com/scripts/Stir-of-Echoes.html
Found 511 scenes and 328 characters
Storytelling
Retrieving script on http://www.imsdb.com/scripts/Storytelling.html
Found 82 scenes and 42 characters
Straight Outta Compton
Retrieving script on http://www.imsdb.com/scripts/Straight-Outta-Compton.html
Found 281 scenes and 70 characters
Strange Days
Retrieving script on http://www.imsdb.com/scripts/Strange-Days.html
Found 118 scenes and 57 characters
Strangers on a Train
Retrieving script on http://www.imsdb.com/scripts/Strangers-on-a-Train.html
Couldn't find correct leading spaces
Stuntman, The
Retrieving script on http://www.imsdb.com/scripts/Stuntman,-The.html
Found 222 scenes and 31 characters
Sugar
R

Retrieving script on http://www.imsdb.com/scripts/Total-Recall.html
Found 146 scenes and 71 characters
Tourist, The
Retrieving script on http://www.imsdb.com/scripts/Tourist,-The.html
Found 183 scenes and 45 characters
Toy Story
Retrieving script on http://www.imsdb.com/scripts/Toy-Story.html
Found 239 scenes and 73 characters
Traffic
Retrieving script on http://www.imsdb.com/scripts/Traffic.html
Found 343 scenes and 109 characters
Training Day
Retrieving script on http://www.imsdb.com/scripts/Training-Day.html
Couldn't find correct leading spaces
Trainspotting
Retrieving script on http://www.imsdb.com/scripts/Trainspotting.html
Found 239 scenes and 10 characters
Transformers: The Movie
Retrieving script on http://www.imsdb.com/scripts/Transformers-The-Movie.html
Couldn't find correct leading spaces
Tremors
Retrieving script on http://www.imsdb.com/scripts/Tremors.html
Found 176 scenes and 28 characters
Tristan and Isolde
Retrieving script on http://www.imsdb.com/scripts/Tristan-and-Is

Found 643 scenes and 753 characters
Wild Hogs
Retrieving script on http://www.imsdb.com/scripts/Wild-Hogs.html
Found 125 scenes and 147 characters
Wild Things
Retrieving script on http://www.imsdb.com/scripts/Wild-Things.html
Found 223 scenes and 67 characters
Wild Things: Diamonds in the Rough
Retrieving script on http://www.imsdb.com/scripts/Wild-Things-Diamonds-in-the-Rough.html
Found 201 scenes and 51 characters
Wild Wild West
Retrieving script on http://www.imsdb.com/scripts/Wild-Wild-West.html
Couldn't find correct leading spaces
Willow
Retrieving script on http://www.imsdb.com/scripts/Willow.html
Found 211 scenes and 14 characters
Win Win
Retrieving script on http://www.imsdb.com/scripts/Win-Win.html
Found 163 scenes and 75 characters
Wind Chill
Retrieving script on http://www.imsdb.com/scripts/Wind-Chill.html
Found 199 scenes and 11 characters
Withnail and I
Retrieving script on http://www.imsdb.com/scripts/Withnail-and-I.html
Found 71 scenes and 2 characters
Witness
Retrieving

In [39]:
import sys
# Display the interpreter's current recursion limit (this run's output shows 3000).
# NOTE(review): looks like a leftover diagnostic for recursion-depth failures
# while parsing — confirm, and delete the cell if it is no longer needed.
sys.getrecursionlimit()

3000