In [2]:
import requests
from bs4 import BeautifulSoup
import os
import time
import re

BASE_URL = "https://imsdb.com"
SAVE_DIR = "movie_scripts"
os.makedirs(SAVE_DIR, exist_ok=True)

# Step 1: Fetch all movie links
url = BASE_URL + "/all-scripts.html"
# Without a timeout, requests waits indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page and
# reporting "Found 0 movie pages."
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Links are inside <p> tags -> <a href="/Movie Scripts/Avatar Script.html">
# The [href] attribute selector skips anchors that have no href, which would
# otherwise raise KeyError on a['href'].
script_links = [
    BASE_URL + a["href"]
    for a in soup.select("p a[href]")
    if "Script" in a["href"]
]

print(f"Found {len(script_links)} movie pages.")

# Step 2: Visit each movie page
# Seconds to wait for any single HTTP request before giving up on it.
FETCH_TIMEOUT = 30
# Matches the text of the "Read ... Script" anchor; compiled once, outside the loop.
READ_SCRIPT_RE = re.compile(r"Read.*Script")
# Characters that are not allowed in file names on Windows (e.g. the ':' in
# "2001:_A_Space_Odyssey"); they are replaced so the output is portable.
UNSAFE_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*]')

for link in script_links:
    try:
        movie_name = link.split("/")[-1].replace(" Script.html", "").replace(" ", "_")

        # Fetch movie page
        movie_page = requests.get(link, timeout=FETCH_TIMEOUT)
        movie_page.raise_for_status()
        movie_soup = BeautifulSoup(movie_page.text, "html.parser")

        # The script is linked with "Read Script" anchor
        read_link = movie_soup.find("a", string=READ_SCRIPT_RE)
        script_href = read_link.get("href") if read_link is not None else None
        if not script_href:
            print(f"⚠️ No script link for {movie_name}")
            continue

        script_page = requests.get(BASE_URL + script_href, timeout=FETCH_TIMEOUT)
        # Do not save an HTTP error page as if it were the script.
        script_page.raise_for_status()
        script_soup = BeautifulSoup(script_page.text, "html.parser")

        # Script content is inside <pre>
        pre_tag = script_soup.find("pre")
        if pre_tag is None:
            print(f"⚠️ Script not found for {movie_name}")
            continue

        # Save as text file, using a file-system-safe version of the name
        safe_name = UNSAFE_FILENAME_CHARS.sub("_", movie_name)
        file_path = os.path.join(SAVE_DIR, f"{safe_name}.txt")
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(pre_tag.get_text())

        print(f"✅ Saved: {movie_name}")
    except Exception as e:
        # Best-effort crawl: report the failure and move on to the next movie.
        print(f"❌ Error with {link}: {e}")
    finally:
        # polite delay -- in `finally` so it also applies after a failure or
        # an early `continue`, and a struggling server is not hit back-to-back
        time.sleep(1)


Found 1297 movie pages.
✅ Saved: 10_Things_I_Hate_About_You
✅ Saved: 12
✅ Saved: 12_and_Holding
✅ Saved: 12_Monkeys
✅ Saved: 12_Years_a_Slave
✅ Saved: 127_Hours
✅ Saved: 1492:_Conquest_of_Paradise
✅ Saved: 15_Minutes
✅ Saved: 17_Again
✅ Saved: 187
✅ Saved: 2001:_A_Space_Odyssey
✅ Saved: 2012
✅ Saved: 20th_Century_Women
⚠️ Script not found for 25th_Hour
✅ Saved: 28_Days_Later
✅ Saved: 30_Minutes_or_Less
✅ Saved: 42
✅ Saved: 44_Inch_Chest
✅ Saved: 48_Hrs.
✅ Saved: 50-50
✅ Saved: 500_Days_of_Summer
⚠️ Script not found for 8_Mile
✅ Saved: 8MM
✅ Saved: 9
✅ Saved: A_Few_Good_Men
✅ Saved: A_Good_Person
✅ Saved: A_Million_Miles_Away
✅ Saved: A_Most_Violent_Year
✅ Saved: A_Prayer_Before_Dawn
✅ Saved: A_Quiet_Place
✅ Saved: A_Real_Pain
✅ Saved: A_Scanner_Darkly
✅ Saved: A_Serious_Man
⚠️ No script link for A.I.
✅ Saved: Above_the_Law
✅ Saved: Absolute_Power
✅ Saved: Abyss,_The
✅ Saved: Ace_Ventura:_Pet_Detective
✅ Saved: Ad_Astra
✅ Saved: Adaptation
✅ Saved: Addams_Family,_The
✅ Saved: Adjustment