In [1]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException

# Setup Chrome driver
driver = webdriver.Chrome()

titles, genres_list, ratings, votes, durations = [], [], [], [], []

# Open first page
url = "https://www.imdb.com/search/title/?title_type=feature&release_date=2024-01-01,2024-12-31&genres=crime" #change only genre name 
driver.get(url)
time.sleep(3)  # Wait for page to load

# Keep clicking "Load More" until it's gone
while True:
    try:
        load_more = driver.find_element(By.XPATH, '//button[.//span[text()="Load More"] or contains(., "Load More")]')
        driver.execute_script("arguments[0].click();", load_more)
        print("Clicked 'Load More' — loading next set...")
        time.sleep(3)  # Wait for new movies to load
    except NoSuchElementException:
        print("✅ No more 'Load More' button — all movies loaded!")
        break
    except ElementClickInterceptedException:
        print("⚠️ Click intercepted — waiting and retrying...")
        time.sleep(2)
        continue

# Now scrape all loaded movies
movie_items = driver.find_elements(By.XPATH, '//li[@class="ipc-metadata-list-summary-item"]')
print(f"Total movies found: {len(movie_items)}")

for movie in movie_items:
    try:
        title = movie.find_element(By.XPATH, './/h3').text

        # Genres
        genre_spans = movie.find_elements(By.XPATH, './/span[@class="ipc-chip__text"]')
        genres = [g.text for g in genre_spans]
        genre = ", ".join(genres) if genres else "crime"

        # Rating
        try:
            rating = movie.find_element(By.XPATH, './/span[contains(@class,"ipc-rating-star--rating")]').text
        except:
            rating = "N/A"

        # Votes
        try:
            vote = movie.find_element(By.XPATH, './/span[contains(@class,"ipc-rating-star--voteCount")]').text
            vote = vote.strip("() ").replace(",", "")
        except:
            vote = "N/A"

        # Duration
        try:
            duration = movie.find_element(By.XPATH, './/span[contains(text(),"h")]').text
        except:
            duration = "N/A"

        titles.append(title)
        genres_list.append(genre)
        ratings.append(rating)
        votes.append(vote)
        durations.append(duration)

    except Exception as e:
        print(f"Skipping movie due to: {e}")

# Save results
df = pd.DataFrame({
    "Title": titles,
    "Genre": genres_list,
    "Rating": ratings,
    "Votes": votes,
    "Duration": durations
})

print(df)
#df.to_csv("imdb_crime_movies_2024_loadmore.csv", index=False)
print("✅ Saved to imdb_crime_movies_2024_loadmore.csv")

driver.quit()


✅ No more 'Load More' button — all movies loaded!
Total movies found: 50
                             Title  Genre Rating Votes Duration
0            1. The Unholy Trinity  crime    5.5  1.7K   1h 35m
1                     2. Riff Raff  crime    5.7    5K   1h 43m
2                     3. The Order  crime    6.8   51K   1h 56m
3                      4. Juror #2  crime    7.0  107K   1h 54m
4                      5. Longlegs  crime    6.6  196K   1h 41m
5                      6. Carry-On  crime    6.5  174K   1h 59m
6                         7. Wolfs  crime    6.5   90K   1h 48m
7                 8. The Beekeeper  crime    6.3  164K   1h 45m
8                          9. Trap  crime    5.8  146K   1h 45m
9                 10. Freaky Tales  crime    6.2  7.9K   1h 47m
10                    11. MaXXXine  crime    6.2   75K   1h 43m
11                 12. The Thicket  crime    5.8  3.9K   1h 48m
12                 13. Rebel Ridge  crime    6.8   92K   2h 11m
13                     14. Abig