In [None]:
import requests
from bs4 import BeautifulSoup
import re
import time
import logging

# Configure the root logger at DEBUG level so every record that propagates to
# it (including debug records) is emitted.
# NOTE(review): third-party libraries presumably become verbose at this
# level -- consider INFO outside of troubleshooting sessions.
logging.basicConfig(level=logging.DEBUG)

class IMDbGenreScraper:
    """Scrape feature-film titles from IMDb genre search pages.

    The scraper downloads one of the search pages listed in ``URLS``,
    extracts the title texts with a series of fallback parsing strategies
    and can print the first few of them.
    """

    # Capitalized genre name -> IMDb search URL for feature films of that genre.
    URLS = {
        "Drama": 'https://www.imdb.com/search/title/?title_type=feature&genres=drama',
        "Action": 'https://www.imdb.com/search/title/?title_type=feature&genres=action',
        "Comedy": 'https://www.imdb.com/search/title/?title_type=feature&genres=comedy',
        "Horror": 'https://www.imdb.com/search/title/?title_type=feature&genres=horror',
        "Crime": 'https://www.imdb.com/search/title/?title_type=feature&genres=crime',
    }

    # File that receives the raw HTML of the last fetched page (debug aid).
    DEBUG_HTML_PATH = 'debug_response.html'

    def __init__(self):
        """Prepare the browser-like HTTP headers sent with every request."""
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Referer': 'https://www.imdb.com/',
            'DNT': '1',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1'
        }

    @staticmethod
    def _clean_titles(raw_titles):
        """Strip whitespace, drop blank entries and de-duplicate, keeping order."""
        stripped = (text.strip() for text in raw_titles)
        # dict.fromkeys keeps the first occurrence of each title in order.
        return list(dict.fromkeys(text for text in stripped if text))

    def scrape_movies_by_genre(self, genre):
        """Return the de-duplicated, non-blank titles found for *genre*.

        Args:
            genre: Genre name, matched case-insensitively against ``URLS``.

        Returns:
            A list of title strings in page order, or an empty list when the
            genre is unknown, the request fails, or no strategy finds a title.
        """
        url = self.URLS.get(genre.strip().capitalize())
        if not url:
            print(f"Invalid genre. Choose from: {', '.join(self.URLS)}")
            return []

        try:
            response = requests.get(url, headers=self.headers, timeout=15)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error scraping movies: {e}")
            return []

        # The HTML dump is only a debugging aid: failing to write it must not
        # discard a page that was fetched successfully.
        try:
            with open(self.DEBUG_HTML_PATH, 'w', encoding='utf-8') as f:
                f.write(response.text)
        except OSError as e:
            print(f"Could not write {self.DEBUG_HTML_PATH}: {e}")

        soup = BeautifulSoup(response.text, 'html.parser')
        title_href = re.compile(r'/title/tt\d+/')

        # Fallback strategies, tried in order; each is evaluated lazily so a
        # later one only runs when the earlier ones yield nothing usable.
        strategies = [
            lambda: [a.text for a in soup.find_all('a', href=title_href)],
            lambda: [h3.text for h3 in soup.find_all('h3', class_='lister-item-header')],
            lambda: [
                link.text
                for link in (
                    div.find('a')
                    for div in soup.find_all('div', class_='lister-item-content')
                )
                if link is not None
            ],
        ]

        for strategy in strategies:
            # Clean before testing for success: anchors without text (e.g.
            # image links) would otherwise make a strategy look successful
            # and leak blank "titles" into the result.
            titles = self._clean_titles(strategy())
            if titles:
                print(f"Found {len(titles)} titles using this strategy")
                return titles

        print("No titles found using any strategy")
        return []

    def display_movies(self, genre, limit=20):
        """Print up to *limit* scraped titles for *genre*.

        Args:
            genre: Genre name passed on to ``scrape_movies_by_genre``.
            limit: Maximum number of titles to print (default 20).

        When nothing could be scraped, a short troubleshooting checklist is
        printed instead.
        """
        movie_titles = self.scrape_movies_by_genre(genre)

        if not movie_titles:
            print("Debugging Information:")
            print("1. Check your internet connection")
            print("2. Verify the genre spelling")
            print("3. IMDb might be blocking the request")
            print("4. Check 'debug_response.html' for raw HTML")
            return

        shown = movie_titles[:max(limit, 0)]
        # The header reports exactly how many titles follow.
        print(f"\n=== Top {len(shown)} {genre.upper()} Movies ===")
        for title in shown:
            print(title)

def main():
    """Ask the user for a genre on stdin and print the scraped titles for it."""
    imdb = IMDbGenreScraper()
    prompt = "Enter the genre (Drama/Action/Comedy/Horror/Crime): "
    # Surrounding whitespace is removed before the genre is looked up.
    imdb.display_movies(input(prompt).strip())

# Run the interactive prompt only when executed as a script, not on import.
if __name__ == '__main__':
    main()

Enter the genre (Drama/Action/Comedy/Horror/Crime): Horror
Found 50 titles using this strategy

=== Top 20 HORROR Movies ===

1. Nosferatu
2. The Substance
3. Wolf Man
4. Alien: Romulus
5. Heretic
6. Longlegs
7. Peter Pan's Neverland Nightmare
8. Get Away
9. Nosferatu: A Symphony of Horror
10. Eraserhead
11. Nightbitch
12. Smile 2
13. Presence
14. The Woman in the Yard
15. Speak No Evil
16. Bram Stoker's Dracula
17. 28 Years Later
18. Hereditary
19. Beetlejuice Beetlejuice
20. The Monkey
