
## Extract the 30 best movies from the action, comedy, drama and horror genres on FilmAffinity, as well as movies from 1980 onwards.

In [10]:
import requests
from bs4 import BeautifulSoup

# Empty list to store the information about the movies
all_movies = []

# Update the genres with the correct codes
genres = {'AC': 'action', 'CO': 'comedy', 'DR': 'drama', 'TE': 'horror'}  # TE for horror

# Iterate over each genre
for genre_code, genre_name in genres.items():
    # Build the URL for each genre
    genre_url = f'https://www.filmaffinity.com/us/topgen.php?genres=%2B{genre_code}&chv=0&orderby=rc&movietype=movie%7C&country=&fromyear=1980&toyear=2023&ratingcount=2&runtimemin=0&runtimemax=4'
    print(f"Fetching data for genre: {genre_name} from URL: {genre_url}")

    try:
        # Make a GET request to the genre URL and get the page
        req = requests.get(genre_url)
        req.raise_for_status()

        # Create a BeautifulSoup object to parse the HTML
        soup = BeautifulSoup(req.text, 'html.parser')

        # Find the parent container for each movie entry
        movie_cards = soup.select('.movie-card')

        if not movie_cards:
            print(f"No movies found for genre: {genre_name}.")
            continue

        # Process each movie
        for movie_container in movie_cards:
            # Extract the title
            title_element = movie_container.select_one('.mc-title')
            title = title_element.text.strip() if title_element else "Unknown Title"

            # Extract the year
            year_element = movie_container.select_one('.mc-year')
            year = year_element.text.strip() if year_element else "Unknown Year"

            # Extract the actors
            cast_div = movie_container.select_one('.mc-cast')
            actors = [actor.text.strip() for actor in cast_div.select('.nb') if actor.a] if cast_div else []

            # Add the movie information to the list of movies
            all_movies.append({
                "title": title,
                "year": year,
                "genre": genre_name,
                "actors": ", ".join(actors)
            })
    
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data for genre {genre_name}: {e}")

# Display the complete list of collected movies
for i, movie in enumerate(all_movies, 1):
    print(f"{i}. {movie}")


Fetching data for genre: action from URL: https://www.filmaffinity.com/us/topgen.php?genres=%2BAC&chv=0&orderby=rc&movietype=movie%7C&country=&fromyear=1980&toyear=2023&ratingcount=2&runtimemin=0&runtimemax=4
Fetching data for genre: comedy from URL: https://www.filmaffinity.com/us/topgen.php?genres=%2BCO&chv=0&orderby=rc&movietype=movie%7C&country=&fromyear=1980&toyear=2023&ratingcount=2&runtimemin=0&runtimemax=4
Fetching data for genre: drama from URL: https://www.filmaffinity.com/us/topgen.php?genres=%2BDR&chv=0&orderby=rc&movietype=movie%7C&country=&fromyear=1980&toyear=2023&ratingcount=2&runtimemin=0&runtimemax=4
Fetching data for genre: horror from URL: https://www.filmaffinity.com/us/topgen.php?genres=%2BTE&chv=0&orderby=rc&movietype=movie%7C&country=&fromyear=1980&toyear=2023&ratingcount=2&runtimemin=0&runtimemax=4
1. {'title': 'The Matrix', 'year': '1999', 'genre': 'action', 'actors': 'Keanu Reeves,, Laurence Fishburne,, Carrie-Anne Moss,, Joe Pantoliano,, Hugo Weaving'}
2. {'

## Save the list of all movies as a csv file.

In [11]:
import csv

# Specify the filename
filename = "movies.csv"

# Define the header based on the dictionary keys in all_movies
header = ["title", "year", "genre", "actors"]

# Write the data to a CSV file
with open(filename, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.DictWriter(file, fieldnames=header)
    writer.writeheader()  # Write the header
    writer.writerows(all_movies)  # Write each movie as a row

print(f"Data successfully written to {filename}")


Data successfully written to movies.csv
