In [9]:
# Write a Python script to download IMDB 250 Top Rated Titles.
# For each movie, you'll need to retrieve the movie title, the ranking, the initial release year, the casting and 
# the rating.
# Your data must be stored in a proper imdb_top_250.csv file.

import requests, csv
from bs4 import BeautifulSoup
import re

def get_movies(url):
    response = requests.get(url)
    soup_imdb = BeautifulSoup(response.text)

    # Getting all the values in raw form
    all_movies = soup_imdb.select('td.titleColumn')
    crew = [a.attrs.get('title') for a in soup_imdb.select('td.titleColumn a')]
    movie_ratings = [b.attrs.get('data-value')
           for b in soup_imdb.select('td.posterColumn span[name=ir]')]
 
    # create a empty lists for storing movie information
    titles = []
    rankings = []
    years = []
    castings = []
    ratings = []
    
 
    # Iterating over movies to extract each movie's details
    for index in range(0, len(all_movies)):

        # refining raw data
        movie_string = all_movies[index].get_text()
        movie = (' '.join(movie_string.split()).replace('.', ''))
        title = movie[len(str(index))+1:-7]
        year = re.search('\((.*?)\)', movie_string).group(1)
        rank = movie[:len(str(index))-(len(movie))]
        
        # appending required values into lists
        titles.append(title)
        rankings.append(rank)
        years.append(year)
        castings.append(crew[index])
        ratings.append(movie_ratings[index])
        
        
    mega_list = list(zip(titles, rankings, years, castings, ratings))
    mega_list = [list(elt) for elt in mega_list]

    # Write csv
    str_lst = [f"{mega_list[i][1]} / {mega_list[i][0]} ({mega_list[i][2]}) / Starring: {mega_list[i][3]}" \
    for i, val in enumerate(mega_list)]

    res = [elt.split("/") for elt in str_lst]

    with open("imdb_top_250.csv", "w") as f:
        writer = csv.writer(f, delimiter="-")
        writer.writerows(res)

In [11]:
def main():
    url = "https://www.imdb.com/chart/top"
    get_movies(url)

if __name__ == "__main__":
    main()

In [14]:
import pandas as pd

#Read CSV and show the first 5 rows
imdb = pd.read_csv('imdb_top_250.csv', delimiter='-',encoding='latin-1')
imdb.head(5)

Unnamed: 0,1,The Shawshank Redemption (1994),"Starring: Frank Darabont (dir.), Tim Robbins, Morgan Freeman"
0,2,The Godfather (1972),"Starring: Francis Ford Coppola (dir.), Marlon..."
1,3,The Dark Knight (2008),"Starring: Christopher Nolan (dir.), Christian..."
2,4,The Godfather: Part II (1974),"Starring: Francis Ford Coppola (dir.), Al Pac..."
3,5,12 Angry Men (1957),"Starring: Sidney Lumet (dir.), Henry Fonda, L..."
4,6,Schindler's List (1993),"Starring: Steven Spielberg (dir.), Liam Neeso..."
