In [2]:
import requests
from bs4 import BeautifulSoup

def get_imdb_page(url):
    
    headers = {'Accept-Language': 'en-US,en;q=0.5'}
    response=requests.get(url, headers=headers)
    
    if response.status_code != 200:
        raise Exception(f'Failed to load page {imdb_url}')
    
    doc = BeautifulSoup(response.text, 'html.parser')
    return doc

In [3]:
imdb_url = 'https://www.imdb.com/search/title/?groups=top_250'
doc = get_imdb_page(imdb_url)

In [4]:
def get_movie_titles(doc):
    
    title_selection_class="lister-item-header"
    movie_title_tags=doc.find_all('h3',{'class':title_selection_class})
    movie_titles=[]

    for tag in movie_title_tags:
        title = tag.find('a').text
        movie_titles.append(title)
        
        
    return movie_titles

In [5]:
title = get_movie_titles(doc)

In [6]:
title[:5]

['Top Gun: Maverick',
 'The Godfather',
 'Interstellar',
 'The Shawshank Redemption',
 "Schindler's List"]

In [7]:
def get_movie_rating(doc):
    rating_selector="inline-block ratings-imdb-rating"            
    movie_rating_tags=doc.find_all('div',{'class':rating_selector})
    movie_rating=[]
    for tag in movie_rating_tags:
        movie_rating.append(tag.get_text().strip())
    return movie_rating

In [8]:
rating = get_movie_rating(doc)

In [9]:
rating[:10]

['8.3', '9.2', '8.6', '9.3', '9.0', '8.2', '8.9', '9.0', '8.8', '8.2']

In [10]:
def get_movie_genre(doc):
    genre_selector="genre"            
    movie_genre_tags=doc.find_all('span',{'class':genre_selector})
    movie_genre=[]
    for tag in movie_genre_tags:
        movie_genre.append(tag.get_text().strip())
    return movie_genre

In [11]:
genre = get_movie_genre(doc)

In [12]:
genre[:10]

['Action, Drama',
 'Crime, Drama',
 'Adventure, Drama, Sci-Fi',
 'Drama',
 'Biography, Drama, History',
 'Biography, Comedy, Crime',
 'Crime, Drama',
 'Action, Crime, Drama',
 'Action, Adventure, Sci-Fi',
 'Action, Adventure, Fantasy']

In [13]:
def get_movie_year(doc):
    year_selector = "lister-item-year text-muted unbold"           
    movie_year_tags=doc.find_all('span',{'class':year_selector})
    released_year=[]
    for tag in movie_year_tags:
        released_year.append(tag.get_text().strip()[1:5])
    return released_year

In [14]:
year = get_movie_year(doc)

In [15]:
year[:5]

['2022', '1972', '2014', '1994', '1993']

In [16]:
def get_director(doc):
    
    
    director_tags=doc.find_all('p', class_='')
    directors=[]

    for tag in director_tags:
        if tag.find_next().name=='a':

          directors.append(tag.find_next().text)
        
        
    return directors

In [17]:
director = get_director (doc)

In [18]:
director[:10]

['Joseph Kosinski',
 'Francis Ford Coppola',
 'Christopher Nolan',
 'Frank Darabont',
 'Steven Spielberg',
 'Martin Scorsese',
 'Quentin Tarantino',
 'Christopher Nolan',
 'Christopher Nolan',
 'Jon Watts']

In [19]:
import pandas as pd

In [30]:
def all_pages(num=5):
    movies_dict={
        'titles':[],
        'genre':[],
        
        'rating':[],
        'year':[],
        'director':[]
    }
  
    for i in range(1,num*25,25):
       
        url = 'https://www.imdb.com/search/title/?groups=top_250&start'+str(i)+'&ref_=adv_next'
        doc = get_imdb_page(url)
   
        
        movies_dict['titles'] += get_movie_titles(doc)
        
        
        movies_dict['rating'] += get_movie_rating(doc)
        
        movies_dict['year'] += get_movie_year(doc)
        movies_dict['genre'] += get_movie_genre(doc)  
        movies_dict['director'] += get_director(doc)  
        
    return pd.DataFrame(movies_dict)

In [32]:
movies = all_pages()

In [33]:
movies.to_csv('movies.csv',index=None)

In [34]:
dataframe = pd.read_csv('movies.csv')

In [35]:
dataframe

Unnamed: 0,titles,genre,rating,year,director
0,Top Gun: Maverick,"Action, Drama",8.3,2022,Joseph Kosinski
1,The Godfather,"Crime, Drama",9.2,1972,Francis Ford Coppola
2,Interstellar,"Adventure, Drama, Sci-Fi",8.6,2014,Christopher Nolan
3,The Shawshank Redemption,Drama,9.3,1994,Frank Darabont
4,Schindler's List,"Biography, Drama, History",9.0,1993,Steven Spielberg
...,...,...,...,...,...
245,The Dark Knight Rises,"Action, Drama",8.4,2012,Christopher Nolan
246,Hacksaw Ridge,"Biography, Drama, History",8.1,2016,Mel Gibson
247,The Lord of the Rings: The Return of the King,"Action, Adventure, Drama",9.0,2003,Peter Jackson
248,Taxi Driver,"Crime, Drama",8.2,1976,Martin Scorsese
