In [2]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
import pandas as pd
import time

from bs4 import BeautifulSoup
import requests

#===============================================================================================================

#============================================= SELENIUM ==================================================

driver = webdriver.Chrome()
driver.get('https://www.imdb.com/')
driver.maximize_window()


dropdown = driver.find_element(By.CLASS_NAME, 'ipc-icon--arrow-drop-down')
dropdown.click()

# make selenium wait for 2 seconds before clicking on the advanced search btn
time.sleep(2)

# advanced search logic
element = driver.find_element(By.LINK_TEXT, 'Advanced Search')
element.click()

# click on the advanced title search
adv_title = driver.find_element(By.LINK_TEXT, 'Advanced Title Search')
adv_title.click()

# select feature film
feature_film = driver.find_element(By.ID, 'title_type-1')
feature_film.click()

# select tv_movie
tv_movie = driver.find_element(By.ID, 'title_type-2')
tv_movie.click()

# input min date
min_date = driver.find_element(By.NAME, 'release_date-min')
min_date.click()
min_date.send_keys('1990')

# input max date
max_date = driver.find_element(By.NAME, 'release_date-max')
max_date.click()
max_date.send_keys('2022')

# rating min
rating_min = driver.find_element(By.NAME, 'user_rating-min')
rating_min.click()
dropdown_2 = Select(rating_min)
dropdown_2.select_by_visible_text('1.0')

# rating max
rating_max = driver.find_element(By.NAME, 'user_rating-max')
rating_max.click()
dropdown_3 = Select(rating_max)
dropdown_3.select_by_visible_text('10')

# oscar nominated 
oscar_nominated = driver.find_element(By.ID, 'groups-7')
oscar_nominated.click()

# select color
color = driver.find_element(By.ID, 'colors-1')
color.click()

# select language
language = driver.find_element(By.NAME, 'languages')
dropdown_4 = Select(language)
dropdown_4.select_by_visible_text('English')

# select page's result
results_count = driver.find_element(By.ID, 'search-count')
dropdown_5 = Select(results_count)
dropdown_5.select_by_index(2)

# click on the search btn

search_btn = driver.find_element(By.XPATH, '(//button[@type="submit"])[2]')
search_btn.click()

# current
current_url = driver.current_url

time.sleep(5)
#===============================================================================================================

#============================================= BEAUTIFUL SOUP ==================================================


# create an empty list to store the data for all pages
all_data = []

# create a loop to go through all the pages
for page_num in range(1, 5): # replace "5" with the number of pages you want to scrape
    # modify the URL to include the "&start=" parameter with the appropriate value
    time.sleep(5)
    url = current_url + "&start=" + str((page_num - 1) * 250)
    print(url)

    # get request
    response = requests.get(url)

    # soup object
    soup = BeautifulSoup(response.content, 'html.parser')

    list_items = soup.find_all('div', {'class':'lister-item'})

    # get all the titles
    movie_title = [result.find('h3').find('a').text for result in list_items]

    # get all the  years
    year = [result.find('h3').find('span', {'class':'lister-item-year'}).text.replace('(', '').replace(')', '') for result in list_items]

    # get all the genres
    genre = [result.find('span', {'class':'genre'}).text.replace('\n', '').strip() for result in list_items]

    # get all the ratings
    rating = [result.find('div', {'class':'ratings-imdb-rating'}).find('strong').text for result in list_items]

    # get all the descriptions
    description = [result.find('div', {'class':'lister-item-content'}).find_all('p')[1].text.strip() for result in list_items]

    # get all the durations
    duration = [result.find('span', {'class':'runtime'}).text for result in list_items]

    # get all the directors
    director = [result.find('p', {'class': ''}).find('a').text for result in list_items]

    # get all the stars
    cast = [result.find('p', {'class': ''}).find_all('a')[1:] for result in list_items]

    # get all the grosses
    gross = [result.find('span', {'name':'nv'}).text for result in list_items]

    image_urls = [img_tag.attrs['loadlate'] for result in list_items
                  for img_div in [result.find('div', {'class': 'lister-item-image'})]
                  for img_tag in [img_div.find('img')]
                  if img_div and img_tag and 'loadlate' in img_tag.attrs
                  and img_tag.attrs['loadlate'].endswith('.jpg')]

    # pandas
    imdb_df = pd.DataFrame({'Movie title': movie_title, 'Year': year,
                            'Genre': genre, 'Rating': rating,
                            'Description': description, 'Duration': duration,
                            'Director': director, 'Cast': cast, 'Gross': gross,
                            'Images': image_urls})

    all_data.append(imdb_df)

# combine all the dataframes
final_df = pd.concat(all_data)
final_df

https://www.imdb.com/search/title/?title_type=feature,tv_movie&release_date=1990-01-01,2022-12-31&user_rating=1.0,10.0&groups=oscar_nominee&colors=color&languages=en&count=250&start=0
https://www.imdb.com/search/title/?title_type=feature,tv_movie&release_date=1990-01-01,2022-12-31&user_rating=1.0,10.0&groups=oscar_nominee&colors=color&languages=en&count=250&start=250
https://www.imdb.com/search/title/?title_type=feature,tv_movie&release_date=1990-01-01,2022-12-31&user_rating=1.0,10.0&groups=oscar_nominee&colors=color&languages=en&count=250&start=500
https://www.imdb.com/search/title/?title_type=feature,tv_movie&release_date=1990-01-01,2022-12-31&user_rating=1.0,10.0&groups=oscar_nominee&colors=color&languages=en&count=250&start=750


Unnamed: 0,Movie title,Year,Genre,Rating,Description,Duration,Director,Cast,Gross,Images
0,Avatar: O Caminho da Água,2022,"Action, Adventure, Fantasy",7.7,Jake Sully lives with his newfound family form...,192 min,James Cameron,"[[Sam Worthington], [Zoe Saldana], [Sigourney ...",338513,https://m.media-amazon.com/images/M/MV5BYjhiNj...
1,Tudo em Todo o Lugar ao Mesmo Tempo,2022,"Action, Adventure, Comedy",7.9,A middle-aged Chinese immigrant is swept up in...,139 min,Daniel Kwan,"[[Daniel Scheinert], [Michelle Yeoh], [Stephan...",414968,https://m.media-amazon.com/images/M/MV5BYTdiOT...
2,A Baleia,2022,Drama,7.8,"A reclusive, morbidly obese English teacher at...",117 min,Darren Aronofsky,"[[Brendan Fraser], [Sadie Sink], [Ty Simpkins]...",118610,https://m.media-amazon.com/images/M/MV5BZDQ4Nj...
3,Triângulo da Tristeza,2022,"Comedy, Drama",7.4,A fashion model celebrity couple join an event...,147 min,Ruben Östlund,"[[Thobias Thorwid], [Harris Dickinson], [Charl...",118351,https://m.media-amazon.com/images/M/MV5BNDRiZj...
4,Top Gun: Maverick,2022,"Action, Drama",8.3,"After thirty years, Maverick is still pushing ...",130 min,Joseph Kosinski,"[[Tom Cruise], [Jennifer Connelly], [Miles Tel...",561799,https://m.media-amazon.com/images/M/MV5BZWYzOG...
...,...,...,...,...,...,...,...,...,...,...
245,Antes da Chuva,1994,"Drama, Romance, War",7.9,Three interconnected stories of love under the...,113 min,Milcho Manchevski,"[[Katrin Cartlidge], [Rade Serbedzija], [Grégo...",15288,https://m.media-amazon.com/images/M/MV5BMTE4MW...
246,Oscar e Lucinda,1997,"Drama, Romance",6.6,"In mid-1800s England, Oscar is a young Anglica...",132 min,Gillian Armstrong,"[[Ralph Fiennes], [Cate Blanchett], [Ciarán Hi...",7044,https://m.media-amazon.com/images/M/MV5BZjUwOT...
247,O Divo,2008,"Biography, Drama",7.2,The story of Italian politician Giulio Andreot...,110 min,Paolo Sorrentino,"[[Toni Servillo], [Anna Bonaiuto], [Giulio Bos...",18414,https://m.media-amazon.com/images/M/MV5BODJlZD...
248,O Enigma do Colar,2001,"Drama, History, Romance",6.0,"In pre-Revolutionary France, a young aristocra...",118 min,Charles Shyer,"[[Hilary Swank], [Simon Baker], [Jonathan Pryc...",5443,https://m.media-amazon.com/images/M/MV5BMTcxMz...
