In [1]:
# Install Selenium into the kernel's environment; restart the kernel afterwards if pip asks for it.
%pip install selenium

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [1]:
# Action movies 

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import time

# Scrape IMDb's search results for 2024 Action feature films into `movies_list`.
#
# Results left behind for later cells:
#   genre        - label written into every row
#   movies_list  - one dict per result with keys Title, Genre, Duration, Rating, Voting;
#                  any field that cannot be read (or is blank) is stored as "N/A"

genre = "Action"
total_movies_needed = 500
movie_selector = "li.ipc-metadata-list-summary-item"
max_stalled_rounds = 3  # give up after this many load attempts that add no results

# (output key, locator strategy, locator) for each scraped field, relative to one result item
field_locators = [
    ("Title", By.CSS_SELECTOR, "h3.ipc-title__text"),
    ("Duration", By.XPATH, ".//span[contains(@class, 'dli-title-metadata-item') and (contains(text(),'h') or contains(text(),'m'))]"),
    ("Rating", By.CSS_SELECTOR, "span.ipc-rating-star--rating"),
    ("Voting", By.CSS_SELECTOR, "span.ipc-rating-star--voteCount"),
]

movies_list = []

# Initialize WebDriver
driver = webdriver.Chrome()
try:
    driver.get("https://www.imdb.com/search/title/?title_type=feature&genres=action&release_date=2024-01-01,2024-12-31")
    time.sleep(3)  # Waiting for the page to load

    # Load more results until enough are on the page or nothing new arrives.
    # Progress is measured by counting the result items actually present in the DOM:
    # the page source can change for unrelated reasons, so it is not a reliable signal.
    current_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))
    stalled_rounds = 0
    while current_movies < total_movies_needed and stalled_rounds < max_stalled_rounds:
        # Scroll down to load more data
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)  # Allow time for new data to load

        loaded_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))
        if loaded_movies == current_movies:
            try:
                # Locate and click the "See More" button if present
                see_more_button = driver.find_element(By.XPATH, "//span[contains(@class, 'ipc-see-more')]")
                ActionChains(driver).move_to_element(see_more_button).click().perform()
                time.sleep(2)  # Wait for new content to load
            except Exception:
                break  # Stop scrolling if the button isn't found or can't be clicked
            loaded_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))

        # A round that added nothing counts towards the stall limit; any growth resets it.
        stalled_rounds = stalled_rounds + 1 if loaded_movies == current_movies else 0
        current_movies = loaded_movies

    # Extract movie containers
    movie_blocks = driver.find_elements(By.CSS_SELECTOR, movie_selector)

    for movie in movie_blocks:
        movie_data = {
            "Title": "N/A",
            "Genre": genre,
            "Duration": "N/A",
            "Rating": "N/A",
            "Voting": "N/A",
        }
        for field, by, locator in field_locators:
            try:
                text = movie.find_element(by, locator).text.strip()
            except Exception:
                # Best effort: a missing/stale element keeps the "N/A" default.
                # `Exception` (not a bare except) so a kernel interrupt still stops the cell.
                continue
            if text:
                movie_data[field] = text
        movies_list.append(movie_data)
finally:
    # Close the driver even if scraping fails or the kernel is interrupted
    driver.quit()

# Print results
for movie in movies_list:
    print(movie)


{'Title': '1. Dune: Part Two', 'Genre': 'Action', 'Duration': '2h 46m', 'Rating': '8.5', 'Voting': '(610K)'}
{'Title': '2. Venom: The Last Dance', 'Genre': 'Action', 'Duration': '1h 50m', 'Rating': '6.0', 'Voting': '(105K)'}
{'Title': '3. Gladiator II', 'Genre': 'Action', 'Duration': '2h 28m', 'Rating': '6.6', 'Voting': '(213K)'}
{'Title': '4. Sonic the Hedgehog 3', 'Genre': 'Action', 'Duration': '1h 50m', 'Rating': '6.9', 'Voting': '(51K)'}
{'Title': '5. The Lord of the Rings: The War of the Rohirrim', 'Genre': 'Action', 'Duration': '2h 14m', 'Rating': '6.3', 'Voting': '(27K)'}
{'Title': '6. William Tell', 'Genre': 'Action', 'Duration': '2h 13m', 'Rating': '5.7', 'Voting': '(1.4K)'}
{'Title': '7. Kraven the Hunter', 'Genre': 'Action', 'Duration': '2h 7m', 'Rating': '5.4', 'Voting': '(42K)'}
{'Title': '8. Pushpa: The Rule - Part 2', 'Genre': 'Action', 'Duration': '3h 21m', 'Rating': '6.1', 'Voting': '(53K)'}
{'Title': '9. Furiosa: A Mad Max Saga', 'Genre': 'Action', 'Duration': '2h 28m

In [3]:
#common csv converting code

import pandas as pd


# Build a cleaned DataFrame from the scraped rows in `movies_list` and save it as CSV.
# The columns are named explicitly so the cleaning steps below do not raise KeyError
# when the scrape returned no rows.
df = pd.DataFrame(movies_list, columns=["Title", "Genre", "Duration", "Rating", "Voting"])
# Cleaning Steps:
# 1. Remove leading numbers from "Title" (e.g. "1. Dune: Part Two" -> "Dune: Part Two")
df["Title"] = df["Title"].str.replace(r"^\d+\.\s*", "", regex=True)

# 2. Remove parentheses from "Voting" (e.g. "(610K)" -> "610K")
df["Voting"] = df["Voting"].str.replace(r"[()]", "", regex=True)

# 3. Reset index to start from 1 (affects the in-memory frame only:
#    the CSV below is written with index=False)
df.index = df.index + 1

df.to_csv(r"D:\Guvi Ds\action.csv", index=False, encoding="utf-8")
print("CSV file saved successfully in D:\\Guvi Ds!")



CSV file saved successfully in D:\Guvi Ds!


In [4]:
# Horror movies 

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import time

# Scrape IMDb's search results for 2024 Horror feature films into `movies_list`.
#
# Results left behind for later cells:
#   genre        - label written into every row
#   movies_list  - one dict per result with keys Title, Genre, Duration, Rating, Voting;
#                  any field that cannot be read (or is blank) is stored as "N/A"

genre = "Horror"
total_movies_needed = 500
movie_selector = "li.ipc-metadata-list-summary-item"
max_stalled_rounds = 3  # give up after this many load attempts that add no results

# (output key, locator strategy, locator) for each scraped field, relative to one result item
field_locators = [
    ("Title", By.CSS_SELECTOR, "h3.ipc-title__text"),
    ("Duration", By.XPATH, ".//span[contains(@class, 'dli-title-metadata-item') and (contains(text(),'h') or contains(text(),'m'))]"),
    ("Rating", By.CSS_SELECTOR, "span.ipc-rating-star--rating"),
    ("Voting", By.CSS_SELECTOR, "span.ipc-rating-star--voteCount"),
]

movies_list = []

# Initialize WebDriver
driver = webdriver.Chrome()
try:
    driver.get("https://www.imdb.com/search/title/?title_type=feature&release_date=2024-01-01,2024-12-31&genres=horror")
    time.sleep(3)  # Waiting for the page to load

    # Load more results until enough are on the page or nothing new arrives.
    # Progress is measured by counting the result items actually present in the DOM:
    # the page source can change for unrelated reasons, so it is not a reliable signal.
    current_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))
    stalled_rounds = 0
    while current_movies < total_movies_needed and stalled_rounds < max_stalled_rounds:
        # Scroll down to load more data
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)  # Allow time for new data to load

        loaded_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))
        if loaded_movies == current_movies:
            try:
                # Locate and click the "See More" button if present
                see_more_button = driver.find_element(By.XPATH, "//span[contains(@class, 'ipc-see-more')]")
                ActionChains(driver).move_to_element(see_more_button).click().perform()
                time.sleep(2)  # Wait for new content to load
            except Exception:
                break  # Stop scrolling if the button isn't found or can't be clicked
            loaded_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))

        # A round that added nothing counts towards the stall limit; any growth resets it.
        stalled_rounds = stalled_rounds + 1 if loaded_movies == current_movies else 0
        current_movies = loaded_movies

    # Extract movie containers
    movie_blocks = driver.find_elements(By.CSS_SELECTOR, movie_selector)

    for movie in movie_blocks:
        movie_data = {
            "Title": "N/A",
            "Genre": genre,
            "Duration": "N/A",
            "Rating": "N/A",
            "Voting": "N/A",
        }
        for field, by, locator in field_locators:
            try:
                text = movie.find_element(by, locator).text.strip()
            except Exception:
                # Best effort: a missing/stale element keeps the "N/A" default.
                # `Exception` (not a bare except) so a kernel interrupt still stops the cell.
                continue
            if text:
                movie_data[field] = text
        movies_list.append(movie_data)
finally:
    # Close the driver even if scraping fails or the kernel is interrupted
    driver.quit()

# Print results
for movie in movies_list:
    print(movie)


{'Title': '1. The Substance', 'Genre': 'Horror', 'Duration': '2h 21m', 'Rating': '7.3', 'Voting': '(275K)'}
{'Title': '2. Nosferatu', 'Genre': 'Horror', 'Duration': '2h 12m', 'Rating': '7.3', 'Voting': '(175K)'}
{'Title': '3. Heretic', 'Genre': 'Horror', 'Duration': '1h 51m', 'Rating': '7.0', 'Voting': '(115K)'}
{'Title': '4. Presence', 'Genre': 'Horror', 'Duration': '1h 24m', 'Rating': '6.2', 'Voting': '(16K)'}
{'Title': '5. Longlegs', 'Genre': 'Horror', 'Duration': '1h 41m', 'Rating': '6.6', 'Voting': '(177K)'}
{'Title': '6. Alien: Romulus', 'Genre': 'Horror', 'Duration': '1h 59m', 'Rating': '7.1', 'Voting': '(236K)'}
{'Title': '7. Smile 2', 'Genre': 'Horror', 'Duration': '2h 7m', 'Rating': '6.7', 'Voting': '(96K)'}
{'Title': '8. MaXXXine', 'Genre': 'Horror', 'Duration': '1h 43m', 'Rating': '6.3', 'Voting': '(67K)'}
{'Title': '9. Speak No Evil', 'Genre': 'Horror', 'Duration': '1h 50m', 'Rating': '6.8', 'Voting': '(91K)'}
{'Title': '10. Abigail', 'Genre': 'Horror', 'Duration': '1h 49m

In [None]:
#common csv converting code

import pandas as pd


# Build a cleaned DataFrame from the scraped rows in `movies_list` and save it as CSV.
# The columns are named explicitly so the cleaning steps below do not raise KeyError
# when the scrape returned no rows.
df = pd.DataFrame(movies_list, columns=["Title", "Genre", "Duration", "Rating", "Voting"])

# 1. Remove leading numbers from "Title" (e.g. "1. The Substance" -> "The Substance")
df["Title"] = df["Title"].str.replace(r"^\d+\.\s*", "", regex=True)

# 2. Remove parentheses from "Voting" (e.g. "(275K)" -> "275K")
df["Voting"] = df["Voting"].str.replace(r"[()]", "", regex=True)

# 3. Reset index to start from 1 (affects the in-memory frame only:
#    the CSV below is written with index=False)
df.index = df.index + 1

df.to_csv(r"D:\Guvi Ds\horror.csv", index=False, encoding="utf-8")
print("CSV file saved successfully in D:\\Guvi Ds!")



CSV file saved successfully in D:\Guvi Ds!


In [6]:
# Thriller movies 

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import time

# Scrape IMDb's search results for 2024 Thriller feature films into `movies_list`.
#
# Results left behind for later cells:
#   genre        - label written into every row
#   movies_list  - one dict per result with keys Title, Genre, Duration, Rating, Voting;
#                  any field that cannot be read (or is blank) is stored as "N/A"

genre = "Thriller"
total_movies_needed = 500
movie_selector = "li.ipc-metadata-list-summary-item"
max_stalled_rounds = 3  # give up after this many load attempts that add no results

# (output key, locator strategy, locator) for each scraped field, relative to one result item
field_locators = [
    ("Title", By.CSS_SELECTOR, "h3.ipc-title__text"),
    ("Duration", By.XPATH, ".//span[contains(@class, 'dli-title-metadata-item') and (contains(text(),'h') or contains(text(),'m'))]"),
    ("Rating", By.CSS_SELECTOR, "span.ipc-rating-star--rating"),
    ("Voting", By.CSS_SELECTOR, "span.ipc-rating-star--voteCount"),
]

movies_list = []

# Initialize WebDriver
driver = webdriver.Chrome()
try:
    driver.get("https://www.imdb.com/search/title/?title_type=feature&release_date=2024-01-01,2024-12-31&genres=thriller")
    time.sleep(3)  # Waiting for the page to load

    # Load more results until enough are on the page or nothing new arrives.
    # Progress is measured by counting the result items actually present in the DOM:
    # the page source can change for unrelated reasons, so it is not a reliable signal.
    current_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))
    stalled_rounds = 0
    while current_movies < total_movies_needed and stalled_rounds < max_stalled_rounds:
        # Scroll down to load more data
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)  # Allow time for new data to load

        loaded_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))
        if loaded_movies == current_movies:
            try:
                # Locate and click the "See More" button if present
                see_more_button = driver.find_element(By.XPATH, "//span[contains(@class, 'ipc-see-more')]")
                ActionChains(driver).move_to_element(see_more_button).click().perform()
                time.sleep(2)  # Wait for new content to load
            except Exception:
                break  # Stop scrolling if the button isn't found or can't be clicked
            loaded_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))

        # A round that added nothing counts towards the stall limit; any growth resets it.
        stalled_rounds = stalled_rounds + 1 if loaded_movies == current_movies else 0
        current_movies = loaded_movies

    # Extract movie containers
    movie_blocks = driver.find_elements(By.CSS_SELECTOR, movie_selector)

    for movie in movie_blocks:
        movie_data = {
            "Title": "N/A",
            "Genre": genre,
            "Duration": "N/A",
            "Rating": "N/A",
            "Voting": "N/A",
        }
        for field, by, locator in field_locators:
            try:
                text = movie.find_element(by, locator).text.strip()
            except Exception:
                # Best effort: a missing/stale element keeps the "N/A" default.
                # `Exception` (not a bare except) so a kernel interrupt still stops the cell.
                continue
            if text:
                movie_data[field] = text
        movies_list.append(movie_data)
finally:
    # Close the driver even if scraping fails or the kernel is interrupted
    driver.quit()

# Print results
for movie in movies_list:
    print(movie)


{'Title': '1. Emilia Pérez', 'Genre': 'Thriller', 'Duration': '2h 12m', 'Rating': '5.4', 'Voting': '(83K)'}
{'Title': '2. Conclave', 'Genre': 'Thriller', 'Duration': '2h', 'Rating': '7.4', 'Voting': '(127K)'}
{'Title': '3. Heretic', 'Genre': 'Thriller', 'Duration': '1h 51m', 'Rating': '7.0', 'Voting': '(115K)'}
{'Title': '4. Venom: The Last Dance', 'Genre': 'Thriller', 'Duration': '1h 50m', 'Rating': '6.0', 'Voting': '(105K)'}
{'Title': '5. Babygirl', 'Genre': 'Thriller', 'Duration': '1h 54m', 'Rating': '6.0', 'Voting': '(46K)'}
{'Title': '6. September 5', 'Genre': 'Thriller', 'Duration': '1h 35m', 'Rating': '7.1', 'Voting': '(19K)'}
{'Title': '7. Presence', 'Genre': 'Thriller', 'Duration': '1h 24m', 'Rating': '6.2', 'Voting': '(16K)'}
{'Title': '8. The Order', 'Genre': 'Thriller', 'Duration': '1h 56m', 'Rating': '6.8', 'Voting': '(36K)'}
{'Title': '9. Longlegs', 'Genre': 'Thriller', 'Duration': '1h 41m', 'Rating': '6.6', 'Voting': '(177K)'}
{'Title': '10. Kraven the Hunter', 'Genre': 

In [None]:
#common csv converting code

import pandas as pd


# Build a cleaned DataFrame from the scraped rows in `movies_list` and save it as CSV.
# The columns are named explicitly so the cleaning steps below do not raise KeyError
# when the scrape returned no rows.
df = pd.DataFrame(movies_list, columns=["Title", "Genre", "Duration", "Rating", "Voting"])

# 1. Remove leading numbers from "Title" (e.g. "2. Conclave" -> "Conclave")
df["Title"] = df["Title"].str.replace(r"^\d+\.\s*", "", regex=True)

# 2. Remove parentheses from "Voting" (e.g. "(127K)" -> "127K")
df["Voting"] = df["Voting"].str.replace(r"[()]", "", regex=True)

# 3. Reset index to start from 1 (affects the in-memory frame only:
#    the CSV below is written with index=False)
df.index = df.index + 1

df.to_csv(r"D:\Guvi Ds\thriller.csv", index=False, encoding="utf-8")
print("CSV file saved successfully in D:\\Guvi Ds!")



CSV file saved successfully in D:\Guvi Ds!


In [8]:
# Romance movies 

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import time

# Scrape IMDb's search results for 2024 Romance feature films into `movies_list`.
#
# Results left behind for later cells:
#   genre        - label written into every row
#   movies_list  - one dict per result with keys Title, Genre, Duration, Rating, Voting;
#                  any field that cannot be read (or is blank) is stored as "N/A"

genre = "Romance"
total_movies_needed = 500
movie_selector = "li.ipc-metadata-list-summary-item"
max_stalled_rounds = 3  # give up after this many load attempts that add no results

# (output key, locator strategy, locator) for each scraped field, relative to one result item
field_locators = [
    ("Title", By.CSS_SELECTOR, "h3.ipc-title__text"),
    ("Duration", By.XPATH, ".//span[contains(@class, 'dli-title-metadata-item') and (contains(text(),'h') or contains(text(),'m'))]"),
    ("Rating", By.CSS_SELECTOR, "span.ipc-rating-star--rating"),
    ("Voting", By.CSS_SELECTOR, "span.ipc-rating-star--voteCount"),
]

movies_list = []

# Initialize WebDriver
driver = webdriver.Chrome()
try:
    driver.get("https://www.imdb.com/search/title/?title_type=feature&release_date=2024-01-01,2024-12-31&genres=romance")
    time.sleep(3)  # Waiting for the page to load

    # Load more results until enough are on the page or nothing new arrives.
    # Progress is measured by counting the result items actually present in the DOM:
    # the page source can change for unrelated reasons, so it is not a reliable signal.
    current_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))
    stalled_rounds = 0
    while current_movies < total_movies_needed and stalled_rounds < max_stalled_rounds:
        # Scroll down to load more data
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)  # Allow time for new data to load

        loaded_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))
        if loaded_movies == current_movies:
            try:
                # Locate and click the "See More" button if present
                see_more_button = driver.find_element(By.XPATH, "//span[contains(@class, 'ipc-see-more')]")
                ActionChains(driver).move_to_element(see_more_button).click().perform()
                time.sleep(2)  # Wait for new content to load
            except Exception:
                break  # Stop scrolling if the button isn't found or can't be clicked
            loaded_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))

        # A round that added nothing counts towards the stall limit; any growth resets it.
        stalled_rounds = stalled_rounds + 1 if loaded_movies == current_movies else 0
        current_movies = loaded_movies

    # Extract movie containers
    movie_blocks = driver.find_elements(By.CSS_SELECTOR, movie_selector)

    for movie in movie_blocks:
        movie_data = {
            "Title": "N/A",
            "Genre": genre,
            "Duration": "N/A",
            "Rating": "N/A",
            "Voting": "N/A",
        }
        for field, by, locator in field_locators:
            try:
                text = movie.find_element(by, locator).text.strip()
            except Exception:
                # Best effort: a missing/stale element keeps the "N/A" default.
                # `Exception` (not a bare except) so a kernel interrupt still stops the cell.
                continue
            if text:
                movie_data[field] = text
        movies_list.append(movie_data)
finally:
    # Close the driver even if scraping fails or the kernel is interrupted
    driver.quit()

# Print results
for movie in movies_list:
    print(movie)


{'Title': '1. Anora', 'Genre': 'Romance', 'Duration': '2h 19m', 'Rating': '7.6', 'Voting': '(141K)'}
{'Title': '2. Wicked', 'Genre': 'Romance', 'Duration': '2h 40m', 'Rating': '7.5', 'Voting': '(139K)'}
{'Title': '3. Babygirl', 'Genre': 'Romance', 'Duration': '1h 54m', 'Rating': '6.0', 'Voting': '(46K)'}
{'Title': '4. It Ends with Us', 'Genre': 'Romance', 'Duration': '2h 10m', 'Rating': '6.4', 'Voting': '(80K)'}
{'Title': '5. We Live in Time', 'Genre': 'Romance', 'Duration': '1h 48m', 'Rating': '7.0', 'Voting': '(47K)'}
{'Title': '6. Challengers', 'Genre': 'Romance', 'Duration': '2h 11m', 'Rating': '7.1', 'Voting': '(150K)'}
{'Title': '7. The Count of Monte-Cristo', 'Genre': 'Romance', 'Duration': '2h 58m', 'Rating': '7.6', 'Voting': '(32K)'}
{'Title': '8. Queer', 'Genre': 'Romance', 'Duration': '2h 17m', 'Rating': '6.5', 'Voting': '(17K)'}
{'Title': '9. The Crow', 'Genre': 'Romance', 'Duration': '1h 51m', 'Rating': '4.7', 'Voting': '(32K)'}
{'Title': '10. The Fall Guy', 'Genre': 'Roma

In [None]:
#common csv converting code

import pandas as pd


# Build a cleaned DataFrame from the scraped rows in `movies_list` and save it as CSV.
# The columns are named explicitly so the cleaning steps below do not raise KeyError
# when the scrape returned no rows.
df = pd.DataFrame(movies_list, columns=["Title", "Genre", "Duration", "Rating", "Voting"])

# 1. Remove leading numbers from "Title" (e.g. "1. Anora" -> "Anora")
df["Title"] = df["Title"].str.replace(r"^\d+\.\s*", "", regex=True)

# 2. Remove parentheses from "Voting" (e.g. "(141K)" -> "141K")
df["Voting"] = df["Voting"].str.replace(r"[()]", "", regex=True)

# 3. Reset index to start from 1 (affects the in-memory frame only:
#    the CSV below is written with index=False)
df.index = df.index + 1

df.to_csv(r"D:\Guvi Ds\romance.csv", index=False, encoding="utf-8")
print("CSV file saved successfully in D:\\Guvi Ds!")



CSV file saved successfully in D:\Guvi Ds!


In [10]:
# Mystery movies 

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import time

# Scrape IMDb's search results for 2024 Mystery feature films into `movies_list`.
#
# Results left behind for later cells:
#   genre        - label written into every row
#   movies_list  - one dict per result with keys Title, Genre, Duration, Rating, Voting;
#                  any field that cannot be read (or is blank) is stored as "N/A"

genre = "Mystery"
total_movies_needed = 500
movie_selector = "li.ipc-metadata-list-summary-item"
max_stalled_rounds = 3  # give up after this many load attempts that add no results

# (output key, locator strategy, locator) for each scraped field, relative to one result item
field_locators = [
    ("Title", By.CSS_SELECTOR, "h3.ipc-title__text"),
    ("Duration", By.XPATH, ".//span[contains(@class, 'dli-title-metadata-item') and (contains(text(),'h') or contains(text(),'m'))]"),
    ("Rating", By.CSS_SELECTOR, "span.ipc-rating-star--rating"),
    ("Voting", By.CSS_SELECTOR, "span.ipc-rating-star--voteCount"),
]

movies_list = []

# Initialize WebDriver
driver = webdriver.Chrome()
try:
    driver.get("https://www.imdb.com/search/title/?title_type=feature&release_date=2024-01-01,2024-12-31&genres=mystery")
    time.sleep(3)  # Waiting for the page to load

    # Load more results until enough are on the page or nothing new arrives.
    # Progress is measured by counting the result items actually present in the DOM:
    # the page source can change for unrelated reasons, so it is not a reliable signal.
    current_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))
    stalled_rounds = 0
    while current_movies < total_movies_needed and stalled_rounds < max_stalled_rounds:
        # Scroll down to load more data
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)  # Allow time for new data to load

        loaded_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))
        if loaded_movies == current_movies:
            try:
                # Locate and click the "See More" button if present
                see_more_button = driver.find_element(By.XPATH, "//span[contains(@class, 'ipc-see-more')]")
                ActionChains(driver).move_to_element(see_more_button).click().perform()
                time.sleep(2)  # Wait for new content to load
            except Exception:
                break  # Stop scrolling if the button isn't found or can't be clicked
            loaded_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))

        # A round that added nothing counts towards the stall limit; any growth resets it.
        stalled_rounds = stalled_rounds + 1 if loaded_movies == current_movies else 0
        current_movies = loaded_movies

    # Extract movie containers
    movie_blocks = driver.find_elements(By.CSS_SELECTOR, movie_selector)

    for movie in movie_blocks:
        movie_data = {
            "Title": "N/A",
            "Genre": genre,
            "Duration": "N/A",
            "Rating": "N/A",
            "Voting": "N/A",
        }
        for field, by, locator in field_locators:
            try:
                text = movie.find_element(by, locator).text.strip()
            except Exception:
                # Best effort: a missing/stale element keeps the "N/A" default.
                # `Exception` (not a bare except) so a kernel interrupt still stops the cell.
                continue
            if text:
                movie_data[field] = text
        movies_list.append(movie_data)
finally:
    # Close the driver even if scraping fails or the kernel is interrupted
    driver.quit()

# Print results
for movie in movies_list:
    print(movie)


{'Title': '1. Nosferatu', 'Genre': 'Mystery', 'Duration': '2h 12m', 'Rating': '7.3', 'Voting': '(175K)'}
{'Title': '2. Blink Twice', 'Genre': 'Mystery', 'Duration': '1h 42m', 'Rating': '6.5', 'Voting': '(99K)'}
{'Title': '3. Paddington in Peru', 'Genre': 'Mystery', 'Duration': '1h 46m', 'Rating': '6.7', 'Voting': '(16K)'}
{'Title': '4. Juror #2', 'Genre': 'Mystery', 'Duration': '1h 54m', 'Rating': '7.0', 'Voting': '(90K)'}
{'Title': '5. Smile 2', 'Genre': 'Mystery', 'Duration': '2h 7m', 'Rating': '6.7', 'Voting': '(96K)'}
{'Title': '6. The Watchers', 'Genre': 'Mystery', 'Duration': '1h 42m', 'Rating': '5.7', 'Voting': '(55K)'}
{'Title': '7. Trap', 'Genre': 'Mystery', 'Duration': '1h 45m', 'Rating': '5.8', 'Voting': '(128K)'}
{'Title': '8. F*** Marry Kill', 'Genre': 'Mystery', 'Duration': '1h 37m', 'Rating': '5.2', 'Voting': '(1.6K)'}
{'Title': '9. The Rule of Jenny Pen', 'Genre': 'Mystery', 'Duration': '1h 44m', 'Rating': '6.5', 'Voting': '(533)'}
{'Title': '10. Red One', 'Genre': 'Mys

In [None]:
#common csv converting code

import pandas as pd


# Build a cleaned DataFrame from the scraped rows in `movies_list` and save it as CSV.
# The columns are named explicitly so the cleaning steps below do not raise KeyError
# when the scrape returned no rows.
df = pd.DataFrame(movies_list, columns=["Title", "Genre", "Duration", "Rating", "Voting"])

# 1. Remove leading numbers from "Title" (e.g. "1. Nosferatu" -> "Nosferatu")
df["Title"] = df["Title"].str.replace(r"^\d+\.\s*", "", regex=True)

# 2. Remove parentheses from "Voting" (e.g. "(175K)" -> "175K")
df["Voting"] = df["Voting"].str.replace(r"[()]", "", regex=True)

# 3. Reset index to start from 1 (affects the in-memory frame only:
#    the CSV below is written with index=False)
df.index = df.index + 1

df.to_csv(r"D:\Guvi Ds\mystery.csv", index=False, encoding="utf-8")
print("CSV file saved successfully in D:\\Guvi Ds!")



CSV file saved successfully in D:\Guvi Ds!


In [None]:
# Comedy movies 

from selenium import webdriver 
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import time

# Scrape IMDb's search results for 2024 Comedy feature films into `movies_list`.
#
# Results left behind for later cells:
#   genre        - label written into every row
#   movies_list  - one dict per result with keys Title, Genre, Duration, Rating, Voting;
#                  any field that cannot be read (or is blank) is stored as "N/A"

genre = "Comedy"
total_movies_needed = 500
movie_selector = "li.ipc-metadata-list-summary-item"
max_stalled_rounds = 3  # give up after this many load attempts that add no results

# (output key, locator strategy, locator) for each scraped field, relative to one result item
field_locators = [
    ("Title", By.CSS_SELECTOR, "h3.ipc-title__text"),
    ("Duration", By.XPATH, ".//span[contains(@class, 'dli-title-metadata-item') and (contains(text(),'h') or contains(text(),'m'))]"),
    ("Rating", By.CSS_SELECTOR, "span.ipc-rating-star--rating"),
    ("Voting", By.CSS_SELECTOR, "span.ipc-rating-star--voteCount"),
]

movies_list = []

# Initialize WebDriver
driver = webdriver.Chrome()
try:
    driver.get("https://www.imdb.com/search/title/?title_type=feature&release_date=2024-01-01,2024-12-31&genres=comedy")
    time.sleep(3)  # Waiting for the page to load

    # Load more results until enough are on the page or nothing new arrives.
    # Progress is measured by counting the result items actually present in the DOM:
    # the page source can change for unrelated reasons, so it is not a reliable signal.
    current_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))
    stalled_rounds = 0
    while current_movies < total_movies_needed and stalled_rounds < max_stalled_rounds:
        # Scroll down to load more data
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)  # Allow time for new data to load

        loaded_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))
        if loaded_movies == current_movies:
            try:
                # Locate and click the "See More" button if present
                see_more_button = driver.find_element(By.XPATH, "//span[contains(@class, 'ipc-see-more')]")
                ActionChains(driver).move_to_element(see_more_button).click().perform()
                time.sleep(2)  # Wait for new content to load
            except Exception:
                break  # Stop scrolling if the button isn't found or can't be clicked
            loaded_movies = len(driver.find_elements(By.CSS_SELECTOR, movie_selector))

        # A round that added nothing counts towards the stall limit; any growth resets it.
        stalled_rounds = stalled_rounds + 1 if loaded_movies == current_movies else 0
        current_movies = loaded_movies

    # Extract movie containers
    movie_blocks = driver.find_elements(By.CSS_SELECTOR, movie_selector)

    for movie in movie_blocks:
        movie_data = {
            "Title": "N/A",
            "Genre": genre,
            "Duration": "N/A",
            "Rating": "N/A",
            "Voting": "N/A",
        }
        for field, by, locator in field_locators:
            try:
                text = movie.find_element(by, locator).text.strip()
            except Exception:
                # Best effort: a missing/stale element keeps the "N/A" default.
                # `Exception` (not a bare except) so a kernel interrupt still stops the cell.
                continue
            if text:
                movie_data[field] = text
        movies_list.append(movie_data)
finally:
    # Close the driver even if scraping fails or the kernel is interrupted
    driver.quit()

# Print results
for movie in movies_list:
    print(movie)


{'Title': '1. Anora', 'Genre': 'Comedy', 'Duration': '2h 19m', 'Rating': '7.6', 'Voting': '(141K)'}
{'Title': '2. Emilia Pérez', 'Genre': 'Comedy', 'Duration': '2h 12m', 'Rating': '5.4', 'Voting': '(83K)'}
{'Title': '3. A Real Pain', 'Genre': 'Comedy', 'Duration': '1h 30m', 'Rating': '7.1', 'Voting': '(73K)'}
{'Title': '4. Sonic the Hedgehog 3', 'Genre': 'Comedy', 'Duration': '1h 50m', 'Rating': '6.9', 'Voting': '(51K)'}
{'Title': '5. Riff Raff', 'Genre': 'Comedy', 'Duration': '1h 43m', 'Rating': '6.3', 'Voting': '(754)'}
{'Title': '6. Despicable Me 4', 'Genre': 'Comedy', 'Duration': '1h 34m', 'Rating': '6.2', 'Voting': '(62K)'}
{'Title': '7. Borderlands', 'Genre': 'Comedy', 'Duration': '1h 41m', 'Rating': '4.7', 'Voting': '(44K)'}
{'Title': '8. Paddington in Peru', 'Genre': 'Comedy', 'Duration': '1h 46m', 'Rating': '6.7', 'Voting': '(16K)'}
{'Title': '9. Saturday Night', 'Genre': 'Comedy', 'Duration': '1h 49m', 'Rating': '6.9', 'Voting': '(28K)'}
{'Title': '10. Deadpool & Wolverine', 

In [None]:
#common csv converting code

import pandas as pd


# Build a cleaned DataFrame from the scraped rows in `movies_list` and save it as CSV.
# The columns are named explicitly so the cleaning steps below do not raise KeyError
# when the scrape returned no rows.
df = pd.DataFrame(movies_list, columns=["Title", "Genre", "Duration", "Rating", "Voting"])

# 1. Remove leading numbers from "Title" (e.g. "3. A Real Pain" -> "A Real Pain")
df["Title"] = df["Title"].str.replace(r"^\d+\.\s*", "", regex=True)

# 2. Remove parentheses from "Voting" (e.g. "(73K)" -> "73K")
df["Voting"] = df["Voting"].str.replace(r"[()]", "", regex=True)

# 3. Reset index to start from 1 (affects the in-memory frame only:
#    the CSV below is written with index=False)
df.index = df.index + 1

df.to_csv(r"D:\Guvi Ds\comedy.csv", index=False, encoding="utf-8")
print("CSV file saved successfully in D:\\Guvi Ds!")



CSV file saved successfully in D:\Guvi Ds!
