#**Import Libraries**

In [1]:
import os
import time
import requests
import pandas as pd
from bs4 import BeautifulSoup

#**Functions**

##Convert Array to String

In [None]:
def convertArrToString(arr):
    string = ""
    if len(arr) != 1:
        for i in arr:
            string += i
            string += ", "
    else:
        string = arr[0]
    return string

##Create CSV File

In [None]:
def create_empty_csv_file(file_path):
    """
    Create an empty CSV file with row headers.
    The row headers are: Film Title, IMDB Rating, Release Year, Genre, Director, Movie Image, Stars
    Parameters:
    - file_path: The file path where the CSV file will be saved.
    """
    # Ensure the folder exists, create it if it doesn't
    folder_path = os.path.dirname(file_path)
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    # Create an empty DataFrame with the specified headers
    headers = [
        "Film Title",
        "IMDb Rating",
        "Release Year",
        "Genre",
        "Director",
        "Movie Image",
        "Stars",
    ]
    df = pd.DataFrame(columns=headers)
    # Save to CSV file
    df.to_csv(file_path, index=False)
    print(f"Empty CSV file '{file_path}' created successfully.")


# Example usage:
# csv_file_path = "empty_evaluation_metrics.csv"
# create_empty_csv_file(csv_file_path)

##Append to CSV File

In [None]:
def append_to_csv(
    csv_file_path, filmTitle, rating, releaseYear, genre, director, movieImg, stars
):
    data = {
        "Film Title": [filmTitle],
        "IMDb Rating": [rating],
        "Release Year": [releaseYear],
        "Genre": [genre],
        "Director": [director],
        "Movie Image": [movieImg],
        "Stars": [stars],
    }
    df = pd.DataFrame(data)
    df.to_csv(csv_file_path, mode="a", header=False, index=False)

#**Parameters**

In [None]:
filePath = "/content"
URL_list = "https://m.imdb.com/chart/top/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
}
csv_file_path = os.path.join(filePath, "results.csv")
create_empty_csv_file(csv_file_path)

Empty CSV file '/content/results.csv' created successfully.


#**Connect with IMDB**

In [None]:
r = requests.get(URL_list, headers=headers)

#**Get Data**

In [None]:
soup = BeautifulSoup(r.text , 'html.parser')

In [None]:
linkTags = soup.find_all('a', class_='ipc-lockup-overlay ipc-focusable')
links = [tag['href'] for tag in linkTags]

#**Get All the Data from each Movie**

In [None]:
for link in links[:-1]:
    r = requests.get("https://m.imdb.com" + link, headers=headers)
    if r:
        soup = BeautifulSoup(r.text, "html.parser")
        movieTitle = soup.find("span", {"class": "hero__primary-text"}).text
        listItems = soup.find("ul", {"class": "ipc-inline-list ipc-inline-list--show-dividers sc-d8941411-2 cdJsTz baseAlt"})
        releaseYear = (
            listItems
            .find(
                "a",
                {
                    "class": "ipc-link ipc-link--baseAlt ipc-link--inherit-color",
                    "role": "button",
                },
            )
            .text
        )
        rating = soup.find("span", {"class": "sc-bde20123-1 cMEQkK"}).text
        genresTags = soup.find_all("span", {"class": "ipc-chip__text"})
        genres = [tag.text for tag in genresTags[:-1]]
        genres = convertArrToString(genres)
        listItems = soup.find_all("li", {"class": "ipc-metadata-list__item"})
        directorsTag = listItems[0].find_all(
            "a",
            {
                "class": "ipc-metadata-list-item__list-content-item ipc-metadata-list-item__list-content-item--link"
            },
        )
        directors = [tag.text for tag in directorsTag]
        directors = convertArrToString(directors)
        castTags = soup.find_all("a", {"class": "sc-bfec09a1-1 gCQkeh"})
        cast = [tag.text for tag in castTags]
        cast = convertArrToString(cast)
        movieImg = (
            "https://m.imdb.com/"
            + soup.find("a", {"class": "ipc-lockup-overlay ipc-focusable"})["href"]
        )
        append_to_csv(
            csv_file_path,
            movieTitle,
            rating,
            releaseYear,
            genres,
            directors,
            movieImg,
            cast,
        )

In [None]:
DF=pd.read_csv("/content/results.csv")
DF

Unnamed: 0,Film Title,IMDb Rating,Release Year,Genre,Director,Movie Image,Stars
0,The Shawshank Redemption,9.3,1994,Drama,Frank Darabont,https://m.imdb.com//title/tt0111161/mediaviewe...,"Tim Robbins, Morgan Freeman, Bob Gunton, Willi..."
1,The Godfather,9.2,1972,"Crime, Drama,",Francis Ford Coppola,https://m.imdb.com//title/tt0068646/mediaviewe...,"Marlon Brando, Al Pacino, James Caan, Diane Ke..."
2,The Dark Knight,9.0,2008,"Action, Crime, Drama,",Christopher Nolan,https://m.imdb.com//title/tt0468569/mediaviewe...,"Christian Bale, Heath Ledger, Aaron Eckhart, M..."
3,The Godfather Part II,9.0,1974,"Crime, Drama,",Francis Ford Coppola,https://m.imdb.com//title/tt0071562/mediaviewe...,"Al Pacino, Robert De Niro, Robert Duvall, Dian..."
4,12 Angry Men,9.0,1957,"Crime, Drama,",Sidney Lumet,https://m.imdb.com//title/tt0050083/mediaviewe...,"Henry Fonda, Lee J. Cobb, Martin Balsam, John ..."
...,...,...,...,...,...,...,...
245,It Happened One Night,8.1,1934,"Comedy, Romance,",Frank Capra,https://m.imdb.com//title/tt0025316/mediaviewe...,"Clark Gable, Claudette Colbert, Walter Connoll..."
246,Aladdin,8.0,1992,"Animation, Adventure, Comedy,","Ron Clements, John Musker,",https://m.imdb.com//title/tt0103639/mediaviewe...,"Scott Weinger, Robin Williams, Linda Larkin, J..."
247,Dances with Wolves,8.0,1990,"Adventure, Drama, Western,",Kevin Costner,https://m.imdb.com//title/tt0099348/mediaviewe...,"Kevin Costner, Mary McDonnell, Graham Greene, ..."
248,"Paris, Texas",8.1,1984,Drama,Wim Wenders,https://m.imdb.com//title/tt0087884/mediaviewe...,"Harry Dean Stanton, Nastassja Kinski, Dean Sto..."
