## Import the required libraries and fetch the webpage to scrape

In [57]:
import requests 
from bs4 import BeautifulSoup 
import pandas as pd

In [2]:
# Fetch the IMDb list page. The timeout keeps the cell from hanging forever on
# a stalled connection, and raise_for_status() stops here on an HTTP error
# instead of handing an error page to the parser in the next cell.
r = requests.get('https://www.imdb.com/list/ls571125270/', timeout=30)
r.raise_for_status()

In [3]:
# Parse the downloaded HTML with Python's built-in parser.
soup = BeautifulSoup(markup=r.text, features='html.parser')

In [4]:
# Collect every <div class="lister-item-content"> block (one per list entry).
results = soup.find_all('div', class_='lister-item-content')

In [5]:
# Sanity check: how many list-entry blocks were matched on the page.
len(results)

32

In [6]:
# Peek at the first three matched blocks to see the markup being scraped.
results[:3]

[<div class="lister-item-content">
 <h3 class="lister-item-header">
 <span class="lister-item-index unbold text-primary">1.</span>
 <a href="/title/tt9362722/">Spider-Man: Across the Spider-Verse</a>
 <span class="lister-item-year text-muted unbold">(2023)</span>
 </h3>
 <p class="text-muted text-small">
 <span class="certificate">PG</span>
 <span class="ghost">|</span>
 <span class="runtime">140 min</span>
 <span class="ghost">|</span>
 <span class="genre">
 Animation, Action, Adventure            </span>
 </p>
 <div class="ipl-rating-widget">
 <div class="ipl-rating-star small">
 <span class="ipl-rating-star__star">
 <svg class="ipl-icon ipl-star-icon" fill="#000000" height="24" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
 <path d="M0 0h24v24H0z" fill="none"></path>
 <path d="M12 17.27L18.18 21l-1.64-7.03L22 9.24l-7.19-.61L12 2 9.19 8.63 2 9.24l5.46 4.73L5.82 21z"></path>
 <path d="M0 0h24v24H0z" fill="none"></path>
 </svg>
 </span>
 <span class="ipl-rating-sta

## Define functions that extract each column of the data set

In [8]:
def get_name(result):
    """Return the text of the first ``<a>`` element inside ``result``.

    Returns an empty string when the lookup raises ``AttributeError``
    (for example when no ``<a>`` element is found).
    """
    try:
        return result.find("a").text
    except AttributeError:
        return ""

def get_year(result):
    """Return the text of the ``lister-item-year`` span without outer parentheses.

    Leading and trailing ``(`` / ``)`` characters are stripped from the span
    text. Returns an empty string when the lookup raises ``AttributeError``
    (for example when the span is not found).
    """
    try:
        year_tag = result.find("span", {"class": "lister-item-year"})
        return year_tag.text.strip("()")
    except AttributeError:
        return ""

def get_genre(result):
    """Return the whitespace-trimmed text of the ``genre`` span in ``result``.

    Returns an empty string when the lookup raises ``AttributeError``
    (for example when the span is not found).
    """
    try:
        genre_tag = result.find("span", {"class": "genre"})
        return genre_tag.text.strip()
    except AttributeError:
        return ""

def get_ratings(results):
    """Return the rating text of one list entry, or ``'N/A'`` if it has none.

    Args:
        results: a single parsed list item (any object exposing ``find``).
            The parameter keeps its historical plural name so existing
            callers continue to work.

    Returns:
        The text of the item's ``ipl-rating-star__rating`` span, or ``'N/A'``
        when no such span is found.
    """
    item = results
    if not hasattr(item, "find"):
        # Backward compatibility: the previous body ignored its argument and
        # read the notebook-level loop variable `result`, so a caller could
        # pass the whole result collection. Keep that call working instead of
        # failing on an object that has no `find`.
        item = result  # noqa: F821 - notebook-level loop variable
    rating_span = item.find("span", {"class": "ipl-rating-star__rating"})
    return rating_span.text if rating_span is not None else 'N/A'
    
def get_description(results):
    """Return the description text of one list entry as a single trimmed line.

    Args:
        results: a single parsed list item (any object exposing ``find``).
            The parameter keeps its historical plural name so existing
            callers continue to work.

    Returns:
        The text of the matched ``<p>`` element with surrounding whitespace
        removed and newlines dropped, or an empty string when the lookup
        raises ``AttributeError`` (for example when no element is found).
    """
    try:
        # Read from the argument rather than a notebook-level `result` variable.
        # NOTE(review): presumably the description is the <p> that carries no
        # CSS class - confirm against the page markup.
        # strip() replaces the former strip(""), which removed nothing.
        desc = results.find("p", {"class": ""}).text.strip().replace("\n", "")
    except AttributeError:
        desc = ""
    return desc


## Create the data set from the web-scraped data

In [52]:
# Build one (name, year, genre, ratings, description) tuple per scraped entry.
data = []
for result in results:
    name = get_name(result)
    year = get_year(result)
    genre = get_genre(result)
    # Pass the current entry, not the whole `results` collection.
    ratings = get_ratings(result)
    description = get_description(result)

    data.append((name, year, genre, ratings, description))

print(data)





## Create a dataframe from the data set

In [53]:

# Label the five tuple fields as columns, then preview the first rows.
moviedata = pd.DataFrame(
    data=data,
    columns=['name', 'year', 'genre', 'ratings', 'description'],
)

moviedata.head()


Unnamed: 0,name,year,genre,ratings,description
0,Spider-Man: Across the Spider-Verse,2023,"Animation, Action, Adventure",8.7,"Miles Morales catapults across the Multiverse,..."
1,Lightyear,2022,"Animation, Action, Adventure",6.1,While spending years attempting to return home...
2,Guillermo del Toro's Pinocchio,2022,"Animation, Drama, Family",7.6,A father's wish magically brings a wooden boy ...
3,Turning Red,2022,"Animation, Adventure, Comedy",7.0,A thirteen-year-old girl named Mei Lee is torn...
4,Disenchanted,2022,"Animation, Adventure, Comedy",5.6,"Fifteen years after her happily ever after, Gi..."


## Write the data set to CSV

In [60]:

# Save the table without the index column, encoded as UTF-8.
moviedata.to_csv(
    path_or_buf="movie.csv",
    index=False,
    encoding='utf-8',
)