Python-World · tusharnankani · Sep 11, 2020 · Sep 8, 2020 · Sep 10, 2020 · Sep 10, 2020
diff --git a/projects/Movie Information Scraper/README.md b/projects/Movie Information Scraper/README.md
@@ -0,0 +1,17 @@
+# Movie Information Scraper
+This script obtains movie details by scraping IMDB website.
+
+### Prerequisites
+* bs4
+* requests
+* Run `pip install -r requirements.txt` to install required external modules.
+
+### How to run the script
+Execute `python3 movieInfoScraper.py` and type in the movie name when prompted.
+
+### Screenshot/GIF showing the sample use of the script
+<!--Remove the below lines and add yours -->
+![Screenshot of the Output](Screenshot.png)
+
+## Author Name
+Anandha Krishnan Aji
diff --git a/projects/Movie Information Scraper/Screenshot.png b/projects/Movie Information Scraper/Screenshot.png
diff --git a/projects/Movie Information Scraper/movieInfoScraper.py b/projects/Movie Information Scraper/movieInfoScraper.py
@@ -0,0 +1,100 @@
+from bs4 import BeautifulSoup
+import requests
+
+# Function to get Movie Details
+
+
+def getMovieDetails(movieName):
+    # Base URL of IMDB website
+    url = 'https://www.imdb.com'
+
+    # Query to find movie title
+    query = '/search/title?title='
+
+    # Empty dictionary to store movie Details
+    movieDetails = {}
+
+    # Query formed
+    movienamequery = query+'+'.join(movieName.strip().split(' '))
+
+    # WebPage is obtained and parsed
+    html = requests.get(url+movienamequery+'&title_type=feature')
+    bs = BeautifulSoup(html.text, 'html.parser')
+
+    # Gets the first movie that appears in title section
+    result = bs.find('h3', {'class': 'lister-item-header'})
+
+    if result is None:
+        return None
+
+    movielink = url+result.a.attrs['href']
+    movieDetails['name'] = result.a.text
+
+    # Gets the page with movie details
+    html = requests.get(movielink)
+    bs = BeautifulSoup(html.text, 'html.parser')
+    # Year
+    try:
+        movieDetails['year'] = bs.find('span', {'id': 'titleYear'}).a.text
+    except AttributeError:
+        movieDetails['year'] = 'Not available'
+    subtext = bs.find('div', {'class': 'subtext'})
+
+    # Rating,Genres,Runtime,Release Date,
+
+    movieDetails['genres'] = [
+        i.text for i in subtext.findAll('a', {'title': None})]
+    try:
+        movieDetails['rating'] = bs.find(
+            'div', {'class': 'ratingValue'}).span.text
+        movieDetails['runtime'] = subtext.time.text.strip()
+    except AttributeError:
+        movieDetails['rating'] = 'Not yet rated'
+        movieDetails['runtime'] = 'Not available'
+    movieDetails['release_date'] = subtext.find(
+        'a', {'title': 'See more release dates'}).text.strip()
+
+    # Gets the credit section of the page
+    creditSummary = bs.findAll('div', {'class': 'credit_summary_item'})
+
+    # Directors,Writers and Cast
+    movieDetails['directors'] = [i.text for i in creditSummary[0].findAll('a')]
+    movieDetails['writers'] = [i.text for i in creditSummary[1].findAll(
+        'a') if 'name' in i.attrs['href']]
+    try:
+        movieDetails['cast'] = [i.text for i in creditSummary[2].findAll(
+            'a') if 'name' in i.attrs['href']]
+
+    # For some films, writer details are not provided
+    except IndexError:
+        movieDetails['cast']=movieDetails['writers']
+        movieDetails['writers']='Not found'
+
+    # The plot is seperate AJAX call and does not come in the html page, So one more request to plotsummary page
+    html = requests.get(movielink+'plotsummary')
+    bs = BeautifulSoup(html.text, 'html.parser')
+
+    # Plot
+    movieDetails['plot'] = bs.find(
+        'li', {'class': 'ipl-zebra-list__item'}).p.text.strip()
+
+    # Returns the dictionary with movie details
+    return movieDetails
+
+
+if __name__ == "__main__":
+    movieName = input('Enter the movie name whose details are to be fetched\n')
+    movieDetails = getMovieDetails(movieName)
+    if movieDetails is None:
+        print('No movie of this name found !!!!!')
+        quit()
+    print('\n{movie} ({year})'.format(
+        movie=movieDetails['name'], year=movieDetails['year']))
+    print('Rating:', movieDetails['rating'])
+    print('Runtime:', movieDetails['runtime'])
+    print('Release Date:', movieDetails['release_date'])
+    print('Genres:', ', '.join(movieDetails['genres']))
+    print('Director:', ', '.join(movieDetails['directors']))
+    print('Writer:', ', '.join(movieDetails['writers']))
+    print('Cast:', ', '.join(movieDetails['cast']))
+    print('Plot Summary:\n', movieDetails['plot'])
diff --git a/projects/Movie Information Scraper/requirements.txt b/projects/Movie Information Scraper/requirements.txt
@@ -0,0 +1,2 @@
+bs4==0.0.1
+requests==2.23.0