In [None]:
## Import all the packages needed

# Access AWS services
import boto3

# Perform the Google search
from googlesearch import search 

# Process images
import cv2
import matplotlib.pyplot as plt

# Scraping
import requests
from scrapy import Selector
import re


In [None]:
## Step 1: create a function which processes the image and extract the necessary information

### Use AWS Rekognition to check if there is a TV in the image;
### If yes, we will use the bounding box to focus the search and extraction of the text within the image;
### If no TV, no need to do anything further;


def _to_rekognition_image(image):
    """Wrap ``image`` in the ``Image`` structure the Rekognition API expects.

    Accepts an already-built Rekognition dict (``{'Bytes': ...}`` or
    ``{'S3Object': ...}``), raw encoded bytes, or an OpenCV image array
    (which is JPEG-encoded here).
    """
    if image is None:
        raise ValueError('image is None (was the file read correctly?)')
    if isinstance(image, dict):
        return image
    if isinstance(image, (bytes, bytearray)):
        return {'Bytes': bytes(image)}

    # Anything else is treated as an OpenCV array and encoded in memory.
    encoded_ok, buffer = cv2.imencode('.jpg', image)
    if not encoded_ok:
        raise ValueError('could not encode the image as JPEG')
    return {'Bytes': buffer.tobytes()}


def _clamp_bounding_box(box):
    """Return a copy of a Rekognition bounding box clipped to the image area.

    Rekognition expresses boxes as ratios of the image size and may report
    coordinates slightly outside [0, 1] for objects at the image edge.
    """
    def clamp(value):
        return min(max(value, 0.0), 1.0)

    left, top = clamp(box['Left']), clamp(box['Top'])
    right = clamp(box['Left'] + box['Width'])
    bottom = clamp(box['Top'] + box['Height'])
    return {'Left': left, 'Top': top, 'Width': right - left, 'Height': bottom - top}


def extract_text_from_image(image, rekog, min_confidence=85):
    """Read the text shown on a TV screen found in ``image``.

    Parameters
    ----------
    image : OpenCV image array, bytes, or dict
        The picture to analyse. Arrays are JPEG-encoded before upload;
        bytes and ready-made Rekognition ``Image`` dicts are sent as given.
    rekog : Rekognition client
        Object exposing ``detect_labels`` and ``detect_text``.
    min_confidence : float, optional
        A TV instance is only used when its confidence is strictly above
        this value (default 85).

    Returns
    -------
    list of str or None
        The ``LINE`` text detections inside the first sufficiently confident
        TV instance, or ``None`` when no such TV is found.
    """
    rekog_image = _to_rekognition_image(image)

    # Use Rekognition from AWS to detect objects in the picture
    rekog_tv_response = rekog.detect_labels(Image=rekog_image)

    for label in rekog_tv_response['Labels']:
        # Find the TV label
        if label['Name'] != 'TV':
            continue

        for instance in label.get('Instances', []):
            # Only count instances above the confidence threshold
            if instance['Confidence'] <= min_confidence:
                continue

            # Bounding boxes are ratios of the image size, not pixels, so
            # instead of slicing the array we let Rekognition restrict the
            # text search to the TV region.
            region = {'BoundingBox': _clamp_bounding_box(instance['BoundingBox'])}
            rekog_text_response = rekog.detect_text(
                Image=rekog_image,
                Filters={'RegionsOfInterest': [region]},
            )

            # Keep whole lines only; WORD detections duplicate their content
            return [
                text_detection['DetectedText']
                for text_detection in rekog_text_response['TextDetections']
                if text_detection['Type'] == 'LINE'
            ]

    # No TV was detected with enough confidence
    return None
                

In [None]:
## Step 2: create a function that returns the IMDB URL page of the TV show/movie 

### Use Google to search for the right IMDB page
### Return the URL page to be scraped

def get_imdb_url(text):
    """Search Google for the IMDB page matching the detected title.

    Parameters
    ----------
    text : str, list/tuple of str, or None
        The title, either as one string or as the separate lines of text
        read from the image.

    Returns
    -------
    list of str
        The first Google result for "<title> on IMDB" (at most one URL),
        or an empty list when there is no title to search for.
    """
    if text is None:
        return []

    # Lines of text are joined with spaces; str() on the list itself would
    # put brackets and quotes into the search query.
    if isinstance(text, (list, tuple)):
        title = ' '.join(str(line).strip() for line in text)
    else:
        title = str(text)
    title = title.strip()

    # Nothing to search for: skip the network call entirely
    if not title:
        return []

    # Build the query that will be used to find the right IMDB page
    query_google = title + ' on IMDB'

    # Perform the query and return only one result
    return list(search(query_google, tld="com", lang='en', start=0, stop=1, pause=5))
  

In [None]:
## Compile the functions to create the application pipeline

### Load the image
### Identify the title of TV show/Movie
### Identify its IMDB page
### Scrape the page for the rating
### Return the result on the image



# Set up the AWS connection
# NOTE(review): region_name, AWS_KEY_ID and AWS_SECRET are not defined in the
# cells visible here - confirm they are set in an earlier cell (ideally read
# from the environment rather than hardcoded) before running this one.
rekog = boto3.client('rekognition', region_name = region_name, 
                      # Set up AWS credentials 
                      aws_access_key_id = AWS_KEY_ID, 
                      aws_secret_access_key = AWS_SECRET)

# Load the image
image_path = '...'
image = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read
if image is None:
    raise FileNotFoundError('Could not read image: ' + image_path)

# Get the title
title = extract_text_from_image(image, rekog)

# Get the IMDB page
url_imdb = get_imdb_url(title)

# Extract the rating
# NOTE(review): get_rating is not defined in the cells visible here - confirm
# it exists and accepts what get_imdb_url returns.
rating = get_rating(url_imdb)

# Output the result
# putText only accepts a string, so convert the rating explicitly
cv2.putText(image, str(rating), (50,100), cv2.FONT_HERSHEY_DUPLEX, 3, (255,0,0), 8)
# Show the picture inline: cv2.imshow needs a window name and a GUI window,
# which does not render in a notebook. OpenCV stores pixels as BGR while
# matplotlib expects RGB, hence the conversion.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()
