# Creation of an Alexa Skill to Retrieve Movie Information from IMDB

* Pablo Alonso
* Marcos Cedenilla

This skill was created with Amazon's Alexa Developer Console. The first step is to set the invocation name for the skill.

More information is available at https://medium.com/@marcoscedenillabonet/creating-an-alexa-skill-to-fetch-movie-information-from-imdb-c1954fe8af13

## Creating the Language Model

Alexa allows the creation of different Intents, which are essentially routines for our skill. Besides the default intents, we added five more: the four proposed ones (Rating and Vote Count, Director, Duration, and Movie Synopsis) and an additional one of our own, which lists movies similar to a given one (using the More Like This section of the IMDb page).

## Code

We need to modify the default code provided by Alexa to add functionality to the different Intents we created earlier.

Below is the code for the Lambda function. Using `str(director[0])` when `len(director) == 1` solves the formatting issue shown in the demo video, where a single director was displayed with brackets and quotes.


In [None]:

import logging
import ask_sdk_core.utils as ask_utils

from ask_sdk_core.skill_builder import SkillBuilder
from ask_sdk_core.dispatch_components import AbstractRequestHandler
from ask_sdk_core.dispatch_components import AbstractExceptionHandler
from ask_sdk_core.handler_input import HandlerInput

from ask_sdk_model import Response
from get_imdb import get_info_film

from ask_sdk_core.utils import is_intent_name, get_slot_value

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class LaunchRequestHandler(AbstractRequestHandler):
    """Greets the user when the skill is opened (LaunchRequest)."""

    def can_handle(self, handler_input):
        # type: (HandlerInput) -> bool
        is_launch = ask_utils.is_request_type("LaunchRequest")
        return is_launch(handler_input)

    def handle(self, handler_input):
        # type: (HandlerInput) -> Response
        welcome = "Welcome to info films, What movie information do you want to know?"

        # The same sentence is spoken and used as the reprompt.
        builder = handler_input.response_builder
        return builder.speak(welcome).ask(welcome).response

class HelloWorldIntentHandler(AbstractRequestHandler):
    """Answers the HelloWorldIntent with a fixed greeting."""

    def can_handle(self, handler_input):
        # type: (HandlerInput) -> bool
        matches_intent = ask_utils.is_intent_name("HelloWorldIntent")
        return matches_intent(handler_input)

    def handle(self, handler_input):
        # type: (HandlerInput) -> Response
        greeting = "Hello World!"

        # No reprompt is added; chain .ask(...) here to keep the session open
        # for the user to respond.
        builder = handler_input.response_builder
        return builder.speak(greeting).response

class HelpIntentHandler(AbstractRequestHandler):
    """Responds to the built-in AMAZON.HelpIntent with a usage hint."""

    def can_handle(self, handler_input):
        # type: (HandlerInput) -> bool
        matches_intent = ask_utils.is_intent_name("AMAZON.HelpIntent")
        return matches_intent(handler_input)

    def handle(self, handler_input):
        # type: (HandlerInput) -> Response
        help_text = "You can say hello to me! How can I help?"

        # The help text doubles as the reprompt.
        builder = handler_input.response_builder
        return builder.speak(help_text).ask(help_text).response


class CancelOrStopIntentHandler(AbstractRequestHandler):
    """Says goodbye on either AMAZON.CancelIntent or AMAZON.StopIntent."""

    def can_handle(self, handler_input):
        # type: (HandlerInput) -> bool
        # Checked in the same order as before; stops at the first match.
        return any(
            ask_utils.is_intent_name(intent)(handler_input)
            for intent in ("AMAZON.CancelIntent", "AMAZON.StopIntent")
        )

    def handle(self, handler_input):
        # type: (HandlerInput) -> Response
        farewell = "Goodbye!"

        builder = handler_input.response_builder
        return builder.speak(farewell).response

class FallbackIntentHandler(AbstractRequestHandler):
    """Handles AMAZON.FallbackIntent by asking the user what they want."""

    def can_handle(self, handler_input):
        # type: (HandlerInput) -> bool
        matches_intent = ask_utils.is_intent_name("AMAZON.FallbackIntent")
        return matches_intent(handler_input)

    def handle(self, handler_input):
        # type: (HandlerInput) -> Response
        logger.info("In FallbackIntentHandler")

        builder = handler_input.response_builder
        builder = builder.speak(
            "Hmm, I'm not sure. You can say Hello or Help. What would you like to do?"
        )
        builder = builder.ask("I didn't catch that. What can I help you with?")
        return builder.response

class SessionEndedRequestHandler(AbstractRequestHandler):
    """Handles SessionEndedRequest by returning the builder's response untouched."""

    def can_handle(self, handler_input):
        # type: (HandlerInput) -> bool
        is_session_end = ask_utils.is_request_type("SessionEndedRequest")
        return is_session_end(handler_input)

    def handle(self, handler_input):
        # type: (HandlerInput) -> Response
        # Place any cleanup logic here, before the response is returned.
        builder = handler_input.response_builder
        return builder.response


class IntentReflectorHandler(AbstractRequestHandler):
    """Debugging aid that repeats the name of the intent the user triggered.

    It accepts every IntentRequest, so it is meant for testing the interaction
    model. Dedicated handlers for custom intents should be defined above and
    registered before this one in the request handler chain.
    """

    def can_handle(self, handler_input):
        # type: (HandlerInput) -> bool
        is_intent_request = ask_utils.is_request_type("IntentRequest")
        return is_intent_request(handler_input)

    def handle(self, handler_input):
        # type: (HandlerInput) -> Response
        triggered = ask_utils.get_intent_name(handler_input)
        message = f"You just triggered {triggered}."

        # No reprompt is added; chain .ask(...) here to keep the session open
        # for the user to respond.
        builder = handler_input.response_builder
        return builder.speak(message).response


class CatchAllExceptionHandler(AbstractExceptionHandler):
    """Last-resort exception handler: logs the error and apologises to the user.

    If the logged error says the request handler chain was not found, no
    handler has been implemented for the invoked intent, or it was not
    registered with the skill builder.
    """

    def can_handle(self, handler_input, exception):
        # type: (HandlerInput, Exception) -> bool
        # Accept every exception, whatever its type.
        return True

    def handle(self, handler_input, exception):
        # type: (HandlerInput, Exception) -> Response
        logger.error(exception, exc_info=True)

        apology = "Sorry, I had trouble doing what you asked. Please try again."

        # The apology is also used as the reprompt.
        builder = handler_input.response_builder
        return builder.speak(apology).ask(apology).response

# The SkillBuilder object acts as the entry point for your skill, routing all request and response
# payloads to the handlers above. Make sure any new handlers or interceptors you've
# defined are included below. The order matters - they're processed top to bottom.

# Classes defined by us

class Score(AbstractRequestHandler):
    """Handler for ScoreIntent: reports a film's score and number of votes."""

    def can_handle(self, handler_input):
        # type: (HandlerInput) -> bool
        return ask_utils.is_intent_name("ScoreIntent")(handler_input)  # Intent name

    def handle(self, handler_input):
        # type: (HandlerInput) -> Response
        film = get_slot_value(handler_input=handler_input, slot_name="Movie")  # Slot value

        # The slot can be empty when Alexa matched the intent without a movie
        # name; looking up None would only raise inside the scraper.
        if not film:
            return (handler_input.response_builder
                        .speak("Sorry, I didn't catch the name of the movie. "
                               "Please ask again including the movie title.")
                        .ask("Would you like to know anything else?")
                        .response
                        )

        title, score, votes = get_info_film(film, ("Score", "Number of votes"))  # External code

        # The scraper returns an empty list for anything it could not extract,
        # the title included; fall back to what the user said so the string
        # concatenations below never receive a list.
        title = str(title or film)

        if score and votes:
            speak_output = 'The score for ' + title + ' is ' + str(score) + ' over 10, with ' + str(votes) + ' votes.'
        elif score:
            # Score found but the vote count was not: report what is known.
            speak_output = 'The score for ' + title + ' is ' + str(score) + ' over 10.'
        else:
            speak_output = 'I couldn`t find the rating for ' + title + '. Can you ask in a different way?'
        return (handler_input.response_builder
                    .speak(speak_output)
                    .ask("Would you like to know anything else?")
                    .response
                    )
class Director(AbstractRequestHandler):
    """Handler for DirectorIntent: names the director(s) of a film."""

    def can_handle(self, handler_input):
        # type: (HandlerInput) -> bool
        return ask_utils.is_intent_name("DirectorIntent")(handler_input)  # Intent name

    def handle(self, handler_input):
        # type: (HandlerInput) -> Response
        film = get_slot_value(handler_input=handler_input, slot_name="Movie")  # Slot value

        # The slot can be empty when Alexa matched the intent without a movie
        # name; looking up None would only raise inside the scraper.
        if not film:
            return (handler_input.response_builder
                        .speak("Sorry, I didn't catch the name of the movie. "
                               "Please ask again including the movie title.")
                        .ask("Would you like to know anything else?")
                        .response
                        )

        title, director = get_info_film(film, ("Director",))  # External code

        # The scraper returns an empty list for anything it could not extract,
        # the title included; fall back to what the user said.
        title = str(title or film)

        if director:
            names = [str(person) for person in director]
            if len(names) > 1:
                # "A, B and C" - the sentence is English, so the final
                # conjunction must be "and" (it used to be the Spanish "y").
                listing = ", ".join(names[:-1]) + " and " + names[-1]
                speak_output = "The directors of " + title + " are " + listing
            else:
                # A single director is spoken as plain text rather than as a
                # list with brackets and quotes.
                speak_output = 'The director of ' + title + ' is ' + names[0]
        else:
            speak_output = 'I couldn`t find the director for ' + title + '. Can you ask in a different way?'
        return (handler_input.response_builder
                    .speak(speak_output)
                    .ask("Would you like to know anything else?")
                    .response
                    )
class Duration(AbstractRequestHandler):
    """Handler for DurationIntent: reports the running time of a film."""

    def can_handle(self, handler_input):
        # type: (HandlerInput) -> bool
        return ask_utils.is_intent_name("DurationIntent")(handler_input)  # Intent name

    def handle(self, handler_input):
        # type: (HandlerInput) -> Response
        film = get_slot_value(handler_input=handler_input, slot_name="Movie")  # Slot value

        # The slot can be empty when Alexa matched the intent without a movie
        # name; looking up None would only raise inside the scraper.
        if not film:
            return (handler_input.response_builder
                        .speak("Sorry, I didn't catch the name of the movie. "
                               "Please ask again including the movie title.")
                        .ask("Would you like to know anything else?")
                        .response
                        )

        title, duration = get_info_film(film, ("Duration",))  # External code

        # The scraper returns an empty list for anything it could not extract,
        # the title included; fall back to what the user said so the string
        # concatenations below never receive a list.
        title = str(title or film)

        if duration:
            speak_output = 'The length of ' + title + ' is ' + str(duration)
        else:
            speak_output = 'I couldn`t find the duration for ' + title + '. Can you ask in a different way?'
        return (handler_input.response_builder
                    .speak(speak_output)
                    .ask("Would you like to know anything else?")
                    .response
                    )
class Synopsis(AbstractRequestHandler):
    """Handler for SynopsisIntent: reads out the synopsis of a film."""

    def can_handle(self, handler_input):
        # type: (HandlerInput) -> bool
        return ask_utils.is_intent_name("SynopsisIntent")(handler_input)  # Intent name

    def handle(self, handler_input):
        # type: (HandlerInput) -> Response
        film = get_slot_value(handler_input=handler_input, slot_name="Movie")  # Slot value

        # The slot can be empty when Alexa matched the intent without a movie
        # name; looking up None would only raise inside the scraper.
        if not film:
            return (handler_input.response_builder
                        .speak("Sorry, I didn't catch the name of the movie. "
                               "Please ask again including the movie title.")
                        .ask("Would you like to know anything else?")
                        .response
                        )

        title, synopsis = get_info_film(film, ("Synopsis",))  # External code

        # The scraper returns an empty list for anything it could not extract,
        # the title included; fall back to what the user said so the string
        # concatenations below never receive a list.
        title = str(title or film)

        if synopsis:
            speak_output = 'The synopsis of ' + title + ' is ' + str(synopsis)
        else:
            speak_output = 'I couldn`t find the synopsis for ' + title + '. Can you ask in a different way?'
        return (handler_input.response_builder
                    .speak(speak_output)
                    .ask("Would you like to know anything else?")
                    .response
                    )
class Similar(AbstractRequestHandler):
    """Handler for SimilarIntent: lists films similar to the requested one."""

    def can_handle(self, handler_input):
        # type: (HandlerInput) -> bool
        return ask_utils.is_intent_name("SimilarIntent")(handler_input)  # Intent name

    def handle(self, handler_input):
        # type: (HandlerInput) -> Response
        film = get_slot_value(handler_input=handler_input, slot_name="Movie")  # Slot value

        # The slot can be empty when Alexa matched the intent without a movie
        # name; looking up None would only raise inside the scraper.
        if not film:
            return (handler_input.response_builder
                        .speak("Sorry, I didn't catch the name of the movie. "
                               "Please ask again including the movie title.")
                        .ask("Would you like to know anything else?")
                        .response
                        )

        title, similar = get_info_film(film, ("Recomendations",))  # External code

        # The scraper returns an empty list for anything it could not extract,
        # the title included; fall back to what the user said.
        title = str(title or film)

        if similar:
            names = [str(item) for item in similar]
            if len(names) > 1:
                # "A, B and C" - the sentence is English, so the final
                # conjunction must be "and" (it used to be the Spanish "y").
                listing = ", ".join(names[:-1]) + " and " + names[-1]
            else:
                # A single recommendation needs no separator or conjunction.
                listing = names[0]
            speak_output = 'Similar films to ' + title + ' are ' + listing
        else:
            speak_output = 'I couldn`t find the similar films for  ' + title + '. Can you ask in a different way?'
        return (handler_input.response_builder
                    .speak(speak_output)
                    .ask("Would you like to know anything else?")
                    .response
                    )


# The SkillBuilder routes every incoming request to the first registered
# handler whose can_handle() accepts it, so registration order matters.
sb = SkillBuilder()

sb.add_request_handler(LaunchRequestHandler())
sb.add_request_handler(HelloWorldIntentHandler())
sb.add_request_handler(HelpIntentHandler())
sb.add_request_handler(CancelOrStopIntentHandler())
sb.add_request_handler(FallbackIntentHandler())
sb.add_request_handler(SessionEndedRequestHandler())
# New intents
sb.add_request_handler(Score())
sb.add_request_handler(Director())
sb.add_request_handler(Synopsis())
sb.add_request_handler(Duration())
sb.add_request_handler(Similar())
sb.add_request_handler(IntentReflectorHandler()) # make sure IntentReflectorHandler is last so it doesn't override your custom intent handlers

sb.add_exception_handler(CatchAllExceptionHandler())

# Entry point that AWS Lambda invokes; without this module-level name the
# handlers registered above are never reachable from the Lambda runtime.
lambda_handler = sb.lambda_handler()

Finally, we include the code that scrapes the movie data. It also supports requesting specific pieces of information, finding recommended movies, and caching previously searched movies. The cache is implemented as a limited-size dictionary whose keys are the titles of the movies searched for and whose values are dictionaries holding the complete information for each movie, even if only the rating was requested.

In [10]:
import re
import urllib
import urllib.request as request


# Maps the searched name to the dictionary of everything scraped for that film.
cache = dict()

# Upper bound on the number of films kept in ``cache``; once it is reached the
# oldest entry is evicted before a new one is stored.
_MAX_CACHE_SIZE = 33

_IMDB_URL = "https://www.imdb.com"


def _download(url, headers):
    """Fetch *url* with the given request headers and return the decoded body."""
    req = request.Request(url, headers=headers)
    # The context manager closes the response even if reading or decoding fails.
    with request.urlopen(req) as response:
        return response.read().decode()


def _first_match(pattern, page):
    """Return the first capture of *pattern* in *page*, or ``[]`` if there is none."""
    matches = re.findall(pattern, page)
    return matches[0] if matches else []


def _fix_apostrophes(text):
    """Replace the HTML entity for an apostrophe with the character itself."""
    return text.replace("&#x27;", "'")


def _parse_film_page(page, fallback_title):
    """Extract every supported field from the HTML of a film page.

    Fields that cannot be found are stored as an empty list, except the title,
    which falls back to *fallback_title* so callers always get a string.
    """
    title = _first_match(r'hero__pageTitle.*?>([^<]*)</span>', page)
    synopsis = _first_match(r'plot-xs_to_m.*?>([^<]*)</span>', page)

    duration = _first_match(
        r'data-testid="title-techspec_runtime">.*?<div class="ipc-metadata-list-item__content-container">(.*?)</div>',
        page,
    )
    if duration:
        # Collapse markup and punctuation between the words into single spaces.
        duration = re.sub(r"\W+", " ", duration)

    # dict.fromkeys removes duplicates while keeping page order, so the result
    # is the same on every run (a set would shuffle it).
    directors = dict.fromkeys(
        re.findall(r'href="\/name\/\w+\/\?ref_=tt_ov_dr">([^<]*)', page)
    )
    similar = dict.fromkeys(
        re.findall(r'<span data-testid="title">([^<]*)</span>', page)
    )

    return {
        "Title": _fix_apostrophes(title) if title else fallback_title,
        "Score": _first_match(
            r'hero-rating-bar__aggregate-rating__score.*?>(\d*\.*\d*)</span>', page
        ),
        "Number of votes": _first_match(
            r'hero-rating-bar__aggregate-rating__score.*?(\d*[A-Z])</div>', page
        ),
        "Synopsis": _fix_apostrophes(synopsis) if synopsis else synopsis,
        "Director": [_fix_apostrophes(person) for person in directors],
        "Duration": duration,
        # At most five recommendations are kept.
        "Recomendations": [_fix_apostrophes(film) for film in list(similar)[:5]],
    }


def get_info_film(name: str, info_req: list):
    """Return the requested pieces of information about a film.

    The film is looked up in ``cache`` first. On a miss, the IMDb search page
    is queried for *name*, the first title result is downloaded and parsed,
    and the complete set of fields is cached under *name*.

    Args:
        name: Film name to search for.
        info_req: Keys of the fields to return (a list or tuple), chosen from
            "Score", "Number of votes", "Synopsis", "Director", "Duration"
            and "Recomendations".

    Returns:
        A list whose first element is the film title, followed by the value of
        each key in *info_req*, in order. A field that could not be found is
        an empty list. If the search returns no film at all, the title is
        *name* and every requested field is an empty list.

    Raises:
        KeyError: If *info_req* contains an unknown key.
        Errors raised by ``urllib`` while downloading are propagated.
    """
    information = cache.get(name)

    if not information:
        search_page = _download(
            _IMDB_URL + "/find/?q=" + urllib.parse.quote(name),
            {'Accept-Language': 'es', 'User-Agent': 'Mozilla/5.0'},
        )
        title_paths = re.findall('<a.*?href="(/title.*?)"', search_page)
        if not title_paths:
            # Nothing matched the search: report every field as missing
            # instead of failing with an IndexError. Not cached, so a later
            # search can still succeed.
            return [name, *([] for _ in info_req)]

        film_page = _download(_IMDB_URL + title_paths[0], {'User-Agent': 'Mozilla/5.0'})
        information = _parse_film_page(film_page, name)

        # Manage the cache length: drop the oldest entries to make room.
        while len(cache) >= _MAX_CACHE_SIZE:
            del cache[next(iter(cache))]
        cache[name] = information

    # Build the result without touching ``information``: it is the very object
    # stored in the cache, and removing "Title" from it would break every
    # later cache hit.
    return [information["Title"], *(information[key] for key in info_req)]

In [13]:
# Example call: request four fields for one film; the result is a list that
# starts with the title followed by the requested values in the same order.
get_info_film("Interstellar", ["Director", "Duration", "Synopsis", "Recomendations"])

['Interstellar',
 ['Christopher Nolan'],
 '2 hours 49 minutes',
 'When Earth becomes uninhabitable in the future, a farmer and ex-NASA pilot, Joseph Cooper, is tasked to pilot a spacecraft, along with a team of researchers, to find a new planet for humans.',
 ['El lobo de Wall Street',
  'Pulp Fiction',
  'Joker',
  'Cadena perpetua',
  'Django desencadenado']]