# Movies recommendations with Azure Open AI & Azure Cognitive Search
## Part 3 - Recommendations

<img src="https://github.com/retkowsky/images/blob/master/movies_search.png?raw=true">

In [None]:
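# %pip install azure-search-documents --pre --upgrade
# Note: this notebook imports `Vector` and calls `search(vectors=[...])`, which exist only in
# some 11.4.0 beta releases of azure-search-documents (e.g. 11.4.0b8). If the import fails,
# pin that version instead of upgrading to the latest one.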

In [None]:
# %pip install youtube-search-python

In [None]:
import json
import os
import re
import sys
import time
from datetime import datetime
from io import BytesIO

import movieposters as mp
import openai
import pytz
import requests
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.models import Vector
#from dotenv import load_dotenv
from IPython.display import IFrame, display
from PIL import Image
from youtubesearchpython import VideosSearch, Comments

In [None]:
# Python version of the running kernel
sys.version

In [None]:
# The time zone name is read from the "timezone" field of the ipinfo.io JSON answer
# (network call), then used to print the current time in that zone
local_tz = pytz.timezone(requests.get("https://ipinfo.io").json()["timezone"])
print("Local time:", datetime.now(local_tz).strftime("%d-%b-%Y %H:%M:%S"))

In [None]:
# Installed version of the openai package; the functions below rely on its
# module-level openai.Embedding / openai.ChatCompletion interface
print("Open AI version:", openai.__version__)

Load the configuration settings from GitHub Codespace Secrets.

In [None]:
#load_dotenv("azure.env")

# Fail early, naming what is missing, instead of hitting obscure errors later
# (for instance None + "/indexes/" inside the index helper functions)
required_settings = (
    "AZURE_OPENAI_API_KEY",
    "AZURE_OPENAI_ENDPOINT",
    "AZURE_OPENAI_MODEL_CHAT_VERSION",
    "AZURE_SEARCH_SERVICE_ENDPOINT",
    "AZURE_SEARCH_ADMIN_KEY",
)
missing_settings = [name for name in required_settings if not os.getenv(name)]
if missing_settings:
    raise EnvironmentError(
        "Missing environment variables: " + ", ".join(missing_settings)
    )

# Azure Open AI settings (the generic OPENAI_API_KEY / OPENAI_API_BASE /
# OPENAI_API_VERSION variable names are not used by this notebook)
openai.api_type = "azure"
openai.api_key = os.getenv("AZURE_OPENAI_API_KEY")
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT")
openai.api_version = os.getenv("AZURE_OPENAI_MODEL_CHAT_VERSION")

# Azure Cognitive Search settings; the trailing slash is removed because the
# REST helpers build their URLs with acs_endpoint + "/indexes/"
acs_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT").rstrip("/")
acs_key = os.getenv("AZURE_SEARCH_ADMIN_KEY")

Make sure `embeddings_engine` is set to the name of your embedding model deployment!

In [None]:
# Azure Open AI embeddings model to use: this is the deployment name that
# azure_openai_text_embeddings passes as deployment_id
embeddings_engine = "text-embedding-ada-002-Unai"

- Vector search is in public preview
- Model name: text-embedding-ada-002
- Model version: 2
- API version: 2023-05-15

In [None]:
# Azure Cognitive Search index name used by the status, statistics, search and delete helpers
# NOTE(review): no cell of this notebook creates the index; presumably it was built in an
# earlier part of the series - confirm
index_name = "moviereview"

## 1. Functions

Function to delete an existing search index

In [None]:
def acs_delete_index(index_name):
    """
    Deleting an Azure Cognitive Search index
    Input: index name (string)
    Output: None. Progress, elapsed time or the failure reason are printed.
    """
    start = time.time()
    index_client = SearchIndexClient(
        endpoint=acs_endpoint, credential=AzureKeyCredential(acs_key)
    )

    try:
        print("Deleting the Azure Cognitive Search index:", index_name)
        index_client.delete_index(index_name)
        print("Done. Elapsed time:", round(time.time() - start, 2), "secs")
    except Exception as e:
        # Still best effort (no re-raise), but show the real cause and let
        # KeyboardInterrupt through, which a bare "except:" would swallow
        print("Cannot delete index. Check the index name.")
        print("Error:", str(e))

In [None]:
def acs_index_stats(index_name):
    """
    Get statistics about Azure Cognitive Search index
    Input: index name (string)
    Output: tuple (document count, storage size) read from the "documentCount"
            and "storageSize" fields of the service answer
    Raises: RuntimeError when the service does not answer with HTTP 200
    """
    url = (
        acs_endpoint
        + "/indexes/"
        + index_name
        + "/stats?api-version=2021-04-30-Preview"
    )
    headers = {
        "Content-Type": "application/json",
        "api-key": acs_key,
    }
    response = requests.get(url, headers=headers)
    print("Azure Cognitive Search index status for:", index_name, "\n")

    if response.status_code != 200:
        print("Request failed with status code:", response.status_code)
        # There is nothing meaningful to return: raise an explicit error rather
        # than failing on unbound local variables at the return statement
        raise RuntimeError(
            f"Cannot get statistics for index '{index_name}' "
            f"(HTTP status {response.status_code})"
        )

    res = response.json()
    print(json.dumps(res, indent=2))

    return res["documentCount"], res["storageSize"]

In [None]:
def acs_index_status(index_name):
    """
    Azure Cognitive Search index status
    Input: index name (string)
    Output: None. The JSON answer of the service (index definition or error
            details) is printed, preceded by the status code on failure.
    """
    print("Azure Cognitive Search Index:", index_name, "\n")

    headers = {"Content-Type": "application/json", "api-key": acs_key}
    params = {"api-version": "2021-04-30-Preview"}
    index_status = requests.get(
        acs_endpoint + "/indexes/" + index_name, headers=headers, params=params
    )

    if index_status.status_code != 200:
        print("Request failed with status code:", index_status.status_code)

    try:
        print(json.dumps((index_status.json()), indent=5))
    except ValueError:
        # Response.json() raises a ValueError subclass when the body is not JSON
        print("The response body is not valid JSON.")

Function to create embeddings (vector representations) of your data using Azure Open AI embedding models

In [None]:
def azure_openai_text_embeddings(text):
    """
    Turn a text into its vector embedding with Azure Open AI
    Input: text
    Output: vector embedding (the "embedding" field of the first returned item)
    """
    response = openai.Embedding.create(input=text, deployment_id=embeddings_engine)
    first_item = response["data"][0]

    return first_item["embedding"]

In [None]:
def get_list(movielist):
    """
    Get list of movies
    Input: movies list (text) with numbered entries such as "1. Title (1999)",
           one per line; None or an empty text is accepted
    Output: list with the text found after each "<number>. " prefix (list of
            strings), empty when nothing matches. Each entry is also printed.
    """
    if not movielist:
        # azure_chatgpt_movies returns None when all its attempts failed
        return []

    pattern = r"\d+\.\s(.+)"
    matches = re.findall(pattern, movielist)
    for idx, movie in enumerate(matches, start=1):
        print(f"Movie {idx}: {movie}.")

    return matches

## 2. Azure Cognitive Search index

### Azure Cognitive Search vector store
<img src="https://github.com/retkowsky/images/blob/master/vector_search_architecture.png?raw=true">

In [None]:
# Print the answer of the service for this index
acs_index_status(index_name)

In [None]:
# Keep the two statistics for the summary printed in the next cell
document_count, storage_size = acs_index_stats(index_name)

In [None]:
# Count shown with a thousands separator; storage size divided by 1024 * 1024 and labelled as MB
print("Number of documents in the index =", f"{document_count:,}")
print("Size of the index =", round(storage_size / (1024 * 1024), 2), "MB")

## 3. Functions

Make sure you include your chat model deployment name as `engine`!

In [None]:
def azure_chatgpt_movies(
    prompt, max_retries=3, retry_delay=1, engine="gpt-35-turbo-unai"
):
    """
    Chat GPT with Azure Open AI
    Input: prompt (text)
           max_retries (int): maximum number of calls to the chat model
           retry_delay (number): seconds to wait between two attempts
           engine (string): name of the chat model deployment
    Output: results (string), or None when every attempt failed
    """
    # Implicit concatenation: a backslash continuation inside the literal would
    # embed the indentation spaces of the source code in the prompt
    system_message = (
        "You are an AI assistant that helps people to find "
        "information about movies."
    )

    # Only the model call is retried, so a failure while printing the result
    # can no longer trigger another (billed) completion
    for attempt in range(1, max_retries + 1):
        try:
            response = openai.ChatCompletion.create(
                engine=engine,  # Should be deployed in the AOAI studio
                messages=[
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.9,
                max_tokens=800,
                top_p=1,
                frequency_penalty=0,
                presence_penalty=0,
                stop=None,
            )
            resp = response["choices"][0]["message"]["content"]
            break

        except Exception as e:
            print("Error:", str(e))
            if attempt < max_retries:
                # No need to wait after the last attempt
                print("Retrying...")
                time.sleep(retry_delay)
    else:
        print("Maximum retries exceeded. Unable to get a response.")
        return None

    print("\033[1;31;34mPrompt:", prompt)
    print("\n\033[1;31;32mResponse:", resp)
    print("\033[1;31;35m")
    print("Powered by Azure AI")

    # Local time: the time zone comes from ipinfo.io; if that lookup fails the
    # system clock is used so that the answer is still returned
    try:
        local_tz = pytz.timezone(requests.get("https://ipinfo.io").json()["timezone"])
        now = datetime.now(local_tz)
    except Exception:
        now = datetime.now()
    print(now.strftime("%d-%b-%Y %H:%M:%S"))

    return resp

In [None]:
def get_movie_poster(imdb_id):
    """
    Show the poster of a movie and keep a copy on disk
    Input: imdb id (string)
    Output: None. The image is displayed and saved as
            movieposter/movie_<imdb id>.jpg, or a failure message is printed
            when the download does not answer with HTTP 200.
    """
    poster_url = mp.get_poster(id=imdb_id)
    reply = requests.get(poster_url)

    if reply.status_code != 200:
        print("Failed to fetch the movie image.")
        return

    picture = Image.open(BytesIO(reply.content))
    display(picture)

    # Local copy of the poster
    target_dir = "movieposter"
    os.makedirs(target_dir, exist_ok=True)
    picture.save(os.path.join(target_dir, "movie_" + str(imdb_id) + ".jpg"))

In [None]:
def get_trailer(movietitle):
    """
    Find a video on YouTube for a movie title and show it with its comments
    Input: movie title (string)
    Output: None. The first search hit is embedded in the notebook and the
            comments returned for it are printed.
    """
    # Only the first YouTube search hit is used
    hits = VideosSearch(movietitle, limit=1).result().get("result")

    if not hits:
        print("No video found on YouTube.")
        return

    print("\033[1;31;34m")
    video_id = hits[0].get("id")
    watch_url = f"https://www.youtube.com/watch?v={video_id}"
    print("\033[1;31;34m", watch_url)
    player = IFrame(
        src=f"https://www.youtube.com/embed/{video_id}", width="560", height="315"
    )
    display(player)

    # Comments returned by Comments.get for this video
    print("YouTube video comments for", movietitle, ":")
    comments = Comments.get(video_id)
    print("\033[1;31;32m")

    for number, comment in enumerate(comments.get("result"), start=1):
        print(f"Comment {number:02}: {comment.get('content')}")
    print("\033[0m")

In [None]:
def get_all_movies_trailers(movies_list):
    """
    Get movie trailers from a list of movies
    Input: movies list (list of strings); None or an empty list does nothing
    Output: Display the trailer video for each element of the list
    """
    for movie in movies_list or []:
        print(movie)
        try:
            get_trailer(movie)
        except Exception as e:
            # One failing title must not stop the others, but show why it failed.
            # Unlike a bare "except:", KeyboardInterrupt still stops the loop.
            print("Error:", str(e))
        print()

In [None]:
def acs_movies_with_posters(title, min_score=0.9):
    """
    Vector search of one movie title in the Azure Cognitive Search index
    Input: title (string): a single movie title (use acs_movies_results for a list)
           min_score (number): minimum "@search.score" to accept the best match
    Output: None. The details and the poster of the best match are printed, or
            a note when the index returns nothing with a high enough score.
    """
    search_client = SearchClient(
        acs_endpoint, index_name, credential=AzureKeyCredential(acs_key)
    )

    # The title is embedded and compared with the "embed_title" vector field
    vector = Vector(value=azure_openai_text_embeddings(title), k=1, fields="embed_title")

    results = search_client.search(
        search_text=None,
        vectors=[vector],
        select=[
            "imdb_id",
            "title",
            "cast",
            "director",
            "description",
            "genres",
            "year",
        ],
        top=1,
    )

    found = False

    for result in results:
        if result["@search.score"] < min_score:
            continue

        found = True
        print("\033[1;31;34m")
        print(f"Movie title: {result['title']}")
        print(f"Movie ID: {result['imdb_id']}")
        # Get movie poster: a poster problem must not hide the other details
        try:
            get_movie_poster(result["imdb_id"])
        except Exception as e:
            print("Error:", str(e))
        print("\033[1;31;32m")
        print(f"Description: {result['description']}")
        print("\033[0m")
        print(f"Cast: {result['cast']}")
        print(f"Director: {result['director']}")
        print(f"Genres: {result['genres']}")
        print(f"Year: {result['year']}")
        print("\033[1;31;35m")
        print(f"Score = {result['@search.score']}")

    if not found:
        # Covers a best match under the threshold as well as an empty result set
        print("\033[1;31;91m")
        print(
            "Note: The movie",
            title,
            "is not available in the Azure Cognitive search index",
        )
        print("\033[0m")

In [None]:
def acs_movies_results(movies_list):
    """
    Look up every movie of a list in Azure Cognitive Search
    Input: movies list (list)
    Output: results (printed by acs_movies_with_posters, one title at a time)
    """
    for single_title in movies_list:
        acs_movies_with_posters(single_title)

# 4. Recommendation system

In [None]:
# Movie genres; the preference cells below pick entries by position in this list
genres_list = [
    "Action",
    "Adventure",
    "Fantasy",
    "Science Fiction",
    "Crime",
    "Thriller",
    "Animation",
    "Family",
    "Western",
    "Romance",
    "Horror",
    "Mystery",
    "History",
    "War",
    "Drama",
    "Comedy",
    "Music",
]

# Number of genres
len(genres_list)

In [None]:
# Actors; the preference cells below pick entries by position in this list.
# NOTE(review): "Johnny Depp" appears twice (positions 15 and 47). Removing the
# duplicate would shift the positions used by the test cells, so it is kept.
actors_list = [
    "Robert De Niro",
    "Al Pacino",
    "Jack Nicholson",
    "Dustin Hoffman",
    "Clint Eastwood",
    "Harrison Ford",
    "Tom Hanks",
    "Denzel Washington",
    "Morgan Freeman",
    "Mel Gibson",
    "Arnold Schwarzenegger",
    "Bruce Willis",
    "Robin Williams",
    "Kevin Costner",
    "Sean Connery",
    "Johnny Depp",
    "Samuel L. Jackson",
    "Jeff Goldblum",
    "Eddie Murphy",
    "Kurt Russell",
    "Michael Douglas",
    "Bill Murray",
    "John Travolta",
    "Richard Gere",
    "Nicolas Cage",
    "Tom Cruise",
    "Keanu Reeves",
    "Kevin Bacon",
    "Gene Hackman",
    "Danny DeVito",
    "Val Kilmer",
    "John Cusack",
    "Tim Robbins",
    "Steve Martin",
    "Patrick Swayze",
    "Tom Selleck",
    "Wesley Snipes",
    "Edward Norton",
    "Alec Baldwin",
    "Jim Carrey",
    "Will Smith",
    "Brad Pitt",
    "George Clooney",
    "Sean Penn",
    "Robert Redford",
    "Richard Dreyfuss",
    "Mark Wahlberg",
    "Johnny Depp",
    "Billy Crystal",
    "Harvey Keitel",
]

# Number of entries (duplicates included)
len(actors_list)

In [None]:
%%javascript
// Raises the auto-scroll threshold of the output areas - presumably so that the long
// outputs below are shown in full instead of in a scroll box; confirm for your front end
IPython.OutputArea.auto_scroll_threshold = 9999

## 1. Based on some preferences
### Test 1

In [None]:
# Favorite genres of this test, picked by position in genres_list
mytopgenres = ", ".join(genres_list[i] for i in (0, 3, 4))
mytopgenres

In [None]:
# Favorite actors of this test, picked by position in actors_list
mytopactors = ", ".join(actors_list[i] for i in (2, 3, 4, 10, 20))
mytopactors

In [None]:
# Ask for 10 titles in a numbered list, the format that get_list parses
prompt = (
    "I want to see a movie. Can you recommend me 10 movies? "
    f"My favorite genres are: {mytopgenres} and my favorite actors are: {mytopactors}."
    " I want you to save only the movie title in a numbered list with the release year."
)

answer = azure_chatgpt_movies(prompt)

In [None]:
# Extract the numbered entries of the answer into a Python list
movies_list = get_list(answer)

In [None]:
# acs_movies_with_posters expects ONE title: given the whole list it would only
# search the first entry. acs_movies_results runs the search for every movie.
acs_movies_results(movies_list)

In [None]:
# Show a YouTube video and its comments for each recommended movie
get_all_movies_trailers(movies_list)

### Test 2

In [None]:
# Favorite genre and actors of this test, picked by position in the two lists
mytopgenres = ", ".join(genres_list[i] for i in (8,))
print(mytopgenres)

mytopactors = ", ".join(actors_list[i] for i in (30, 40, 45, 47))
print(mytopactors)

In [None]:
# Ask for 3 titles in a numbered list, the format that get_list parses
prompt = (
    "I want to see a movie. Show me a list of 3 movies. "
    f"My favorite genres are: {mytopgenres} and my favorite actors are: {mytopactors}."
    " I want you to save only the movie title in a numbered list with the release year."
)

answer = azure_chatgpt_movies(prompt)

In [None]:
# Extract the numbered entries of the answer into a Python list
movies_list = get_list(answer)

In [None]:
# acs_movies_with_posters expects ONE title: given the whole list it would only
# search the first entry. acs_movies_results runs the search for every movie.
acs_movies_results(movies_list)

In [None]:
# Show a YouTube video and its comments for each recommended movie
get_all_movies_trailers(movies_list)

## 2. Based on some history
### Test 1

In [None]:
# Titles already liked by the user; inserted as is into the prompt of the next cell
myfavoritemovies = "Jaws, 47 Meters Down, The Shallows"

In [None]:
# Ask for 3 similar titles in a numbered list, the format that get_list parses
prompt = (
    f"My favorites movies are: {myfavoritemovies}."
    " Can you suggest me some 3 similar movies made before 2015? "
    " I want you to save only the movie title in a numbered list with the release year."
)

answer = azure_chatgpt_movies(prompt)

In [None]:
# Extract the numbered entries of the answer into a Python list
movies_list = get_list(answer)

In [None]:
# acs_movies_with_posters expects ONE title: given the whole list it would only
# search the first entry. acs_movies_results runs the search for every movie.
acs_movies_results(movies_list)

In [None]:
# Show a YouTube video and its comments for each recommended movie
get_all_movies_trailers(movies_list)

### Test 2

In [None]:
# Titles already liked by the user; inserted as is into the prompt of the next cell
myfavoritemovies = "Manhattan, French connection, Serpico"

In [None]:
# Ask for 5 similar titles in a numbered list, the format that get_list parses
prompt = (
    f"My favorites movies are: {myfavoritemovies}."
    " Can you suggest me some 5 similar movies? "
    " I want you to save only the movie title in a numbered list with the release year."
)

answer = azure_chatgpt_movies(prompt)

In [None]:
# Extract the numbered entries of the answer into a Python list
movies_list = get_list(answer)

In [None]:
# acs_movies_with_posters expects ONE title: given the whole list it would only
# search the first entry. acs_movies_results runs the search for every movie.
acs_movies_results(movies_list)

In [None]:
# Show a YouTube video and its comments for each recommended movie
get_all_movies_trailers(movies_list)

## 3. Post Processing

In [None]:
# Print the answer of the service for this index once more at the end of the run
acs_index_status(index_name)

In [None]:
# Refresh the document count and storage size of the index
document_count, storage_size = acs_index_stats(index_name)

### We can delete the index if needed

In [None]:
# acs_delete_index(index_name)

> Go to the next notebook