# Movies recommendation with Azure Open AI & Azure Cognitive Search
## Part 2: search examples


<img src="https://github.com/retkowsky/images/blob/master/movies_search.png?raw=true">

In [None]:
# %pip install azure-search-documents --pre --upgrade

In [None]:
# %pip install youtube-search-python

In [None]:
import gradio as gr
import json
import movieposters as mp
import openai
import os
import pytz
import re
import requests
import sys
import time

from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.models import Vector

from datetime import datetime
#from dotenv import load_dotenv
from IPython.display import IFrame, display
from io import BytesIO
from PIL import Image
from youtubesearchpython import VideosSearch, Comments

In [None]:
sys.version

In [None]:
# Detect the local timezone from the public IP address (ipinfo.io).
# The lookup is best-effort: when it fails (offline, proxy, rate limit,
# unexpected payload) we fall back to the machine's local time instead of
# stopping the notebook.
try:
    local_tz = pytz.timezone(
        requests.get("https://ipinfo.io", timeout=5).json()["timezone"]
    )
except Exception as error:
    print("Could not detect the timezone from ipinfo.io:", error)
    local_tz = None  # datetime.now(None) returns the system local time
print("Local time:", datetime.now(local_tz).strftime("%d-%b-%Y %H:%M:%S"))

In [None]:
print("Open AI version:", openai.__version__)

In [None]:
# All credentials and endpoints are read from environment variables.
# Uncomment the next line (and the `load_dotenv` import in the imports cell)
# to load them from a local "azure.env" file instead.
#load_dotenv("azure.env")


# Module-level configuration of the openai SDK for an Azure Open AI resource
openai.api_type: str = "azure"
# openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = os.getenv("AZURE_OPENAI_API_KEY")
# openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT")
# openai.api_version = os.getenv("OPENAI_API_VERSION")
# NOTE(review): the variable name suggests a model version, but the value is
# used as the API version - confirm it holds something like "2023-05-15".
openai.api_version = os.getenv("AZURE_OPENAI_MODEL_CHAT_VERSION")

# Azure Cognitive Search endpoint and admin key used by every search helper below.
# os.getenv returns None when a variable is not set.
acs_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
acs_key = os.getenv("AZURE_SEARCH_ADMIN_KEY")

In [None]:
# Azure Open AI embeddings model to use
embeddings_engine = "text-embedding-ada-002-Unai"

- Vector search is in public preview
- Model name: text-embedding-ada-002
- Model version: 2
- API version: 2023-05-15

In [None]:
# Azure Cognitive Search index name to create
index_name = "moviereview"

## 1. Functions

In [None]:
def acs_delete_index(index_name):
    """
    Deleting an Azure Cognitive Search index
    Input: an Azure Cognitive Search index name
    Output: None (progress and errors are printed)
    """
    start = time.time()
    index_client = SearchIndexClient(
        endpoint=acs_endpoint, credential=AzureKeyCredential(acs_key)
    )

    try:
        print("Deleting the Azure Cognitive Search index:", index_name)
        index_client.delete_index(index_name)
        print("Done. Elapsed time:", round(time.time() - start, 2), "secs")
    except Exception as error:
        # Only catch real errors (not KeyboardInterrupt/SystemExit) and show
        # the underlying reason so the failure can be diagnosed.
        print("Cannot delete index. Check the index name.")
        print("Error:", error)


In [None]:
def acs_index_stats(index_name):
    """
    Get statistics about Azure Cognitive Search index
    Input: an Azure Cognitive Search index name
    Output: (document_count, storage_size) read from the "documentCount" and
    "storageSize" fields of the response, or (None, None) when the request
    does not return HTTP 200
    """
    url = (
        acs_endpoint
        + "/indexes/"
        + index_name
        + "/stats?api-version=2021-04-30-Preview"
    )
    headers = {
        "Content-Type": "application/json",
        "api-key": acs_key,
    }
    response = requests.get(url, headers=headers)
    print("Azure Cognitive Search index status for:", index_name, "\n")

    # Defaults returned on failure; without them the return statement raised
    # UnboundLocalError whenever the status code was not 200.
    document_count, storage_size = None, None

    if response.status_code == 200:
        res = response.json()
        print(json.dumps(res, indent=2))
        document_count = res["documentCount"]
        storage_size = res["storageSize"]
    else:
        print("Request failed with status code:", response.status_code)

    return document_count, storage_size

In [None]:
def acs_index_status(index_name):
    """
    Azure Cognitive Search index status
    Input: an Azure Cognitive Search index name
    Output: None (the index definition returned by the service is printed)
    """
    print("Azure Cognitive Search Index:", index_name, "\n")

    headers = {"Content-Type": "application/json", "api-key": acs_key}
    params = {"api-version": "2021-04-30-Preview"}
    index_status = requests.get(
        acs_endpoint + "/indexes/" + index_name, headers=headers, params=params
    )

    try:
        print(json.dumps((index_status.json()), indent=5))
    except Exception:
        # The body could not be decoded/serialized as JSON: report the HTTP
        # status of the response we actually received (the previous code
        # referenced an undefined `response` name here and raised NameError).
        print("Request failed with status code:", index_status.status_code)

In [None]:
def openai_text_embeddings(text):
    """
    Embed a text with the Azure Open AI embeddings deployment
    Input: a text
    Output: the "embedding" value of the first item returned by the service
    """
    response = openai.Embedding.create(
        deployment_id=embeddings_engine,
        input=text,
    )
    # Only one input is sent, so the vector is read from the first data item
    first_item = response["data"][0]

    return first_item["embedding"]

In [None]:
def get_list(movielist, disp=True):
    """
    Extract movies from a numbered list written as text
    Input: text containing items such as "1. Title (year)" (string);
           None or an empty string is accepted and gives an empty list
    Input: disp, print every extracted movie when True (bool)
    Output: the text found after each "<number>. " prefix (list of strings)
    """
    # azure_chatgpt_movies returns None when every retry failed: treat a
    # missing answer as "no movie" instead of raising a TypeError in re.findall.
    if not movielist:
        return []

    pattern = r"\d+\.\s(.+)"
    matches = re.findall(pattern, movielist)

    if disp:
        for idx, movie in enumerate(matches, start=1):
            print(f"Movie {idx}: {movie}.")

    return matches

In [None]:
acs_index_status(index_name)

In [None]:
document_count, storage_size = acs_index_stats(index_name)

In [None]:
print("Number of documents in the index =", f"{document_count:,}")
print("Size of the index =", round(storage_size / (1024 * 1024), 2), "MB")

In [None]:
%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;

## 2. Get documents

Get a document by giving the film identifier (IMDB id)

In [None]:
def acs_get_documents(imdbid):
    """
    Look up one document of the index by its key and print its fields
    Input: an imdbid (string), used as the document key
    Output: None (the movie details are printed with ANSI colors)
    """
    credential = AzureKeyCredential(acs_key)
    client = SearchClient(acs_endpoint, index_name, credential)
    document = client.get_document(key=imdbid)

    # Title first, in its own color
    print("\033[1;31;34m")
    print("Movies:", document["title"])

    # Then the remaining fields, in display order
    print("\033[1;31;32m")
    details = (
        ("IMDB ID", "imdb_id"),
        ("Casting", "cast"),
        ("Director", "director"),
        ("Description", "description"),
        ("Genres", "genres"),
        ("Year", "year"),
    )
    for label, field in details:
        print(f"{label}:", document[field])

    # Reset the terminal colors
    print("\033[0m")

In [None]:
acs_get_documents("tt0070328")

### Simple query

In [None]:
def acs_full_query(query, nb=5):
    """
    Full text search using Azure Cognitive Search
    Input: a query (string)
    Input: nb, maximum number of results to print (int)
    Output: None (the results are printed with ANSI colors)
    """
    if nb <= 0:
        # Nothing to display: skip the request entirely
        return

    search_client = SearchClient(
        acs_endpoint, index_name, credential=AzureKeyCredential(acs_key)
    )

    # Ask the service for at most `nb` documents instead of fetching the
    # default page and discarding everything after the first `nb` results.
    results = search_client.search(search_text=query, top=nb)

    for idx, result in enumerate(results, start=1):
        if idx > nb:
            # Defensive stop in case the service returns more than requested
            break
        print("\033[1;31;34m")
        print(result["title"])
        print("\033[1;31;32m")
        print("IMDB ID:", result["imdb_id"])
        print("Casting:", result["cast"])
        print("Director:", result["director"])
        print("Description:", result["description"])
        print("Genres:", result["genres"])
        print("Year:", result["year"])
        print("\033[0m")

In [None]:
acs_full_query("Terminator", nb=5)

## 3. Pure Vector Search

In [None]:
def acs_vectorsearch_query(query, topn=5):
    """
    Pure vector search on the "embed_overview" field of the index
    Input: a query (string)
    Input: topn, value passed as `k` of the vector query (int)
    Output: titles of the returned movies, in result order (list)
    """
    started = time.time()
    client = SearchClient(
        acs_endpoint, index_name, credential=AzureKeyCredential(acs_key)
    )

    # Embed the query text and search only through the vector field
    query_vector = Vector(
        value=openai_text_embeddings(query), k=topn, fields="embed_overview"
    )
    wanted_fields = [
        "imdb_id",
        "title",
        "cast",
        "director",
        "description",
        "genres",
        "year",
    ]
    hits = client.search(
        search_text=None,
        vectors=[query_vector],
        select=wanted_fields,
    )

    titles = []

    for rank, hit in enumerate(hits, start=1):
        print("\033[1;31;34m")
        print(f"{rank} Movie title: {hit['title']}")
        print("IMDB ID:", hit["imdb_id"])
        print("\033[1;31;32m")
        print(f"Description: {hit['description']}")
        print("\033[0m")
        for label, field in (
            ("Cast", "cast"),
            ("Director", "director"),
            ("Genres", "genres"),
            ("Year", "year"),
        ):
            print(f"{label}: {hit[field]}")
        print("\033[1;31;35m")
        print(f"Score = {hit['@search.score']}")

        titles.append(hit["title"])

    duration = time.time() - started
    print("\033[0m")
    # Digits after the decimal point are appended to the strftime pattern,
    # which is then cut to 15 characters before formatting
    fraction_digits = str(duration % 1)[2:]
    clock_pattern = "%H:%M:%S.{}".format(fraction_digits)[:15]
    print("\nElapsed time: " + time.strftime(clock_pattern, time.gmtime(duration)))

    return titles

**Multi-lingual search**. Use a multi-lingual embeddings model to represent your document in multiple languages in a single vector space to find documents regardless of the language they are in.


In [None]:
acs_vectorsearch_query("Je veux regarder un James Bond")

In [None]:
acs_vectorsearch_query("I want some musical movies")

## 4. Multiple search queries with Azure Cognitive Search

### 4.1 Multi-Vector search

**Multi-query vector search** sends **multiple queries across multiple vector fields** in your search index. This query example looks for similarity in both embed_title and embed_overview, but sends in two different query embeddings respectively. This scenario is ideal for multi-modal use cases where you want to search over a textVector field and an imageVector field. You can also use this scenario if you have different embedding models with different dimensions in your search index. This also displays scores using Reciprocal Rank Fusion (RRF).

In [None]:
# Multi-vector search: two vector queries (one per vector field) in one request
start = time.time()

query = "I want to watch a film about a sinking ship"

search_client = SearchClient(
    acs_endpoint, index_name, credential=AzureKeyCredential(acs_key)
)

# The query text is embedded twice, once for each target vector field
vector1 = Vector(value=openai_text_embeddings(query), k=5, fields="embed_title")
vector2 = Vector(value=openai_text_embeddings(query), k=5, fields="embed_overview")

results = search_client.search(
    search_text=None,
    vectors=[vector1, vector2],
    select=["imdb_id", "title", "cast", "director", "description", "genres", "year"],
)

# Print every returned movie with its rank and score
for idx, result in enumerate(results, start=1):
    print("\033[1;31;34m")
    print(f"{idx} Movie title: {result['title']}")
    print("IMDB ID:", result["imdb_id"])
    print("\033[1;31;32m")
    print(f"Description: {result['description']}")
    print("\033[0m")
    for label, field in (
        ("Cast", "cast"),
        ("Director", "director"),
        ("Genres", "genres"),
        ("Year", "year"),
    ):
        print(f"{label}: {result[field]}")
    print("\033[1;31;35m")
    print(f"Score = {result['@search.score']}")

elapsed = time.time() - start
print("\033[0m")
# Fractional digits of the elapsed time are appended to the strftime pattern,
# which is cut to 15 characters before formatting
fraction_digits = str(elapsed % 1)[2:]
print(
    "\nElapsed time: "
    + time.strftime("%H:%M:%S.{}".format(fraction_digits)[:15], time.gmtime(elapsed))
)

## 4.2 Cross-field vector search

A **cross-field vector query** sends a **single query across multiple vector fields in your search index**. This query example looks for similarity in both "embed_title" and "embed_overview" and displays scores using **Reciprocal Rank Fusion (RRF)**.

For hybrid search scoring, Cognitive Search uses Reciprocal Rank Fusion (RRF). In information retrieval, RRF combines the results of different search methods to produce a single, more accurate and relevant result. (Here, a search method refers to methods such as vector search and full-text search.) RRF is based on the concept of reciprocal rank, which is the inverse of the rank of the first relevant document in a list of search results. 

At a basic level, RRF works by taking the search results from multiple methods, assigning a reciprocal rank score to each document in the results, and then combining these scores to create a new ranking. The main idea behind this method is that documents appearing in the top positions across multiple search methods are likely to be more relevant and should be ranked higher in the combined result.

In [None]:
# Cross-field vector search: one query vector compared against two vector fields
start = time.time()

query = "show me a film that talks about some rock bands"

search_client = SearchClient(
    acs_endpoint, index_name, credential=AzureKeyCredential(acs_key)
)

# A single embedding of the query targets both fields listed in `fields`
vector = Vector(
    value=openai_text_embeddings(query), k=5, fields="embed_title, embed_overview"
)

results = search_client.search(
    search_text=None,
    vectors=[vector],
    select=["imdb_id", "title", "cast", "director", "description", "genres", "year"],
)

# Print every returned movie with its rank and score
for idx, result in enumerate(results, start=1):
    print("\033[1;31;34m")
    print(f"{idx} Movie title: {result['title']}")
    print("IMDB ID:", result["imdb_id"])
    print("\033[1;31;32m")
    print(f"Description: {result['description']}")
    print("\033[0m")
    for label, field in (
        ("Cast", "cast"),
        ("Director", "director"),
        ("Genres", "genres"),
        ("Year", "year"),
    ):
        print(f"{label}: {result[field]}")
    print("\033[1;31;35m")
    print(f"Score = {result['@search.score']}")

elapsed = time.time() - start
print("\033[0m")
# Fractional digits of the elapsed time are appended to the strftime pattern,
# which is cut to 15 characters before formatting
fraction_digits = str(elapsed % 1)[2:]
print(
    "\nElapsed time: "
    + time.strftime("%H:%M:%S.{}".format(fraction_digits)[:15], time.gmtime(elapsed))
)

## 4.3 Pure vector search with filter
We can specify a filter on the vector search

In [None]:
myfilter = "director eq 'James Cameron'"
myfilter

In [None]:
# Pure vector search restricted by the filter expression stored in `myfilter`
start = time.time()

query = "show me a movie about a robot"

search_client = SearchClient(
    acs_endpoint, index_name, credential=AzureKeyCredential(acs_key)
)

vector = Vector(value=openai_text_embeddings(query), k=5, fields="embed_overview")

results = search_client.search(
    search_text=None,
    vectors=[vector],
    filter=myfilter,
    select=["imdb_id", "title", "cast", "director", "description", "genres", "year"],
)

# Print every returned movie with its rank and score
for idx, result in enumerate(results, start=1):
    print("\033[1;31;34m")
    print(f"{idx} Movie title: {result['title']}")
    print("IMDB ID:", result["imdb_id"])
    print("\033[1;31;32m")
    print(f"Description: {result['description']}")
    print("\033[0m")
    for label, field in (
        ("Cast", "cast"),
        ("Director", "director"),
        ("Genres", "genres"),
        ("Year", "year"),
    ):
        print(f"{label}: {result[field]}")
    print("\033[1;31;35m")
    print(f"Score = {result['@search.score']}")

elapsed = time.time() - start
print("\033[0m")
# Fractional digits of the elapsed time are appended to the strftime pattern,
# which is cut to 15 characters before formatting
fraction_digits = str(elapsed % 1)[2:]
print(
    "\nElapsed time: "
    + time.strftime("%H:%M:%S.{}".format(fraction_digits)[:15], time.gmtime(elapsed))
)

## 4.4 Hybrid search

**Hybrid search consists of keyword queries and vector queries** in a single search request.

Vector search is implemented at the field level, which means you can build queries that include vector fields and searchable text fields. The queries execute in parallel and the results are merged into a single response. Optionally, add semantic search (preview) for even more accuracy with L2 reranking using the same language models that power Bing.

In [None]:
# Hybrid search: the same text is sent as a keyword query and as a vector query
start = time.time()

query = "show me a film with Stallone"

search_client = SearchClient(acs_endpoint, index_name, AzureKeyCredential(acs_key))

vector = Vector(value=openai_text_embeddings(query), k=10, fields="embed_overview")

results = search_client.search(
    search_text=query,
    vectors=[vector],
    select=["imdb_id", "title", "cast", "director", "description", "genres", "year"],
    top=10,
)

# Print every returned movie with its rank and score
for idx, result in enumerate(results, start=1):
    print("\033[1;31;34m")
    print(f"{idx} Movie title: {result['title']}")
    print("IMDB ID:", result["imdb_id"])
    print("\033[1;31;32m")
    print(f"Description: {result['description']}")
    print("\033[0m")
    for label, field in (
        ("Cast", "cast"),
        ("Director", "director"),
        ("Genres", "genres"),
        ("Year", "year"),
    ):
        print(f"{label}: {result[field]}")
    print("\033[1;31;35m")
    print(f"Score = {result['@search.score']}")

elapsed = time.time() - start
print("\033[0m")
# Fractional digits of the elapsed time are appended to the strftime pattern,
# which is cut to 15 characters before formatting
fraction_digits = str(elapsed % 1)[2:]
print(
    "\nElapsed time: "
    + time.strftime("%H:%M:%S.{}".format(fraction_digits)[:15], time.gmtime(elapsed))
)

## 4.5 Hybrid search with a filter

In [None]:
myfilter = "year eq '2000' or director eq 'William Friedkin'"

In [None]:
# Hybrid search (keyword + vector) restricted by the filter stored in `myfilter`
start = time.time()

query = "show me a movie"

search_client = SearchClient(acs_endpoint, index_name, AzureKeyCredential(acs_key))

vector = Vector(value=openai_text_embeddings(query), k=10, fields="embed_overview")

results = search_client.search(
    search_text=query,
    vectors=[vector],
    filter=myfilter,
    select=["imdb_id", "title", "cast", "director", "description", "genres", "year"],
    top=10,
)

# Print every returned movie with its rank and score
for idx, result in enumerate(results, start=1):
    print("\033[1;31;34m")
    print(f"{idx} Movie title: {result['title']}")
    print("IMDB ID:", result["imdb_id"])
    print("\033[1;31;32m")
    print(f"Description: {result['description']}")
    print("\033[0m")
    for label, field in (
        ("Cast", "cast"),
        ("Director", "director"),
        ("Genres", "genres"),
        ("Year", "year"),
    ):
        print(f"{label}: {result[field]}")
    print("\033[1;31;35m")
    print(f"Score = {result['@search.score']}")

elapsed = time.time() - start
print("\033[0m")
# Fractional digits of the elapsed time are appended to the strftime pattern,
# which is cut to 15 characters before formatting
fraction_digits = str(elapsed % 1)[2:]
print(
    "\nElapsed time: "
    + time.strftime("%H:%M:%S.{}".format(fraction_digits)[:15], time.gmtime(elapsed))
)

## 4.6 Semantic Hybrid Search

You need to enable semantic search on your search service from the Azure Portal before running this cell!

In [None]:
# Semantic hybrid search: keyword + vector query, re-ranked by the semantic
# configuration named "my-semantic-config", with extractive captions/answers.
# The query is misspelled on purpose in this example.
query = "show me rommmantttic movies"

search_client = SearchClient(
    acs_endpoint, index_name, credential=AzureKeyCredential(acs_key)
)

vector = Vector(value=openai_text_embeddings(query), k=5, fields="embed_overview")

results = search_client.search(
    search_text=query,
    vectors=[vector],
    select=["imdb_id", "title", "cast", "director", "description", "genres", "year"],
    query_type="semantic",
    query_language="en-us",
    semantic_configuration_name='my-semantic-config',
    query_caption="extractive",
    query_answer="extractive",
)

# Semantic answers: show the highlighted version when there is one
semantic_answers = results.get_answers()
for answer in semantic_answers:
    answer_text = answer.highlights if answer.highlights else answer.text
    print(f"Semantic Answer: {answer_text}")
    print(f"Semantic Answer Score: {answer.score}\n")

# Only the first 10 results are printed (the iterator is still fully consumed)
for idx, result in enumerate(results, start=1):
    if idx > 10:
        continue
    print("\033[1;31;34m")
    print(f"{idx} Movie title: {result['title']}")
    print("IMDB ID:", result["imdb_id"])
    print("\033[1;31;32m")
    print(f"Description: {result['description']}")
    print("\033[0m")
    print(f"Genres: {result['genres']}")
    print("\033[1;31;35m")
    print(f"Score = {result['@search.score']}")

## 5. ChatGPT and Azure Cognitive Search

**temperature** controls the "creativity" of the generated text, between 0 and 2. A higher temperature will result in more diverse and unexpected responses, while a lower temperature will result in more conservative and predictable responses. The default value for temperature is 1.0, but you can experiment with different values to see what works best for your use case. Higher values mean the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. We generally recommend altering this or top_p but not both.

**top_p** - An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. 1 is the default value. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.

**Make sure your model deployment is updated!**

In [None]:
def azure_chatgpt_movies(prompt, max_retries=3, retry_delay=1):
    """
    Chat GPT with Azure Open AI
    Input: prompt (text)
    Input: max_retries, number of attempts before giving up (int)
    Input: retry_delay, seconds to wait between two attempts (number)
    Output: the content of the first choice (text), or None when every
    attempt failed
    """
    for attempt in range(1, max_retries + 1):
        # Only the Azure Open AI call is retried. The display code below is
        # kept outside of this try block so that a failure while printing
        # (for example the timezone lookup) cannot trigger a new, billed
        # completion request or discard a valid answer.
        try:
            response = openai.ChatCompletion.create(
                engine="gpt-35-turbo-unai",  # Should be deployed in the AOAI studio
                messages=[
                    {
                        "role": "system",
                        "content": "You are an AI assistant that helps people to find \
                        information about movies."
                    },
                    {"role": "user", "content": prompt},
                ],
                temperature=0.9,
                max_tokens=400,
                top_p=1,
                frequency_penalty=0,
                presence_penalty=0,
                stop=None,
            )

            resp = response["choices"][0]["message"]["content"]

        except Exception as e:
            print("Error:", str(e))
            # Announce and wait only when another attempt will really follow
            if attempt < max_retries:
                print("Retrying...")
                time.sleep(retry_delay)
            continue

        print("\033[1;31;34mPrompt:", prompt)
        print("\n\033[1;31;32mResponse:", resp)
        print("\033[1;31;35m")
        print("Powered by Azure AI")

        # Local time: best-effort timezone detection from the public IP,
        # falling back to the machine's local time when the lookup fails
        try:
            local_tz = pytz.timezone(
                requests.get("https://ipinfo.io", timeout=5).json()["timezone"]
            )
        except Exception:
            local_tz = None
        print(datetime.now(local_tz).strftime("%d-%b-%Y %H:%M:%S"))

        return resp

    print("Maximum retries exceeded. Unable to get a response.")

    return None

In [None]:
def get_movie_poster(imdb_id):
    """
    Display movie poster from its IMDB id
    Input: imdb_id (text)
    Output: None. The poster is displayed and saved as
    movieposter/movie_<imdb_id>.jpg; a message is printed when the poster
    cannot be fetched
    """
    try:
        poster = mp.get_poster(id=imdb_id)
        response = requests.get(poster, timeout=30)
    except Exception as error:
        # Poster lookup or download failed: report it and let the caller
        # continue with the next movie instead of aborting the whole listing.
        print("Failed to fetch the movie image.")
        print("Error:", error)
        return

    if response.status_code != 200:
        print("Failed to fetch the movie image.")
        return

    img = Image.open(BytesIO(response.content))
    display(img)

    # Save as a local file
    os.makedirs("movieposter", exist_ok=True)
    posterfile = os.path.join("movieposter", "movie_" + str(imdb_id) + ".jpg")
    if img.mode not in ("RGB", "L", "CMYK"):
        # JPEG cannot store palette or alpha images: convert before saving
        img = img.convert("RGB")
    img.save(posterfile)

In [None]:
def get_trailer(movietitle):
    """
    Find a YouTube video from a movie title, embed it and print its comments
    Input: movie title (string)
    Output: None (the video player is displayed, the comments are printed)
    """
    # Only the first YouTube search result is requested
    found = VideosSearch(movietitle, limit=1).result().get("result")

    if not found:
        print("No video found on YouTube.")
        return

    print("\033[1;31;34m")
    video_id = found[0].get("id")
    video_url = f"https://www.youtube.com/watch?v={video_id}"
    print("\033[1;31;34m", video_url)

    # Embedded player displayed in the notebook
    player = IFrame(
        src=f"https://www.youtube.com/embed/{video_id}", width="560", height="315"
    )
    display(player)

    # Comments returned by the library for this video
    # (presumably the first page only - confirm against youtubesearchpython)
    print("YouTube video comments for", movietitle, ":")
    fetched = Comments.get(video_id)
    print("\033[1;31;32m")

    for position, entry in enumerate(fetched.get("result"), start=1):
        print(f"Comment {position:02}: {entry.get('content')}")
    print("\033[0m")

In [None]:
def get_all_trailers(movies_list):
    """
    Get movie trailers from a list of movies
    Input: movies list (list)
    Output: None (get_trailer is run on every movie of the list; a failure on
    one movie is reported and the next movie is processed)
    """
    for movie in movies_list:
        print(movie)
        try:
            get_trailer(movie)
        except Exception as error:
            # Keep going with the next movie, but say what went wrong and do
            # not swallow KeyboardInterrupt like a bare except would.
            print("Error:", error)
        print()

In [None]:
def acs_find_movies_with_posters(title, min_score=0.9):
    """
    Find a movie by title with a vector search on the "embed_title" field
    and display it with its poster
    Input: movie title (string)
    Input: min_score, minimum '@search.score' for the best match to be
           considered as the requested movie (float, default 0.9)
    Output: None (the movie details and poster are displayed, or a note is
    printed when the best match is below min_score)
    """
    search_client = SearchClient(
        acs_endpoint, index_name, credential=AzureKeyCredential(acs_key)
    )

    # Only the closest title is requested (k=1, top=1)
    vector = Vector(value=openai_text_embeddings(title), k=1, fields="embed_title")

    results = search_client.search(
        search_text=None,
        vectors=[vector],
        select=[
            "imdb_id",
            "title",
            "cast",
            "director",
            "description",
            "genres",
            "year",
        ],
        top=1,
    )

    for result in results:
        if result['@search.score'] >= min_score:
            print("\033[1;31;34m")
            print(f"Movie title: {result['title']}")
            print(f"IMDB ID: {result['imdb_id']}")
            # Get movie poster
            get_movie_poster(result["imdb_id"])
            print("\033[1;31;32m")
            print(f"Description: {result['description']}")
            print("\033[0m")
            print(f"Cast: {result['cast']}")
            print(f"Director: {result['director']}")
            print(f"Genres: {result['genres']}")
            print(f"Year: {result['year']}")
            print("\033[1;31;35m")
            print(f"Score = {result['@search.score']}")
        else:
            # The nearest title is not similar enough to be the same movie
            print("Note: The movie", title, "is not available in the Azure Cognitive search index")

In [None]:
def get_acs_movies_results(movies_list):
    """
    Look up every movie of a list in Azure Cognitive Search
    Input: movies list (list)
    Output: None (acs_find_movies_with_posters is called once per element,
    in list order)
    """
    for title in movies_list:
        acs_find_movies_with_posters(title)

### Tests

In [None]:
azure_chatgpt_movies("Who are you?")

In [None]:
azure_chatgpt_movies("Can you display the name of the actors and characters of the Titanic movie in a json list?")

In [None]:
azure_chatgpt_movies("When was made the Titanic movie?")

In [None]:
azure_chatgpt_movies("Who is the director of the shining? What was the release year?")

In [None]:
azure_chatgpt_movies("What are the main characters of Million Dollar Baby? Who is the director? What is the release year?")

In [None]:
azure_chatgpt_movies("Share with me some details about the 'Les choses de la vie' movie")

## 6. ChatGPT and Azure Cognitive Search and additional results (trailers)

### Test 1

In [None]:
prompt = "I want to see an action movie from the 70's in New York City.\
 I want you to save only the movie title in a numbered list with the release year."

answer = azure_chatgpt_movies(prompt)

In [None]:
movies_list = get_list(answer)

In [None]:
get_acs_movies_results(movies_list)

In [None]:
get_all_trailers(movies_list)

### Test 2

In [None]:
prompt = "What are the movies of Sylvester Stallone between 1985 and 1990?.\
 I want you to save only the movie title in a numbered list with the release year."

answer = azure_chatgpt_movies(prompt)

In [None]:
movies_list = get_list(answer)

In [None]:
get_acs_movies_results(movies_list)

In [None]:
get_all_trailers(movies_list)

### Test 3

In [None]:
prompt = "What are the top 3 movies about WW2 in terms of audience?\
 I want you to save only the movie title in a numbered list with the release year."

answer = azure_chatgpt_movies(prompt)

In [None]:
movies_list = get_list(answer)

In [None]:
get_acs_movies_results(movies_list)

In [None]:
get_all_trailers(movies_list)

### Test 4

In [None]:
prompt = "What is the name of the movie about two teenagers using a computer in the 80's?\
 I want you to save only the movie title in a numbered list with the release year."

answer = azure_chatgpt_movies(prompt)

In [None]:
movies_list = get_list(answer)

In [None]:
get_acs_movies_results(movies_list)

In [None]:
get_all_trailers(movies_list)

### Test 5

In [None]:
prompt = "What is the movie made by Steven Spielberg about crime prediction?\
 I want you to save only the movie title in a numbered list with the release year."

answer = azure_chatgpt_movies(prompt)

In [None]:
movies_list = get_list(answer)

In [None]:
get_acs_movies_results(movies_list)

In [None]:
get_all_trailers(movies_list)

## 7. Web apps

### 7.1 ChatGPT with Azure Open AI webapp

Make sure you include your GPT deployment name!

In [None]:
def gradio_chatgpt_function(prompt, max_retries=3, retry_delay=1):
    """
    ChatGPT with Azure Open AI
    Input: prompt (string)
    Input: max_retries, number of attempts before giving up (int)
    Input: retry_delay, seconds to wait between two attempts (number)
    Output: Azure Open AI ChatGPT results (string), or None when every
    attempt failed
    """
    for attempt in range(1, max_retries + 1):
        try:
            response = openai.ChatCompletion.create(
                engine="gpt-35-turbo-unai",  # Should be deployed in the AOAI studio
                messages=[
                    {
                        "role": "system",
                        "content": "You are an AI assistant that helps people to find \
                        information about movies. You will respond with the movie title name \
                        and the date of the released movie in a numbered list. \
                        One possible answer is : 1 Terminator (1984) 2 Mad Max (1979) 3 Abyss (1989)"
                    },
                    {"role": "user", "content": prompt},
                ],
                temperature=0.9,
                max_tokens=200,
                top_p=1,
                frequency_penalty=0,
                presence_penalty=0,
                stop=None,
            )
            resp = response["choices"][0]["message"]["content"]

            return resp

        except Exception as e:
            # Make the failure visible in the notebook output instead of
            # silently returning an empty answer to the web app.
            print(f"Azure Open AI call failed (attempt {attempt}/{max_retries}):", e)
            # No need to wait once the last attempt has failed
            if attempt < max_retries:
                time.sleep(retry_delay)

    return None

<img src="https://github.com/retkowsky/Azure-OpenAI-demos/blob/main/Movies%20recommender%20usecase%20demo/webapp1.png?raw=true">

In [None]:
# Texts and logo displayed around the web app
logo = "https://github.com/retkowsky/images/blob/master/azure_openai_logo.png?raw=true"
image = f"<center> <img src= {logo} width=500px></center>"
header = "Azure ChatGPT Movie Application"
article = "Serge Retkowsky | Microsoft | 2023 | Powered by Azure Open AI"

# Example prompts offered below the input box
samples = [
    "Show me some movies in NYC from the seventies",
    "Show me a list of movies like Terminator",
    "I want some musical movies",
    "Show me some movies like Jaws",
    "I want to see some movies with Stallone",
    "I love 'La folie des grandeurs'. What do you recommend me to watch?",
    "Display some movies names about dragons",
    "Display some movies names with hard rock bands",
]

# One text input (the question) and one text output (the model answer)
inputs = gr.Textbox(lines=3, label="What do you want to search?")
outputs = gr.Textbox(label="Azure Open AI results")

azure_chatgpt_movie_webapp = gr.Interface(
    fn=gradio_chatgpt_function,
    inputs=inputs,
    outputs=outputs,
    title=header,
    description=image,
    examples=samples,
    article=article,
    theme="gradio/monochrome",  # https://huggingface.co/spaces/gradio/theme-gallery
)

# NOTE: share=True asks Gradio for a public share link in addition to the
# local server; use share=False to keep the app local.
azure_chatgpt_movie_webapp.launch(share=True)

### 7.2 ChatGPT with Azure Open AI and Azure Cognitive Search webapp

In [None]:
def azure_chatgpt(prompt):
    """
    Ask the Azure Open AI chat deployment for movies matching a prompt
    Input: prompt (string)
    Output: content of the first choice returned by the model (str); the
    system message asks for a numbered list of movie titles with their year
    """
    # The system message is reproduced character for character (including the
    # spaces kept by the line continuations) so the model input is unchanged
    system_message = {
        "role": "system",
        "content": "You are an AI assistant that helps people to find \
                information about movies. You must respond with a numbered list that contains \
                the movie title name and the date of the released movie. \
                If there is only one answer please add 1 before the movie title and the date. \
                When the answer is multiple you should have an example like that one: \
                Another possible answer is : 1 Terminator (1984) 2 Mad Max (1979) 3 Abyss (1989)"
    }
    user_message = {"role": "user", "content": prompt}

    completion = openai.ChatCompletion.create(
        engine="gpt-35-turbo-unai",  # Should be deployed in the AOAI studio
        messages=[system_message, user_message],
        temperature=0,
        max_tokens=800,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )

    # Get only the movie(s)
    first_choice = completion["choices"][0]

    return first_choice["message"]["content"]

In [None]:
def get_acs_infos(title, min_score=0.9):
    """
    Get the movies information from the title using Azure Cognitive Search
    (vector search on the "embed_title" field)
    Input: title movie (string)
    Input: min_score, minimum '@search.score' for the best match to be
           considered as the requested movie (float, default 0.9)
    Output: formatted movie details (text), or None when no result reaches
    min_score
    """
    search_client = SearchClient(
        acs_endpoint, index_name, credential=AzureKeyCredential(acs_key)
    )

    # Only the closest title is requested (k=1, top=1)
    vector = Vector(value=openai_text_embeddings(title), k=1, fields="embed_title")

    results = search_client.search(
        search_text=None,
        vectors=[vector],
        select=[
            "imdb_id",
            "title",
            "cast",
            "director",
            "description",
            "genres",
            "year",
        ],
        top=1,
    )

    for result in results:
        if result['@search.score'] < min_score:
            # Not similar enough to be the requested movie
            continue

        return (
            f"{result['title']}  (score = {result['@search.score']})\n"
            f"Imdb id: {result['imdb_id']}\n"
            f"Description: {result['description']}\n"
            f"Director: {result['director']}\n"
            f"Cast: {result['cast']}\n"
            f"Genres: {result['genres']}\n"
            f"Year: {result['year']}\n\n"
        )

    # No result, or no result good enough: callers must handle None
    return None

In [None]:
def from_gpt_to_acs(gpt_result):
    """
    Generation of results to display for the webapp
    Input: movies list from azure open ai chatgpt (string, one movie per line)
    Output: concatenation of the Azure Cognitive Search details of every
    movie that was found (string, empty when nothing was found)
    """
    if not gpt_result:
        return ''

    found = []

    for line in gpt_result.splitlines():
        # Blank lines of the model answer are not movies: skip them instead
        # of sending an empty text to the embeddings model
        if not line.strip():
            continue

        # Doing the search in acs
        infos = get_acs_infos(line)

        # get_acs_infos returns None when the movie is not in the index:
        # ignore it instead of writing the text "None" in the results
        if infos:
            found.append(infos)

    return ''.join(found)

In [None]:
def gradio_movieapp_function(prompt, topn=5):
    """
    Movie app with Azure Open AI and Azure Cognitive Search
    Input: prompt (string)
    Input: topn (int), currently unused and kept for backward compatibility
    Output: text to display in the web app (string, empty when nothing was
    found or when an error occurred)
    """
    try:
        movies_titles = azure_chatgpt(prompt)
        text_to_display = from_gpt_to_acs(movies_titles)

    except Exception as e:
        # The web app still gets an empty answer, but the reason is now
        # visible in the notebook output instead of being silently dropped
        print("Error while searching movies:", e)
        return ''

    # Always hand a string back to the Gradio textbox
    return text_to_display or ''

<img src="https://github.com/retkowsky/Azure-OpenAI-demos/blob/main/Movies%20recommender%20usecase%20demo/webapp2.png?raw=true">

In [None]:
# Texts and logo displayed around the web app
logo = "https://github.com/retkowsky/images/blob/master/movies_search.png?raw=true"
image = f"<center> <img src= {logo} width=1000px></center>"
header = "Movie Application using Azure Open AI and Azure Cognitive Search"
article = "Serge Retkowsky | Microsoft | 2023 | Powered by Azure Open AI and Azure Cognitive Search"

# Example prompts offered below the input box
samples = [
    "Show me some movies in NYC from the seventies",
    "I want to see some movie with De Niro",
    "Show me some movies directed by Steven Spielberg",
    "Do you have some 'end of the world' movies?",
    "Show me some movies with a dragon",
    "Show me a movie where 2 teenagers where using a computer in the 80's",
    "Show me some James Bond movies with Sean Connery",
]

# One text input (the question) and one text output (the search results)
inputs = gr.Textbox(lines=3, label="What do you want to search?")
outputs = gr.Textbox(label="Azure Open AI and Azure Cognitive Search results")

azure_movie_webapp = gr.Interface(
    fn=gradio_movieapp_function,
    inputs=inputs,
    outputs=outputs,
    title=header,
    description=image,
    examples=samples,
    article=article,
    theme="HaleyCH/HaleyCH_Theme",  # https://huggingface.co/spaces/gradio/theme-gallery
)

# NOTE: share=True asks Gradio for a public share link in addition to the
# local server; use share=False to keep the app local.
azure_movie_webapp.launch(share=True)

> Go to the next notebook