# Generation using Retrieved Data

## Setup OpenAI API

In [2]:
import os

import azure.identity
import dotenv
import openai

# Set up the Azure OpenAI client from environment variables. load_dotenv()
# runs first so values from a local .env file are visible to os.getenv.
dotenv.load_dotenv()
AZURE_OPENAI_SERVICE = os.getenv("AZURE_OPENAI_SERVICE")
AZURE_OPENAI_ADA_DEPLOYMENT = os.getenv("AZURE_OPENAI_ADA_DEPLOYMENT")
AZURE_OPENAI_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")

# Fail fast on missing configuration. os.getenv returns None for an unset
# variable, which would otherwise be formatted into the endpoint below as
# "https://None.openai.azure.com" (or passed on as a None deployment name)
# and only surface later as an obscure request error.
_required_settings = {
    "AZURE_OPENAI_SERVICE": AZURE_OPENAI_SERVICE,
    "AZURE_OPENAI_ADA_DEPLOYMENT": AZURE_OPENAI_ADA_DEPLOYMENT,
    "AZURE_OPENAI_DEPLOYMENT_NAME": AZURE_OPENAI_DEPLOYMENT_NAME,
}
_missing_settings = [name for name, value in _required_settings.items() if not value]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

# AZURE_TENANT_ID is deliberately not validated: it is passed through as-is
# and is None when unset.
azure_credential = azure.identity.AzureDeveloperCliCredential(
    tenant_id=os.getenv("AZURE_TENANT_ID"))
# Authenticate with bearer tokens obtained from the credential (no API key),
# requested for the Cognitive Services scope.
token_provider = azure.identity.get_bearer_token_provider(azure_credential,
                                                          "https://cognitiveservices.azure.com/.default")
openai_client = openai.AzureOpenAI(
    api_version="2024-06-01",
    azure_endpoint=f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com",
    azure_ad_token_provider=token_provider)

## Recommendation System

In [93]:
import json
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import faiss

# 1. Load movie data
# JSON text is UTF-8 by specification. The encoding is passed explicitly
# because open() otherwise uses the platform's locale encoding, which can
# fail on or garble non-ASCII titles and plots (notably on Windows).
with open("movies.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# The file is expected to hold a top-level "movies" list of dicts.
movies = data["movies"]
# Parallel lists: position i in each one corresponds to movies[i].
titles = [movie["title"] for movie in movies]
texts = [f"{movie['title']} {movie['plot']}" for movie in movies]

# 2. Read the FAISS index from disk and record its size and vector width
index = faiss.read_index("movie_embeddings.index")
num_vectors, dimension = index.ntotal, index.d

# 3. Pull every stored vector back out of the index and stack them into a
#    single 2-D array, one row per vector id.
#    NOTE(review): presumably row i belongs to movies[i] - confirm against
#    the code that built the index.
stored_vectors = [index.reconstruct(vector_id) for vector_id in range(num_vectors)]
embeddings = np.vstack(stored_vectors)

# 4. Embed the user's query with the embeddings deployment
query_text = "A space adventure with aliens and distant planets"
embedding_response = openai_client.embeddings.create(
    input=[query_text],
    model=AZURE_OPENAI_ADA_DEPLOYMENT,
)
# One input was sent, so the first result is the query's embedding. Shape it
# as a single float32 row so it can be compared against a 2-D matrix.
raw_embedding = embedding_response.data[0].embedding
query_vector = np.array(raw_embedding, dtype="float32")[np.newaxis, :]
print(f"[User] {query_text}")

# 5. Score every movie against the query and keep the k best matches
k = 5
# cosine_similarity returns a (1, n_movies) matrix; take its only row.
similarities = cosine_similarity(query_vector, embeddings)[0]
ranking = pd.DataFrame(
    {
        "Title": titles,
        "Similarity": similarities,
        # Original position in `titles`, kept so a row can be traced back
        # after sorting reorders the frame.
        "Index": np.arange(len(titles)),
    }
)
df = ranking.sort_values(by="Similarity", ascending=False)
top_movies = df.iloc[:k]

# 6. Turn each top-ranked movie into a small text card for the LLM prompt.
#    The "Index" column holds the movie's position in `movies`.
descriptions = [
    "\n".join(
        (
            f"Title: {movie['title']}",
            f"Year: {movie['year']}",
            f"Genres: {', '.join(movie['genres'])}",
            f"Plot: {movie['plot']}",
        )
    )
    for movie in (movies[int(position)] for position in top_movies["Index"])
]

# Cards are separated by a blank line.
movie_descriptions = "\n\n".join(descriptions)

# 7. Assemble the user prompt (query + retrieved movie cards) and ask the
#    chat deployment for a short recommendation.
prompt = (
    "\n"
    f'A user is looking for a movie recommendation similar to: "{query_text}".\n'
    "\n"
    "Here are the top recommended movies:\n"
    "\n"
    f"{movie_descriptions}\n"
    "\n"
    "Based on these, write a friendly recommendation message in 2-3 sentences, "
    "highlighting which movie the user should watch first and why.\n"
)

chat_messages = [
    {"role": "system", "content": "You are a helpful movie recommendation assistant."},
    {"role": "user", "content": prompt},
]
response = openai_client.chat.completions.create(
    model=AZURE_OPENAI_DEPLOYMENT_NAME,
    messages=chat_messages,
    temperature=0.7,
    max_tokens=500,
)

# Only the first choice's message text is used.
recommendation = response.choices[0].message.content

# Final output: header line, then the model's message on the next line
print("\n[Assistant] Movie recommendations:\n", recommendation, sep="\n")

[User] A space adventure with aliens and distant planets

[Assistant] Movie recommendations:

If you're in the mood for a thrilling space adventure, I highly recommend starting with "Interstellar." Its stunning visuals and thought-provoking themes about love and sacrifice in the face of cosmic challenges will keep you captivated. Plus, the exploration of distant planets and the concept of wormholes adds an exciting layer of sci-fi intrigue that perfectly fits your interest in alien worlds! Enjoy your journey through the stars!
