In [7]:
import ast
import json
import os

import pandas as pd
from dotenv import load_dotenv
from IPython.display import display
from openai import OpenAI


In [None]:
# Pull the variables defined in the local .env file into the process
# environment so the API key never has to be written into the notebook.
load_dotenv(".env")

# os.environ.get yields None when the variable is absent, just like os.getenv.
api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

In [None]:
def get_completion(
    system_prompt,
    user_prompt,
    model="gpt-4o-mini",
    temperature=0.9,
    max_tokens=4000,
):
    """Send one system + user message pair to the chat model and return the reply text.

    The request is made in JSON mode (``response_format={"type": "json_object"}``),
    so callers are expected to pass the returned string to ``json.loads``.

    Args:
        system_prompt: Instructions describing the task and the output format.
        user_prompt: The input the task should be applied to.
        model: Name of the chat model to query.
        temperature: Sampling temperature. The default keeps the value that
            used to be hard-coded; pass a lower value for more repeatable
            answers.
        max_tokens: Upper bound on the number of generated tokens. The default
            keeps the value that used to be hard-coded.

    Returns:
        The text content of the first choice of the completion.

    Raises:
        ValueError: If the reply was cut off by the token limit or carries no
            text content. In both cases the string could not be parsed as JSON.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
        response_format={
            "type": "json_object",
        },
    )

    choice = response.choices[0]

    # In JSON mode a reply that hits the token limit is an incomplete JSON
    # document; fail here with a clear message instead of inside json.loads.
    if choice.finish_reason == "length":
        raise ValueError(
            f"Model output was truncated after {max_tokens} tokens; "
            "increase max_tokens or shorten the prompt."
        )

    content = choice.message.content
    if content is None:
        raise ValueError("Model returned no text content.")

    return content

#### AI Application: Customers Reviews Insider 

`reviews.csv` has been created by collecting a number of reviews of an *Apple iPhone 16 Pro* on [Amazon](https://www.amazon.co.uk/Apple-iPhone-16-Pro-128/dp/B0DGHSYPYK/ref=cm_cr_arp_d_product_top?ie=UTF8)

Let's develop an AI application that can:

1. Analyse the sentiment of a given review, also known as **Sentiment Analysis**.
2. Extract the main topics of a given review, also known as **Topics Extraction**.
3. Extract the main themes across all topics, also known as **Themes Extraction**.
4. Summarise what the customers are saying about the extracted themes, also known as **Text Summarisation**.
5. Summarise the summaries of the extracted themes into one overall summary.

In [None]:
# Read the collected reviews into a DataFrame; the file is decoded as
# ISO-8859-1 rather than pandas' default UTF-8.
reviews_path = "data/reviews.csv"
df_reviews = pd.read_csv(reviews_path, encoding="ISO-8859-1")
display(df_reviews)

In [None]:
# Use the first review as the running example: keep only its text fields
# and turn them into a plain {"title": ..., "review": ...} dict.
review_1 = df_reviews[["title", "review"]].iloc[0].to_dict()
display(review_1)

**1. Sentiment Analysis**

Sentiment analysis is a very common technique to determine the attitude or emotional tone conveyed in the text, whether it's **positive**, **negative**, or **neutral**.

<span style="color:green">
Task 1: Write a system prompt with the task of detecting the Sentiment of a review. 

The output should be a structured <code>JSON</code> in this format:

```json
{
    "sentiment": "positive, negative, or neutral"
}
```
</span>

In [None]:
# System prompt for the sentiment task: it sets the role, lists the steps and
# fixes the JSON shape of the answer. The word "json" in the prompt matters
# because get_completion requests the "json_object" response format.
system_prompt = """
You are an experienced customer service representative for a large online retailer. 

Your task is to:
* Review a customer review.
* Determine the sentiment of the review.
    
Return the response as a json in this format:
{
    "sentiment": "positive, negative, or neutral"
}
"""
# Bare last expression so the notebook echoes the prompt for inspection.
system_prompt

<span style="color:green">
Task 2: Write a user prompt to detect the sentiment of a specific review.
</span>

In [None]:
# User prompt for the sentiment task. `review_1` is a dict with the "title"
# and "review" fields, so the f-string embeds its dict representation.
user_prompt = f"""
Determine the sentiment of the following review which also includes the title:

{review_1}
"""
# Bare last expression so the notebook echoes the rendered prompt.
user_prompt

<span style="color:green">
Task 3: Test your prompt for one review.
</span>

In [None]:
# Query the model with the sentiment prompts and decode the JSON reply in one
# step, so `completion` only ever holds the parsed dict.
completion = json.loads(get_completion(system_prompt, user_prompt))
completion

<span style="color:green">
Task 4: Turn your code into a <code>get_sentiment</code> function.
</span>

In [None]:
def get_sentiment(review):
    """Classify the sentiment of a single review with the chat model.

    Args:
        review: The review to classify. It is interpolated into the prompt
            as-is; the notebook passes a dict with the "title" and "review"
            fields.

    Returns:
        The "sentiment" value of the model's JSON reply. String labels are
        stripped and lower-cased so that variants such as "Positive" and
        "positive" do not end up as separate categories; any other value is
        returned unchanged.

    Raises:
        KeyError: If the reply does not contain a "sentiment" key.
    """
    system_prompt = """
    You are an experienced customer service representative for a large online retailer. 
    
    Your task is to:
    * Review a customer review.
    * Determine the sentiment of the review. 

    Return the response as a json in this format:
    {
        "sentiment": "positive, negative, or neutral"
    }
    """

    user_prompt = f"""
    Determine the sentiment of the following review which also includes the title:

    {review}
    """
    completion = json.loads(get_completion(system_prompt, user_prompt))
    sentiment = completion["sentiment"]

    # The model is sampled, so casing and whitespace of the label can vary
    # between calls; normalise it so the column holds consistent categories.
    if isinstance(sentiment, str):
        sentiment = sentiment.strip().lower()

    return sentiment

<span style="color:green">
Task 5: Use your function to get the sentiment for all reviews.
</span>

In [None]:
# Classify every review and store the label in a "sentiment" column.
# Each iteration makes one model call.
for ind, row in df_reviews.iterrows():
    # Only the text fields are sent to the model.
    review = {"title": row["title"], "review": row["review"]}
    sentiment = get_sentiment(review)
    df_reviews.loc[ind, "sentiment"] = sentiment

display(df_reviews)

<span style="color:green">
Task 6: Manually validate the detected sentiment.
</span>

<span style="color:green">
Task 7: Extra: can you think of techniques to preprocess the review in order to improve the accuracy?
</span>

Techniques to improve accuracy:
1. Remove special characters such as emojis and hashtags
2. Lowercase all text to create uniformity
3. Correct spelling errors 

**2. Topics Extraction**

Topics Extraction helps in identifying the main topics within a text.

<span style="color:green">
Task 1: Write a system prompt with the task of extracting the main 5 topics of a review. 

The output should be a structured <code>JSON</code> in this format:

```json
{
    "topics": [list of extracted topics]
}
```
</span>

In [None]:
# System prompt for the topics task: it sets the role, lists the steps and
# fixes the JSON shape of the answer. This rebinds the `system_prompt` name
# used by the sentiment section.
system_prompt = """
You are an experienced customer service representative for a large online retailer. 

Your task is to:
* Review a customer review.
* Identify key topics in the review. 
* Extract the top 5 key topics.

Return the response as a json in this format:
{
    "topics": [list of extracted topics]
}
"""
# Bare last expression so the notebook echoes the prompt for inspection.
system_prompt

<span style="color:green">
Task 2: Write a user prompt to extract the topics of a specific review.
</span>

In [None]:
# User prompt for the topics task, built from the same example review
# (`review_1`) that the sentiment section uses. It does not rely on a loop
# variable left over from an earlier cell.
user_prompt = f"""
Determine the main topics of the following review which also includes the title:

{review_1}
"""
# Bare last expression so the notebook echoes the rendered prompt.
user_prompt

<span style="color:green">
Task 3: Test your prompt for one review.
</span>

In [None]:
# Query the model with the topics prompts and decode the JSON reply in one
# step, so `completion` only ever holds the parsed dict.
completion = json.loads(get_completion(system_prompt, user_prompt))
completion

<span style="color:green">
Task 4: Turn your code into a <code>get_topics</code> function.
</span>

In [None]:
def get_topics(review):
    """Extract the main topics of a single review with the chat model.

    Args:
        review: The review to analyse. It is interpolated into the prompt
            as-is; the notebook passes a dict with the "title" and "review"
            fields.

    Returns:
        The "topics" value of the model's JSON reply.

    Raises:
        KeyError: If the reply does not contain a "topics" key.
    """
    instructions = """
    You are an experienced customer service representative for a large online retailer. 

    Your task is to:
    * Review a customer review.
    * Identify key topics in the review. 
    * Extract the top 5 key topics.

    Return the response as a json in this format:
    {
        "topics": [list of extracted topics]
    }
    """

    request = f"""
    Determine the main topics of the following review which also includes the title:

    {review}
    """

    # The model answers with a JSON document; decode it and pick the list.
    reply = json.loads(get_completion(instructions, request))
    return reply["topics"]

<span style="color:green">
Task 5: Use your function to get the topics for all reviews.
</span>

In [None]:
# Extract the topics of every review and keep them in a "topics" column.
# Each iteration makes one model call.
for ind, row in df_reviews.iterrows():
    # Only the text fields are sent to the model.
    review = {"title": row["title"], "review": row["review"]}
    topics = get_topics(review)
    # Store the string form of the result; a later cell parses it back
    # with ast.literal_eval.
    df_reviews.loc[ind, "topics"] = str(topics)

topic_columns = ["title", "review", "topics"]
display(df_reviews[topic_columns])

<span style="color:green">
Task 6: Manually validate the extracted topics.
</span>

**3. Themes Extraction**

Themes Extraction helps in identifying the main themes across a large number of extracted topics.

In [None]:
# The "topics" column holds the string form of each result; parse every cell
# back into a Python object so `topics` becomes a list with one entry per review.
topics = [ast.literal_eval(raw_topics) for raw_topics in df_reviews["topics"]]
display(topics)

<span style="color:green">
Task 1: Write a system prompt with the task of identifying the top 10 main themes across all extracted topics. 

The output should be a structured <code>JSON</code> in this format:

```json
{
    "themes": [list of identified themes]
}
```
</span>

In [None]:
# System prompt for the themes task: it sets the role, lists the steps and
# fixes the JSON shape of the answer (a "themes" list of at most 10 entries).
system_prompt = """
You are an experienced customer service representative for a large online retailer.

Your task is to:
* Review the topics that were extracted from a collection of customer reviews.
* Group topics that describe the same subject into a common theme.
* Identify the top 10 main themes across all topics.

Return the response as a json in this format:
{
    "themes": [list of identified themes]
}
"""
# Bare last expression so the notebook echoes the prompt for inspection.
system_prompt

<span style="color:green">
Task 2: Write a user prompt to identify key themes from all extracted topics.
</span>

In [None]:
# User prompt for the themes task: `topics` holds the extracted topics of
# every review (one entry per review) and is embedded via its list repr.
user_prompt = f"""
Identify the main themes across the following topics, which were extracted from customer reviews:

{topics}
"""
# Bare last expression so the notebook echoes the rendered prompt.
user_prompt

<span style="color:green">
Task 3: Test your prompt on extracted topics.
</span>

In [None]:
# Query the model with the themes prompts and decode the JSON reply in one
# step, so `completion` only ever holds the parsed dict.
completion = json.loads(get_completion(system_prompt, user_prompt))
completion

<span style="color:green">
Task 4: Turn your code into a <code>get_themes</code> function and test it.
</span>

In [None]:
def get_themes(topics):
    """Identify the main themes across the topics extracted from all reviews.

    Args:
        topics: The extracted topics. The value is interpolated into the
            prompt as-is; the notebook passes one list of topics per review.

    Returns:
        The "themes" value of the model's JSON reply.

    Raises:
        KeyError: If the reply does not contain a "themes" key.
    """
    system_prompt = """
    You are an experienced customer service representative for a large online retailer.

    Your task is to:
    * Review the topics that were extracted from a collection of customer reviews.
    * Group topics that describe the same subject into a common theme.
    * Identify the top 10 main themes across all topics.

    Return the response as a json in this format:
    {
        "themes": [list of identified themes]
    }
    """

    user_prompt = f"""
    Identify the main themes across the following topics, which were extracted from customer reviews:

    {topics}
    """

    completion = get_completion(system_prompt, user_prompt)
    completion = json.loads(completion)

    return completion["themes"]

In [None]:
# Derive the themes from the topics of all reviews (parsed from the "topics"
# column in an earlier cell) instead of passing the `...` placeholder.
themes = get_themes(topics)
display(themes)

<span style="color:green">
Task 5: Manually validate the extracted themes.
</span>

**4. Text Summarisation**

Summarise what the customers are saying about extracted themes.

In [None]:
# Plain Python list holding the values of the "review" column, one entry per row.
reviews = df_reviews["review"].tolist()
reviews

<span style="color:green">
Task: Write a function that takes <code>reviews</code>, and <code>themes</code> as an input and returns what the customers are saying about every extracted theme in this format:

```json
{
    "theme": "summary"
}
```
</span>

In [None]:
def get_theme_summary(reviews, themes):
    """Summarise what the customers are saying about every theme.

    This is the first version of the function; a later cell redefines it with
    a richer output format.

    Args:
        reviews: The customer reviews to summarise; interpolated into the
            prompt as-is (the notebook passes a list of review texts).
        themes: The themes to report on; interpolated into the prompt as-is.

    Returns:
        The model's JSON reply decoded into a dict. The prompt asks for one
        key per theme, each mapped to a summary string.
    """
    system_prompt = """
    You are an experienced customer service representative for a large online retailer.

    Your task is to:
    * Review a list of customer reviews and a list of themes.
    * For every theme, summarise what the customers are saying about it.
    * Base every summary only on the given reviews.

    Return the response as a json with one key per theme in this format:
    {
        "theme": "summary"
    }
    """

    user_prompt = f"""
    Summarise what the customers are saying about each of the following themes:
    {themes}

    Use the following customer reviews:
    {reviews}
    """

    completion = get_completion(system_prompt, user_prompt)
    completion = json.loads(completion)

    return completion

In [None]:
# Summarise the reviews per theme, using the review texts and the themes
# produced by the earlier cells instead of the `...` placeholders.
summaries = get_theme_summary(reviews, themes)
display(summaries)

<span style="color:green">
Task: Change the output to be in this format:

```json
{
    "theme": {
        "summary": "summary",
        "positive": ["list of positive sentences"],
        "negative": ["list of negative sentences"]
    }
}
```
</span>

In [None]:
def get_theme_summary(reviews, themes):
    """Summarise every theme and collect its positive and negative sentences.

    This intentionally redefines the earlier, simpler ``get_theme_summary``
    with a richer output format.

    Args:
        reviews: The customer reviews to summarise; interpolated into the
            prompt as-is (the notebook passes a list of review texts).
        themes: The themes to report on; interpolated into the prompt as-is.

    Returns:
        The model's JSON reply decoded into a dict. The prompt asks for one
        key per theme, each mapped to an object with a "summary" string and
        "positive" / "negative" lists of sentences.
    """
    system_prompt = """
    You are an experienced customer service representative for a large online retailer.

    Your task is to:
    * Review a list of customer reviews and a list of themes.
    * For every theme, summarise what the customers are saying about it.
    * For every theme, list the positive and the negative sentences the customers wrote about it.
    * Base everything only on the given reviews and use an empty list when there is nothing to report.

    Return the response as a json with one key per theme in this format:
    {
        "theme": {
            "summary": "summary",
            "positive": ["list of positive sentences"],
            "negative": ["list of negative sentences"]
        }
    }
    """

    user_prompt = f"""
    Summarise what the customers are saying about each of the following themes:
    {themes}

    Use the following customer reviews:
    {reviews}
    """

    completion = get_completion(system_prompt, user_prompt)
    completion = json.loads(completion)

    return completion

In [None]:
# Re-run the per-theme summarisation with the redefined function, using the
# review texts and the themes produced by the earlier cells instead of the
# `...` placeholders.
summaries = get_theme_summary(reviews, themes)
display(summaries)

**5. Overall Summarisation**

Summarise the summaries of all extracted themes.

<span style="color:green">
Task: Write a function that takes <code>summaries</code> as an input and returns an overall summary of what the customers are saying in this format:

```json
{
    "summery": "summery",
}
```
</span>

In [None]:
def get_summary(summaries):
    """Condense the per-theme summaries into one overall summary.

    Args:
        summaries: The per-theme summaries; interpolated into the prompt
            as-is (the notebook passes the dict returned by
            ``get_theme_summary``).

    Returns:
        The "summary" value of the model's JSON reply.

    Raises:
        KeyError: If the reply does not contain a "summary" key.
    """
    # The reply key is spelled "summary" in both the requested format and the
    # lookup below, so the prompt and the code cannot drift apart.
    system_prompt = """
    You are an experienced customer service representative for a large online retailer.

    Your task is to:
    * Review the per-theme summaries of what customers are saying about a product.
    * Write one concise overall summary that covers the main positive and negative points.

    Return the response as a json in this format:
    {
        "summary": "overall summary"
    }
    """

    user_prompt = f"""
    Write an overall summary of the following theme summaries:

    {summaries}
    """

    completion = get_completion(system_prompt, user_prompt)
    completion = json.loads(completion)

    return completion["summary"]

In [None]:
# Build the overall summary from the per-theme summaries produced by the
# earlier cell instead of passing the `...` placeholder.
summary = get_summary(summaries)
display(summary)