In [None]:
import ast
import json
import os

import pandas as pd
from dotenv import load_dotenv
from IPython.display import display
from openai import OpenAI

In [None]:
# Read variables from the local .env file, then pick out the OpenAI key.
load_dotenv(".env")
api_key = os.environ.get("OPENAI_API_KEY")

# Client object that get_completion() uses for every request.
client = OpenAI(api_key=api_key)

In [None]:
def get_completion(
    system_prompt,
    user_prompt,
    model="gpt-4o-mini",
    max_tokens=100,
    temperature=0.9,
):
    """Send a system + user prompt pair to the chat API and return the reply text.

    The request is made in JSON mode (``response_format={"type": "json_object"}``).
    OpenAI's JSON mode requires the word "JSON" to appear somewhere in the
    messages, so at least one of the two prompts must mention it.

    Args:
        system_prompt: Instructions sent with the "system" role.
        user_prompt: Content sent with the "user" role.
        model: Name of the chat model to call.
        max_tokens: Upper bound on the number of generated tokens. Raise it for
            tasks whose JSON reply may be longer than a single short field.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The message content of the first choice, as an unparsed JSON string.

    Raises:
        ValueError: If generation stopped because ``max_tokens`` was reached,
            in which case the JSON text would be cut off and unparseable.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
        response_format={
            "type": "json_object",
        },
    )

    first_choice = response.to_dict()["choices"][0]

    # A reply cut off at the token limit is incomplete JSON; fail here with a
    # clear message instead of letting the caller hit an opaque decode error.
    if first_choice.get("finish_reason") == "length":
        raise ValueError(
            f"Completion was truncated after {max_tokens} tokens; "
            "increase max_tokens or ask for a shorter response."
        )

    return first_choice["message"]["content"]

#### AI Application: Customer Reviews Insider

`reviews.csv` was created by collecting a number of customer reviews of the *Apple iPhone 16 Pro* from [Amazon](https://www.amazon.co.uk/Apple-iPhone-16-Pro-128/dp/B0DGHSYPYK/ref=cm_cr_arp_d_product_top?ie=UTF8).

Let's develop an AI application that can:

1. Analyse the sentiment of a given review, also known as **Sentiment Analysis**
2. Extract the main topics of a given review, also known as **Topics Extraction**
3. Classify the main topics into a number of categories, also known as **Text Categorisation**
4. Summarise what the customers are saying, also known as **Text Summarisation**

In [None]:
# Load the collected reviews; the file is decoded as ISO-8859-1 (Latin-1).
reviews = pd.read_csv("data/reviews.csv", encoding="ISO-8859-1")
display(reviews)

In [None]:
# Take the first row's title and review text as a plain dict to use as the working example.
review = reviews[["title", "review"]].iloc[0].to_dict()
display(review)

**1. Sentiment Analysis**

Sentiment analysis is a widely used technique for determining the attitude or emotional tone conveyed in a text: positive, negative, or neutral.

**Tasks**: 

1. Write a system prompt with the task of detecting the Sentiment of a review. The output should be a structured `JSON` in this format:

```json
{
    "sentiment": "positive, negative, or neutral"
}
```

2. Write a user prompt to detect the sentiment of a specific review.
3. Test your prompt for one review.
4. Turn your code into a `get_sentiment` function.
5. Use your function to get the sentiment for all reviews.
6. Manually validate the detected sentiment.
7. Extra: can you think of techniques to preprocess the review in order to improve the accuracy? 

In [None]:
# System prompt: assigns the model its role and spells out the JSON shape of the reply.
system_prompt = """
You are an experienced customer service representative for a large online retailer. Your task is to review a customer review and determine its sentiment. 

Return the response as a json in this format:
{
    "sentiment": "positive, negative, or neutral"
}
"""
# Bare expression so the notebook echoes the prompt text.
system_prompt

In [None]:
# `review` is a dict, so the f-string embeds its dict representation (title and review text).
user_prompt = f"""
Please determine the sentiment of the following review which also includes the title:

{review}
"""
# Bare expression so the notebook echoes the rendered prompt.
user_prompt

In [None]:
# Run the prompt pair once and decode the JSON reply into a dict in a single step.
completion = json.loads(get_completion(system_prompt, user_prompt))
print(completion)

In [None]:
def get_sentiment(review):
    """Classify the sentiment of one review with the chat model.

    Args:
        review: The review to classify. It is interpolated into the user prompt
            as-is; callers in this notebook pass a dict with "title" and
            "review" keys.

    Returns:
        The value stored under the "sentiment" key of the model's JSON reply.
        String labels are stripped and lower-cased so that variants such as
        "Positive" and "positive " end up as the same value.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        KeyError: If the reply has no "sentiment" key.
    """
    system_prompt = """
    You are an experienced customer service representative for a large online retailer. Your task is to review a customer review and determine its sentiment. 

    Return the response as a json in this format:
    {
        "sentiment": "positive, negative, or neutral"
    }
    """

    user_prompt = f"""
    Please determine the sentiment of the following review which also includes the title:

    {review}
    """
    completion = json.loads(get_completion(system_prompt, user_prompt))
    sentiment = completion["sentiment"]

    # The model's wording is not guaranteed to match the requested labels
    # exactly; normalise case and whitespace so stored labels stay consistent.
    if isinstance(sentiment, str):
        sentiment = sentiment.strip().lower()
    return sentiment

In [None]:
# Classify every review and assign the whole column in one step.
# A comprehension keeps its loop variables local, so the example `review`
# selected earlier in the notebook is not overwritten by the last row.
reviews["sentiment"] = [
    get_sentiment(row[["title", "review"]].to_dict())
    for _, row in reviews.iterrows()
]

In [None]:
# Show the frame again, now including the per-review `sentiment` column, for manual validation.
display(reviews)

Techniques to improve accuracy:
1. Remove special characters such as emojis and hashtags
2. Lowercase all text to create uniformity
3. Correct spelling errors 

**2. Topics Extraction**

Topics Extraction helps in identifying the main themes or topics within a text.

**Tasks**: 

1. Write a system prompt with the task of extracting the main topics of a review. The output should be a structured `JSON` in this format:

```json
{
    "topics": [list of extracted topics]
}
```

2. Write a user prompt to extract the topics of a specific review.
3. Test your prompt for one review.
4. Turn your code into a `get_topics` function.
5. Use your function to get the topics for all reviews.
6. Manually validate the extracted topics.

In [None]:
# System prompt for topic extraction. It must mention JSON explicitly because
# the request is made in JSON mode, and it asks for short topics so the reply
# stays small.
system_prompt = """
You are an experienced customer service representative for a large online retailer. Your task is to review a customer review and extract the main topics it discusses.

Return the response as a json in this format:
{
    "topics": ["topic 1", "topic 2", "topic 3"]
}

Keep each topic short (one to three words) and return at most five topics.
"""
# Bare expression so the notebook echoes the prompt text.
system_prompt

In [None]:
# `review` is a dict, so the f-string embeds its dict representation (title and review text).
user_prompt = f"""
Please extract the main topics of the following review which also includes the title:

{review}
"""
# Bare expression so the notebook echoes the rendered prompt.
user_prompt

In [None]:
# Run the prompt pair once and decode the JSON reply into a dict in a single step.
completion = json.loads(get_completion(system_prompt, user_prompt))
print(completion)

In [None]:
def get_topics(review):
    """Extract the main topics of one review with the chat model.

    Args:
        review: The review to analyse. It is interpolated into the user prompt
            as-is; callers in this notebook pass a dict with "title" and
            "review" keys.

    Returns:
        The value stored under the "topics" key of the model's JSON reply
        (the prompt asks for a list of short topic strings).

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        KeyError: If the reply has no "topics" key.
    """
    # The prompt must mention JSON explicitly because the request is made in
    # JSON mode; topics are kept short so the reply stays small.
    system_prompt = """
    You are an experienced customer service representative for a large online retailer. Your task is to review a customer review and extract the main topics it discusses.

    Return the response as a json in this format:
    {
        "topics": ["topic 1", "topic 2", "topic 3"]
    }

    Keep each topic short (one to three words) and return at most five topics.
    """

    user_prompt = f"""
    Please extract the main topics of the following review which also includes the title:

    {review}
    """
    completion = get_completion(system_prompt, user_prompt)
    completion = json.loads(completion)
    return completion["topics"]

In [None]:
# Extract topics for every review and assign the whole column in one step.
# Each list is stored as its string form so a cell holds plain text; it is
# parsed back later with ast.literal_eval. A comprehension keeps its loop
# variables local, so the example `review` is not overwritten by the last row.
reviews["topics"] = [
    str(get_topics(row[["title", "review"]].to_dict()))
    for _, row in reviews.iterrows()
]

In [None]:
# Show the frame again, now including the per-review `topics` column, for manual validation.
display(reviews)

**3. Themes Extraction**

**Tasks**: 

1. Write a system prompt with the task of identifying the top 10 main themes across all extracted topics. The output should be a structured `JSON` in this format:

```json
{
    "themes": [list of 10 identified themes]
}
```

2. Write a user prompt to identify the themes from all extracted topics.
3. Test your prompt.
4. Turn your code into a `get_themes` function.
5. Manually validate the identified themes.

In [None]:
# System prompt for theme identification. It must mention JSON explicitly
# because the request is made in JSON mode, and it asks for short themes so the
# reply stays small.
system_prompt = """
You are an experienced customer insights analyst for a large online retailer. You will be given lists of topics that were extracted from customer reviews of a product. Your task is to identify the top 10 main themes across all of the topics, merging topics that mean the same thing.

Return the response as a json in this format:
{
    "themes": ["theme 1", "theme 2", "theme 3"]
}

Return exactly 10 themes and keep each theme short (one to three words).
"""
# Bare expression so the notebook echoes the prompt text.
system_prompt

In [None]:
# Turn each stored string back into a Python object, giving one entry per review.
topics = reviews["topics"].map(ast.literal_eval).tolist()
topics

In [None]:
# `topics` holds one entry per review, so the f-string embeds the full nested list.
user_prompt = f"""
Please identify the top 10 main themes across the following lists of topics, where each inner list holds the topics extracted from one review:

{topics}
"""
# Bare expression so the notebook echoes the rendered prompt.
user_prompt

In [None]:
# Run the prompt pair once and decode the JSON reply into a dict in a single step.
completion = json.loads(get_completion(system_prompt, user_prompt))

In [None]:
# Echo the parsed reply (expected to hold the identified themes) so it can be checked by eye.
completion