# Getting started

## 1. Data

In [2]:
import pandas as pd
import numpy as np 
from flask import Flask
import openai

pd.set_option('display.max_colwidth', None)

In [3]:
data_sample = pd.read_csv("../data/twitter_data_clean_sample.csv")

In [4]:
data_sample.head()

Unnamed: 0,customer_tweet,company_tweet,company
0,Ordered this around 2am Friday morning and it made it here already... good job @115830 https://t.co/XXMuII3QwQ,@383517 I am very happy to hear this Pablo:) I hope you enjoy your order.^GA,AmazonHelp
1,"@AmazonHelp what does ""ships in 1-3 weeks"" actually mean? Do you have the item I want in stock or not? Items like this have given me issues","@274096 If your item will ship in 1-3 weeks, this means the item is not in stock and needs to be ordered from our distributor. More info here: https://t.co/V7JYyWd9JF ^RA",AmazonHelp
2,@115821 // Email from Representative not correct. There was someone to receive package. Whoever said different at @118706 is lying.,@528375 I'm sorry you haven't received your package. We'd like to help. Please contact us here: https://t.co/hApLpMlfHN ^AY,AmazonHelp
3,je l’ai déjà l’application amazon jdevrais être immunisé de vos pubs de merde @115821,"@792999 3/3 Ensuite décochez à nouveau les cases que vous aviez sélectionnées. N'oubliez pas de ""Valider"" pour effectuer vos modifications. \nNous espérons que ces informations vous seront utiles.",AmazonHelp
4,"I must say @115830, a package left under a doormat which is full of holes, in the middle of a downpour, is not the best idea #wetelectronics","@776873 I apologize for how your delivery was handled, that is not the experience we want our customers to have. Which courier was assigned delivery of that package, as shown in the order details here: https://t.co/aaDyEz1VgE ^CH",AmazonHelp


In [5]:
data_sample.company.value_counts()

company
AmazonHelp      100
AppleSupport    100
SpotifyCares    100
Uber_Support    100
Name: count, dtype: int64

## 2. How to use the OpenAI API

Create a `config.ini` file containing your OpenAI API credentials

    
    [OPENAI_API]
    OPENAI_KEY = key
    

In [23]:
import configparser
from openai import OpenAI

In [10]:
# Loading OpenAI API key from configuration file
config = configparser.ConfigParser()
config.read('../config.ini') #Path to your configuration file
OPENAI_KEY = config.get('OPENAI_API', 'OPENAI_KEY')

In [25]:
client = OpenAI(api_key=OPENAI_KEY)

### ChatCompletion : Get GPT response to your prompt

#### Documentation : https://platform.openai.com/docs/guides/text-generation/chat-completions-api

In [17]:
message = data_sample.head(1).customer_tweet[0]
company = data_sample.head(1).company[0]

print(f"Tweet: {message} \nCompany: {company}")

Tweet: Ordered this around 2am Friday morning and it made it here already... good job @115830 https://t.co/XXMuII3QwQ 
Company: AmazonHelp


In [30]:
instruction =  f"""\
You are a chatbot answering customer's messages. You are working for a company called {company}. Reply to the message below.
#####
Message:
"{message}" """
print(f"Instruction:\n\n{instruction}")

Instruction:

You are a chatbot answering customer's messages. You are working for a company called AmazonHelp. Reply to the message below.
#####
Message:
"Ordered this around 2am Friday morning and it made it here already... good job @115830 https://t.co/XXMuII3QwQ" 


In [None]:
messages = [
    {"role": "user", "content": instruction}
]

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=messages,
    temperature = 0.7) # temperature ranges from 0 (deterministic) to 1 (creative)

In [28]:
generated_text = response.choices[0].message.content
print(f"Answer generated:\n\n{generated_text}")

Answer generated:

Thank you for sharing your positive feedback! We're glad to hear that your order arrived quickly. If you have any questions or need assistance with anything else, feel free to reach out to us. Enjoy your purchase! 😊 #AmazonHelp


### Embedding model : Get the embedding of a text

#### Documentation : https://platform.openai.com/docs/guides/embeddings

In [33]:
message_1 = "Ordered that pair of shoes around 2am Friday morning and it made it here already... good job"
message_2 = "Placed an order for some headphones early Monday morning, and they've arrived in 2 days... impressive service!"
message_3 = "I hate Amazon!!!"

In [32]:
def get_embedding(text, model="text-embedding-3-small"):
   text = text.replace("\n", " ")
   return client.embeddings.create(input = [text], model=model).data[0].embedding

In [34]:
embedding_message_1 = get_embedding(message_1)
embedding_message_2 = get_embedding(message_2)
embedding_message_3 = get_embedding(message_3)

In [36]:
print(f"Message 1 : {message_1}\nEmbedding length : {len(embedding_message_1)}\nEmbedding:\n{embedding_message_1}")

Message 1 : Ordered that pair of shoes around 2am Friday morning and it made it here already... good job
Embedding length : 1536
Embedding:
[0.024054376408457756, -0.007412675302475691, -0.057449985295534134, -0.028037725016474724, 0.012272641062736511, -0.028724992647767067, 0.018191561102867126, 0.013927694410085678, -0.03983348608016968, -0.04230204224586487, -0.006595667917281389, 0.020688166841864586, -0.05043704807758331, -0.0297068040817976, -0.03276444599032402, -0.011129532009363174, 0.029650701209902763, -0.009327207691967487, -0.06962444633245468, 0.04502306133508682, -0.005659440997987986, -0.022469453513622284, 0.020281415432691574, -0.028276165947318077, -0.02561124786734581, -0.016704818233847618, 0.025274626910686493, -0.0163681972771883, 0.02382996305823326, -0.006746446248143911, 0.021038813516497612, -0.02967875264585018, 0.06373357772827148, -0.036663640290498734, 0.00189174001570791, 0.0239702221006155, -0.02312866970896721, 0.03425118699669838, -0.0322314612567424

#### Compare two embeddings to find similarity

A cosine similarity close to 1 implies that the sentence embeddings are very similar, meaning their vectors point in almost the same direction. This suggests the sentences have similar meanings or semantic content.

A cosine similarity around 0 indicates that the sentence embeddings are orthogonal (or near-orthogonal) to each other in the vector space, suggesting that the sentences are unrelated or have neutral similarity.

A cosine similarity close to -1 indicates that the embeddings are diametrically opposed in the vector space, suggesting that the sentences are highly dissimilar or have opposite meanings.

In [41]:
import numpy as np

def cosine_similarity(A, B):
    dot_product = np.dot(A, B)
    magnitude_A = np.linalg.norm(A)
    magnitude_B = np.linalg.norm(B)
    return dot_product / (magnitude_A * magnitude_B)

In [44]:
similarity_message1_message2 = cosine_similarity(embedding_message_1, embedding_message_2)
print(f"Message 1 : {message_1}\nMessage 2 : {message_2}\n\nSimilarity: {similarity_message1_message2}")

Message 1 : Ordered that pair of shoes around 2am Friday morning and it made it here already... good job
Message 2 : Placed an order for some headphones early Monday morning, and they've arrived in 2 days... impressive service!

Similarity: 0.5958406184122572


In [45]:
similarity_message1_message3 = cosine_similarity(embedding_message_1, embedding_message_3)
print(f"Message 1 : {message_1}\nMessage 3 : {message_3}\n\nSimilarity: {similarity_message1_message3}")

Message 1 : Ordered that pair of shoes around 2am Friday morning and it made it here already... good job
Message 3 : I hate Amazon!!!

Similarity: 0.16232099289477625
