## Installations

In [1]:
!pip install ipykernel pandas



## Read CSV

In [2]:
import pandas as pd

In [3]:
df = pd.read_csv("./data/customer_support_tickets.csv")

In [4]:
df.head()

Unnamed: 0,Ticket ID,Customer Name,Customer Email,Customer Age,Customer Gender,Product Purchased,Date of Purchase,Ticket Type,Ticket Subject,Ticket Description,Ticket Status,Resolution,Ticket Priority,Ticket Channel,First Response Time,Time to Resolution,Customer Satisfaction Rating
0,1,Marisa Obrien,carrollallison@example.com,32,Other,GoPro Hero,2021-03-22,Technical issue,Product setup,I'm having an issue with the {product_purchase...,Pending Customer Response,,Critical,Social media,2023-06-01 12:15:36,,
1,2,Jessica Rios,clarkeashley@example.com,42,Female,LG Smart TV,2021-05-22,Technical issue,Peripheral compatibility,I'm having an issue with the {product_purchase...,Pending Customer Response,,Critical,Chat,2023-06-01 16:45:38,,
2,3,Christopher Robbins,gonzalestracy@example.com,48,Other,Dell XPS,2020-07-14,Technical issue,Network problem,I'm facing a problem with my {product_purchase...,Closed,Case maybe show recently my computer follow.,Low,Social media,2023-06-01 11:14:38,2023-06-01 18:05:38,3.0
3,4,Christina Dillon,bradleyolson@example.org,27,Female,Microsoft Office,2020-11-13,Billing inquiry,Account access,I'm having an issue with the {product_purchase...,Closed,Try capital clearly never color toward story.,Low,Social media,2023-06-01 07:29:40,2023-06-01 01:57:40,3.0
4,5,Alexander Carroll,bradleymark@example.com,67,Female,Autodesk AutoCAD,2020-02-04,Billing inquiry,Data loss,I'm having an issue with the {product_purchase...,Closed,West decision evidence bit.,Low,Email,2023-06-01 00:12:42,2023-06-01 19:53:42,1.0


In [5]:
df.columns

Index(['Ticket ID', 'Customer Name', 'Customer Email', 'Customer Age',
       'Customer Gender', 'Product Purchased', 'Date of Purchase',
       'Ticket Type', 'Ticket Subject', 'Ticket Description', 'Ticket Status',
       'Resolution', 'Ticket Priority', 'Ticket Channel',
       'First Response Time', 'Time to Resolution',
       'Customer Satisfaction Rating'],
      dtype='object')

In [6]:
product_ticket_description_df = df[['Product Purchased', 'Ticket Description']].reset_index().rename(columns={'index': 'ID'})

In [7]:
product_ticket_description_df.head()

Unnamed: 0,ID,Product Purchased,Ticket Description
0,0,GoPro Hero,I'm having an issue with the {product_purchase...
1,1,LG Smart TV,I'm having an issue with the {product_purchase...
2,2,Dell XPS,I'm facing a problem with my {product_purchase...
3,3,Microsoft Office,I'm having an issue with the {product_purchase...
4,4,Autodesk AutoCAD,I'm having an issue with the {product_purchase...


In [8]:
product_ticket_description_df.tail()

Unnamed: 0,ID,Product Purchased,Ticket Description
8464,8464,LG OLED,My {product_purchased} is making strange noise...
8465,8465,Bose SoundLink Speaker,I'm having an issue with the {product_purchase...
8466,8466,GoPro Action Camera,I'm having an issue with the {product_purchase...
8467,8467,PlayStation,I'm having an issue with the {product_purchase...
8468,8468,Philips Hue Lights,There seems to be a hardware problem with my {...


In [9]:
# Create a new column with the product name inserted in the ticket description
product_ticket_description_df['Updated Ticket Description'] = df.apply(
    lambda row: row['Ticket Description'].replace('{product_purchased}', row['Product Purchased']),
    axis=1
)

In [10]:
product_ticket_description_df.head()

Unnamed: 0,ID,Product Purchased,Ticket Description,Updated Ticket Description
0,0,GoPro Hero,I'm having an issue with the {product_purchase...,I'm having an issue with the GoPro Hero. Pleas...
1,1,LG Smart TV,I'm having an issue with the {product_purchase...,I'm having an issue with the LG Smart TV. Plea...
2,2,Dell XPS,I'm facing a problem with my {product_purchase...,I'm facing a problem with my Dell XPS. The Del...
3,3,Microsoft Office,I'm having an issue with the {product_purchase...,I'm having an issue with the Microsoft Office....
4,4,Autodesk AutoCAD,I'm having an issue with the {product_purchase...,I'm having an issue with the Autodesk AutoCAD....


In [11]:
product_ticket_description_df = product_ticket_description_df[['ID', 'Product Purchased', 'Updated Ticket Description']]
product_ticket_description_df.head()

Unnamed: 0,ID,Product Purchased,Updated Ticket Description
0,0,GoPro Hero,I'm having an issue with the GoPro Hero. Pleas...
1,1,LG Smart TV,I'm having an issue with the LG Smart TV. Plea...
2,2,Dell XPS,I'm facing a problem with my Dell XPS. The Del...
3,3,Microsoft Office,I'm having an issue with the Microsoft Office....
4,4,Autodesk AutoCAD,I'm having an issue with the Autodesk AutoCAD....


In [12]:
product_ticket_description_df.rename(columns={
    'Updated Ticket Description': 'ticket_description',
    'Product Purchased': 'product_purchased'
    }, inplace=True)

In [13]:
product_ticket_description_df.head()

Unnamed: 0,ID,product_purchased,ticket_description
0,0,GoPro Hero,I'm having an issue with the GoPro Hero. Pleas...
1,1,LG Smart TV,I'm having an issue with the LG Smart TV. Plea...
2,2,Dell XPS,I'm facing a problem with my Dell XPS. The Del...
3,3,Microsoft Office,I'm having an issue with the Microsoft Office....
4,4,Autodesk AutoCAD,I'm having an issue with the Autodesk AutoCAD....


In [14]:
print(product_ticket_description_df.iloc[0].ticket_description)

I'm having an issue with the GoPro Hero. Please assist.

Your billing zip code is: 71701.

We appreciate that you have requested a website address.

Please double check your email address. I've tried troubleshooting steps mentioned in the user manual, but the issue persists.


In [15]:
product_ticket_description_df["ticket_description"] = product_ticket_description_df.apply(
    lambda row: row.ticket_description.strip().replace('\n', '').replace('\n', ''),
    axis=1
)

In [16]:
print(product_ticket_description_df.iloc[0].ticket_description)

I'm having an issue with the GoPro Hero. Please assist.Your billing zip code is: 71701.We appreciate that you have requested a website address.Please double check your email address. I've tried troubleshooting steps mentioned in the user manual, but the issue persists.


In [17]:
product_ticket_description_df["product_purchased"] = product_ticket_description_df.apply(
    lambda row: row.product_purchased.strip(),
    axis=1
)

In [18]:
print(product_ticket_description_df.iloc[0].product_purchased)

GoPro Hero


In [19]:
product_ticket_description_df.shape

(8469, 3)

In [20]:
product_ticket_description_df.to_csv("./data/product_ticket_description.csv", index=False)

## Creating Embeddings

In [21]:
%pip install -qU langchain-openai python-decouple

Note: you may need to restart the kernel to use updated packages.


### Load Envs

In [22]:
from langchain_openai import OpenAIEmbeddings

In [23]:
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
    # With the `text-embedding-3` class
    # of models, you can specify the size
    # of the embeddings you want returned.
    dimensions=1024
)

In [24]:
df = pd.read_csv("./data/product_ticket_description.csv")

In [25]:
df.head()

Unnamed: 0,ID,product_purchased,ticket_description
0,0,GoPro Hero,I'm having an issue with the GoPro Hero. Pleas...
1,1,LG Smart TV,I'm having an issue with the LG Smart TV. Plea...
2,2,Dell XPS,I'm facing a problem with my Dell XPS. The Del...
3,3,Microsoft Office,I'm having an issue with the Microsoft Office....
4,4,Autodesk AutoCAD,I'm having an issue with the Autodesk AutoCAD....


In [30]:
df = df[:250]

In [31]:
df.shape

(250, 4)

In [36]:
df["product_purchased_embeddings"] = df.apply(
    lambda row: list(embeddings.embed_query(row.product_purchased)),
    axis=1
)

In [41]:
df.dtypes

ID                                int64
product_purchased                object
ticket_description               object
product_purchased_embeddings     object
ticket_description_embeddings    object
dtype: object

In [42]:
len(df["product_purchased_embeddings"][0])

1024

In [43]:
df["product_purchased_embeddings"][0]

[0.014811846427619457,
 0.015992820262908936,
 0.0028310271445661783,
 0.04560547694563866,
 0.006086984183639288,
 0.031124742701649666,
 -0.015551334246993065,
 0.0864870548248291,
 0.011754557490348816,
 0.040704984217882156,
 -0.0029110463801771402,
 0.02243851125240326,
 0.027239669114351273,
 -0.057437293231487274,
 0.02120235189795494,
 0.006407061591744423,
 -0.04430309310555458,
 0.008382710628211498,
 -0.0237960796803236,
 -0.006208392791450024,
 0.03611353039741516,
 -0.05094745382666588,
 -0.03670953959226608,
 -0.00573931448161602,
 0.027901897206902504,
 -0.004144447389990091,
 -0.0032007715199142694,
 0.04441346228122711,
 -0.027504561468958855,
 -0.02664366364479065,
 0.03584863990545273,
 0.028784869238734245,
 -0.01870795711874962,
 0.02891731448471546,
 0.04231640696525574,
 0.09589070081710815,
 0.03202978894114494,
 0.013178348541259766,
 0.014922217465937138,
 0.003385643707588315,
 0.024899795651435852,
 0.004726656712591648,
 -0.0409478023648262,
 0.032096013426

In [44]:
df["ticket_description_embeddings"] = df.apply(
    lambda row: list(embeddings.embed_query(row.ticket_description)),
    axis=1
)

In [45]:
df.head()

Unnamed: 0,ID,product_purchased,ticket_description,product_purchased_embeddings,ticket_description_embeddings
0,0,GoPro Hero,I'm having an issue with the GoPro Hero. Pleas...,"[0.014811846427619457, 0.015992820262908936, 0...","[0.026763655245304108, 0.030567917972803116, -..."
1,1,LG Smart TV,I'm having an issue with the LG Smart TV. Plea...,"[0.0044128731824457645, -0.01259020809084177, ...","[-0.007816504687070847, 0.0021440007258206606,..."
2,2,Dell XPS,I'm facing a problem with my Dell XPS. The Del...,"[-0.022125372663140297, 0.0597047284245491, 0....","[-0.007718625478446484, 0.08504624664783478, 0..."
3,3,Microsoft Office,I'm having an issue with the Microsoft Office....,"[-0.019686367362737656, 0.0008507751626893878,...","[-0.016548125073313713, -0.016119062900543213,..."
4,4,Autodesk AutoCAD,I'm having an issue with the Autodesk AutoCAD....,"[-0.004201975185424089, -0.012518677860498428,...","[-0.009693021886050701, -0.04226860776543617, ..."


In [46]:
df.to_csv("./data/product_ticket_description_embeddings.csv", index=False)