# Conversation Analysis using Transformer NLP

In [None]:
import pandas as pd
from io import StringIO

In [None]:
from google.colab import files

uploaded = files.upload()
# Show only file names and byte counts: echoing `uploaded` itself would dump the
# raw call logs (customer names, order numbers) into the saved notebook output.
{name: len(content) for name, content in uploaded.items()}

Saving Call_Logs (1).csv to Call_Logs (1).csv


{'Call_Logs (1).csv': b',Logs\n0,"Date: 17/04/2024\nTime: 15:45:37\n\nAgent: Good afternoon, thank you for calling Fresh Fare Meal Kits customer service. My name is Sarah, how can I assist you today?\n\nClient: Hi Sarah, I\'m calling because I received my meal kit delivery yesterday, but unfortunately, one of the ingredients was missing.\n\nAgent: I\'m sorry to hear that! Let me take a look at your account. Can I have your name and order number, please?\n\nClient: Sure, my name is Emma Smith, and my order number is 123456789.\n\nAgent: Thank you, Emma. I see your account here. Could you please specify which ingredient was missing from your delivery?\n\nClient: It was the fresh basil for the pasta dish.\n\nAgent: I apologize for the inconvenience, Emma. We\'ll make sure to get that sorted out for you right away. Would you prefer to have the missing basil sent in your next delivery, or would you like us to reimburse you for the missing ingredient?\n\nClient: It would be great if you coul

In [None]:
# Convert byte string to normal string and create a DataFrame.
# Use the name of whatever file was just uploaded rather than a hard-coded one:
# the saved name varies (e.g. a " (1)" suffix), and a fixed key raises KeyError.
if not uploaded:
  raise ValueError("No file was uploaded; re-run the upload cell and choose the call-log CSV.")
uploaded_name = next(iter(uploaded))
decoded_data = uploaded[uploaded_name].decode('utf-8')
data_io = StringIO(decoded_data)

# Creating DataFrame
data = pd.read_csv(data_io)
data.head()

Unnamed: 0.1,Unnamed: 0,Logs
0,0,Date: 17/04/2024\nTime: 15:45:37\n\nAgent: Goo...
1,1,Date: 17/04/2024\nTime: 16:25:45\n\nAgent: Goo...
2,2,Date: 18/04/2024\nTime: 09:15:26\n\nAgent: Goo...
3,3,Date: 18/04/2024\nTime: 12:15:30\n\nAgent: Goo...
4,4,Date: 07/04/2024\nTime: 10:45:52\n\nAgent: Goo...


In [None]:
# Define a function to extract date, time, and conversations from each log
def extract_info(df):
  """Split one call log into its date, time and conversation text.

  Args:
    df: A single row (anything indexable by 'Logs', e.g. a pandas Series)
      whose 'Logs' value has the layout
      "Date: <date>", "Time: <time>", a separator line, then the dialogue.

  Returns:
    A (date, time, conversation) tuple of strings. Blank and whitespace-only
    lines are removed from the conversation and the rest are joined with a
    single newline.

  Raises:
    IndexError: If the log has fewer than two lines or a header line lacks
      the ': ' separator.
  """
  # splitlines() copes with both '\n' and '\r\n' endings, so values never
  # carry a stray '\r'.
  lines = df['Logs'].splitlines()

  # maxsplit=1 keeps the whole value even if it contains ': ' itself.
  date = lines[0].split(': ', 1)[1].strip()
  time = lines[1].split(': ', 1)[1].strip()

  # Line index 2 is the separator between the header and the dialogue.
  conv = "\n".join(line.strip() for line in lines[3:] if line.strip())

  return date, time, conv

# Parse each log into Date / Time / Conversation columns. The guard keeps this
# cell re-runnable: once 'Logs' has been dropped there is nothing left to parse.
if 'Logs' in data.columns:
  data[['Date', 'Time', 'Conversation']] = data.apply(extract_info, axis=1, result_type="expand")

  # Drop the raw 'Logs' text and the CSV's leftover index column ('Unnamed: 0').
  # errors='ignore' tolerates a CSV that was saved without that index column.
  data = data.drop(columns=['Logs', 'Unnamed: 0'], errors='ignore')
data.head()

Unnamed: 0,Date,Time,Conversation
0,17/04/2024,15:45:37,"Agent: Good afternoon, thank you for calling F..."
1,17/04/2024,16:25:45,"Agent: Good afternoon, thank you for reaching ..."
2,18/04/2024,09:15:26,"Agent: Good morning, you're speaking with Jess..."
3,18/04/2024,12:15:30,"Agent: Good afternoon, and thank you for calli..."
4,07/04/2024,10:45:52,"Agent: Good morning, and thank you for contact..."


# Conversation Summary

Consider the [bart-large-cnn-samsum](https://huggingface.co/philschmid/bart-large-cnn-samsum) model. It is a [BART](https://huggingface.co/docs/transformers/model_doc/bart) model that has been trained twice: first for general text summarization and then specifically for dialogue.

In [None]:
from transformers import pipeline

In [None]:
# Create a pipeline using the dialogue-tuned BART checkpoint introduced in the
# section text (the plain CNN checkpoint is tuned for news articles, not dialogue).
summarizer = pipeline("summarization", model="philschmid/bart-large-cnn-samsum")

config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
# Collect one summary per conversation
output_list = []

for conv in data['Conversation']:
  # truncation=True clips transcripts that exceed the model's maximum input
  # length instead of letting the call fail on an over-long conversation.
  summary = summarizer(conv, truncation=True)[0]['summary_text']
  output_list.append(summary)

In [None]:
# Attach the generated summaries as a new column, row for row
summary_column = pd.Series(output_list, index=data.index)
data['summary'] = summary_column
data.head()

Unnamed: 0,Date,Time,Conversation,summary
0,17/04/2024,15:45:37,"Agent: Good afternoon, thank you for calling F...","Client: Hi Sarah, I'm calling because I receiv..."
1,17/04/2024,16:25:45,"Agent: Good afternoon, thank you for reaching ...",Client: I've just received my delivery and the...
2,18/04/2024,09:15:26,"Agent: Good morning, you're speaking with Jess...","Client: Hi Jessica, I'm calling because I've b..."
3,18/04/2024,12:15:30,"Agent: Good afternoon, and thank you for calli...","Client: I received my meal kit delivery today,..."
4,07/04/2024,10:45:52,"Agent: Good morning, and thank you for contact...","Client: I received my delivery yesterday, but ..."


# Cancellation Detection

We want to prioritize conversations where clients express a desire to cancel their subscription. To achieve this, let's use [zero-shot classification](https://huggingface.co/tasks/zero-shot-classification) with the [facebook/bart-large-mnli](https://huggingface.co/facebook/bart-large-mnli) model from Hugging Face's Transformers library. This model lets us assign labels of our own choosing to text samples without any task-specific training data, making it suitable for our task.



In [None]:
# Build the zero-shot classification pipeline on the BART-MNLI checkpoint
classifier = pipeline(
  task="zero-shot-classification",
  model="facebook/bart-large-mnli",
)

config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
# Candidate labels handed to the zero-shot classifier for every conversation.
# 'cancellation' is the class of interest; 'other' is the catch-all alternative.
labels = ['cancellation', 'other']

In [None]:
# Keep the top-ranked label for each conversation
classification = []
for conv in data['Conversation']:
  result = classifier(conv, labels)
  top_label = result['labels'][0]
  classification.append(top_label)

In [None]:
# Create a boolean column: True where the predicted label is 'cancellation'
data['Cancellation'] = [cls == 'cancellation' for cls in classification]

In [None]:
# Preview the first rows, now including the 'Cancellation' flag
data.head()

Unnamed: 0,Date,Time,Conversation,summary,Cancellation
0,17/04/2024,15:45:37,"Agent: Good afternoon, thank you for calling F...","Client: Hi Sarah, I'm calling because I receiv...",False
1,17/04/2024,16:25:45,"Agent: Good afternoon, thank you for reaching ...",Client: I've just received my delivery and the...,False
2,18/04/2024,09:15:26,"Agent: Good morning, you're speaking with Jess...","Client: Hi Jessica, I'm calling because I've b...",True
3,18/04/2024,12:15:30,"Agent: Good afternoon, and thank you for calli...","Client: I received my meal kit delivery today,...",False
4,07/04/2024,10:45:52,"Agent: Good morning, and thank you for contact...","Client: I received my delivery yesterday, but ...",False


# Cancellation Reason Inference

Let's use Google's open-source LLM [Flan T5 model](https://huggingface.co/google/flan-t5-base) to understand why the customers chose to leave our service. This versatile model, fine-tuned on a wide range of instruction-style prompts, is adept at a variety of language tasks—we just need to ask the right questions!

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

In [None]:
# Set up the model
# Keep the checkpoint id under its own name so that `model` only ever refers to
# the loaded network, never to a string.
model_name = "google/flan-t5-base"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [None]:
#Build a function to infer the cancellation reasons
def cancellation_reason(df):
  """Ask the Flan-T5 model why the client in one row cancelled.

  Args:
    df: A single row exposing 'Cancellation' and 'Conversation' entries.

  Returns:
    The string 'None' when the row's 'Cancellation' value equals False;
    otherwise the decoded text generated by the model for the prompt.
  """
  # Guard clause: rows not flagged as cancellations never reach the model.
  if df['Cancellation'] == False:
    return 'None'

  # The prompt text, including its indentation, is sent to the model verbatim.
  prompt = f"""
    {df['Conversation']}

    What are the issues that led the client to cancel their subscription?

    """

  # Tokenize the prompt, generate an answer, and decode it back to text.
  token_ids = tokenizer(prompt, return_tensors = "pt").input_ids
  generated = model.generate(token_ids, max_new_tokens = 50, min_length = 20)
  return tokenizer.decode(generated[0], skip_special_tokens = True)

In [None]:
# Run the inference row by row and store the answers in a new column
reasons = data.apply(cancellation_reason, axis="columns")
data['Cancellation_reasons'] = reasons

In [None]:
# Preview the first rows, now including the inferred cancellation reasons
data.head()

Unnamed: 0,Date,Time,Conversation,summary,Cancellation,Cancellation_reasons
0,17/04/2024,15:45:37,"Agent: Good afternoon, thank you for calling F...","Client: Hi Sarah, I'm calling because I receiv...",False,
1,17/04/2024,16:25:45,"Agent: Good afternoon, thank you for reaching ...",Client: I've just received my delivery and the...,False,
2,18/04/2024,09:15:26,"Agent: Good morning, you're speaking with Jess...","Client: Hi Jessica, I'm calling because I've b...",True,The client's last three deliveries have been l...
3,18/04/2024,12:15:30,"Agent: Good afternoon, and thank you for calli...","Client: I received my meal kit delivery today,...",False,
4,07/04/2024,10:45:52,"Agent: Good morning, and thank you for contact...","Client: I received my delivery yesterday, but ...",False,
