In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
import nltk
from nltk.corpus import stopwords
import string

# Sample data: Replace this with your actual support ticket data
data = pd.DataFrame({
    'ticket_id': [1, 2, 3, 4, 5],
    'issue': [
        'Internet is not working',
        'Unable to login',
        'Slow computer performance',
        'Payment gateway issue',
        'Software installation error'
    ]
})

# Preprocessing
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
def preprocess_text(text):
    text = text.lower()
    text = ''.join([char for char in text if char not in string.punctuation])
    text = ' '.join([word for word in text.split() if word not in stop_words])
    return text

data['cleaned_issue'] = data['issue'].apply(preprocess_text)

# Feature extraction
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(data['cleaned_issue'])

# Clustering
num_clusters = 2  # Define the number of clusters you want
model = KMeans(n_clusters=num_clusters)
model.fit(X)

# Adding cluster labels to data
data['cluster'] = model.labels_

# Analyzing patterns in clusters
for cluster in range(num_clusters):
    print(f"Cluster {cluster}:")
    cluster_tickets = data[data['cluster'] == cluster]
    print(cluster_tickets['issue'].values)
    print()

# Example of analyzing specific cluster
cluster_to_analyze = 0
cluster_tickets = data[data['cluster'] == cluster_to_analyze]
print(f"Patterns in Cluster {cluster_to_analyze}:")
print(cluster_tickets['issue'].value_counts())


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Cluster 0:
['Internet is not working' 'Unable to login' 'Slow computer performance'
 'Payment gateway issue']

Cluster 1:
['Software installation error']

Patterns in Cluster 0:
issue
Internet is not working      1
Unable to login              1
Slow computer performance    1
Payment gateway issue        1
Name: count, dtype: int64


In [None]:
import pandas as pd

# Sample data: Replace this with your actual support ticket data and solutions
data = pd.DataFrame({
    'problem': [
        'Internet is not working',
        'Unable to login',
        'Slow computer performance',
        'Payment gateway issue',
        'Software installation error'
    ],
    'solution': [
        'Please restart your router and check if the problem persists.',
        'Ensure you are using the correct username and password. If the problem continues, reset your password.',
        'Try closing unnecessary applications and restart your computer.',
        'Check if your payment method is valid and try again. If the issue persists, contact support.',
        'Refer to the installation guide and ensure all steps are followed. If issues continue, reinstall the software.'
    ]
})

# Function to find and suggest solution for a given issue
def find_solution(issue):
    for i, row in data.iterrows():
        if issue.lower() in row['problem'].lower():
            return row['solution']
    return "Solution not found. Please contact support for assistance."

# Example usage
new_issue = 'The computer is running very slow'
suggested_solution = find_solution(new_issue)

print(f'Issue: {new_issue}')
print(f'Suggested Solution: {suggested_solution}')


Issue: The computer is running very slow
Suggested Solution: Solution not found. Please contact support for assistance.


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu


   ticket_id                        issue sentiment     score priority
0          1      Internet is not working  NEGATIVE  0.999731      Low
1          2              Unable to login  NEGATIVE  0.984691      Low
2          3    Slow computer performance  NEGATIVE  0.999794      Low
3          4        Payment gateway issue  NEGATIVE  0.984926      Low
4          5  Software installation error  NEGATIVE  0.999690      Low


In [None]:
import pandas as pd
from transformers import pipeline

# Sample data: Replace this with your actual support ticket data
data = pd.DataFrame({
    'ticket_id': [1, 2, 3, 4, 5],
    'issue': [
        'Internet is not working',
        'Unable to login',
        'Slow computer performance',
        'Payment gateway issue',
        'Software installation error'
    ]
})

# Initialize sentiment analysis pipeline
sentiment_pipeline = pipeline('sentiment-analysis')

# Function to analyze sentiment
def analyze_sentiment(issue):
    result = sentiment_pipeline(issue)[0]
    return result['label'], result['score']

# Flag high-priority tickets
def flag_high_priority_tickets(data, threshold=0.5):
    data['sentiment'], data['score'] = zip(*data['issue'].apply(analyze_sentiment))
    data['priority'] = data['score'].apply(lambda x: 'High' if x < threshold else 'Low')
    return data

# Example usage
high_priority_tickets = flag_high_priority_tickets(data, threshold=0.2)

print(high_priority_tickets)


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


   ticket_id                        issue sentiment     score priority
0          1      Internet is not working  NEGATIVE  0.999731      Low
1          2              Unable to login  NEGATIVE  0.984691      Low
2          3    Slow computer performance  NEGATIVE  0.999794      Low
3          4        Payment gateway issue  NEGATIVE  0.984926      Low
4          5  Software installation error  NEGATIVE  0.999690      Low


In [3]:
!pip install "gspread==6.1.3"

Collecting gspread==6.1.3
  Downloading gspread-6.1.3-py3-none-any.whl.metadata (11 kB)
Downloading gspread-6.1.3-py3-none-any.whl (57 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/57.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: gspread
  Attempting uninstall: gspread
    Found existing installation: gspread 6.1.4
    Uninstalling gspread-6.1.4:
      Successfully uninstalled gspread-6.1.4
Successfully installed gspread-6.1.3


In [4]:
import gspread

key = "AIzaSyBL-bqY8ywZidFzMjce97sKUxJfaXRSF_g"
gc = gspread.api_key(key)
sh = gc.open_by_key("1tyxACc95GD88T2Me_xhktYbc14P6-BBZkOWlT7MUaeU")

In [5]:
import pandas as pd

worksheet = sh.sheet1 # or sh.get_worksheet(0) for the first sheet

# Get all values from the worksheet
data = worksheet.get_all_values()

# Convert to DataFrame
df = pd.DataFrame(data[1:], columns=data[0])  # First row is header

df.head()

Unnamed: 0,id,subject,body,answer,type,queue,priority,language,business_type,tag_1,tag_2,tag_3,tag_4,tag_5,tag_6,tag_7,tag_8,tag_9
0,1001352387736,Urgent: Critical impact on enterprise network ...,"Dear Customer Support Team, We are experiencin...",Subject: Re: Urgent: Critical impact on enterp...,Incident,Technical Support,high,en,IT Services,Service Outage,Network Issue,Urgent Issue,Technical Support,Problem Resolution,Critical Failure,System Crash,Service Disruption,
1,1004699418379,Intermittent Cursor Freezing Issue on Dell XPS,"Dear Customer Support,<br><br>I hope this mess...","Dear <name>,\n\n\nThank you for reaching out r...",Incident,Product Support,low,en,Tech Online Store,Technical Support,Product Support,Hardware Failure,Problem Resolution,Urgent Issue,Service Recovery,Documentation Request,,
2,1006966905046,Dringend: Unterstützung für die Datenwiederher...,"Hallo, wir haben severe Datenverluste in MySQL...","Hallo, vielen Dank, dass Sie uns kontaktiert h...",Incident,Technical Support,high,de,IT Services,Data Breach,Backup Restore,Technical Support,Urgent Issue,Software Bug,Problem Resolution,,,
3,1009231330404,Anfrage zu den MacBook Air M1 Funktionen,"Sehr geehrtes Kundenserviceteam,\n\n\nich hoff...","Sehr geehrter <name>,\n\n\nvielen Dank für Ihr...",Request,Sales and Pre-Sales,low,de,Tech Online Store,Customer Service,Product Support,Sales Inquiry,Technical Guidance,Warranty Claim,General Inquiry,,,
4,1024440081041,Solicitação de Assistência com Erro de Instala...,"Caro Suporte ao Cliente,\n\n\nEstou enfrentand...","Caro <name>,\n\n\nObrigado por entrar em conta...",Problem,Technical Support,medium,pt,IT Services,Technical Support,Software Bug,Urgent Issue,Problem Resolution,Product Support,,,,


In [6]:
df_en = df.loc[df["language"].eq("en"), :]
df_en.head()

Unnamed: 0,id,subject,body,answer,type,queue,priority,language,business_type,tag_1,tag_2,tag_3,tag_4,tag_5,tag_6,tag_7,tag_8,tag_9
0,1001352387736,Urgent: Critical impact on enterprise network ...,"Dear Customer Support Team, We are experiencin...",Subject: Re: Urgent: Critical impact on enterp...,Incident,Technical Support,high,en,IT Services,Service Outage,Network Issue,Urgent Issue,Technical Support,Problem Resolution,Critical Failure,System Crash,Service Disruption,
1,1004699418379,Intermittent Cursor Freezing Issue on Dell XPS,"Dear Customer Support,<br><br>I hope this mess...","Dear <name>,\n\n\nThank you for reaching out r...",Incident,Product Support,low,en,Tech Online Store,Technical Support,Product Support,Hardware Failure,Problem Resolution,Urgent Issue,Service Recovery,Documentation Request,,
5,1024619926727,Exchange Request for Malfunctioning Dell XPS 13,"Dear Tech Online Store Customer Support,\n\n\n...","Dear <name>,\n\n\nThank you for reaching out a...",Change,Returns and Exchanges,medium,en,Tech Online Store,Returns and Exchanges,Product Replacement,Customer Service,Technical Support,Order Issue,,,,
6,1026487543175,Request for Server Administration Assistance,"Dear IT Services Customer Support, \n\n\nWe ar...",Subject: Re: Request for Server Administration...,Request,Product Support,medium,en,IT Services,Technical Support,IT Support,Performance Tuning,Problem Resolution,Technical Guidance,Service Recovery,,,
7,1027850274374,Immediate Attention Required: AWS Outage Concern,"Greetings IT Services Customer Support,\n\n\nI...","Hello <name>,\n\n\nWe appreciate you informing...",Incident,Technical Support,high,en,IT Services,Service Outage,Urgent Issue,IT Support,System Maintenance,Incident Report,Customer Service,,,


In [9]:
# prompt: make function for making dependency parser ui graph of body of row

import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")

def dependency_parser_ui(text):
  """
  Generates a dependency parse visualization for the given text.

  Args:
      text: The input text to analyze.

  Returns:
      None. Displays the dependency parse visualization in the notebook.
  """
  doc = nlp(text)
  displacy.render(doc, style="dep", jupyter=True, options={"distance": 120})

# Example usage (assuming df_with_prouct_name is defined):
# for index, row in df_with_prouct_name.head(2).iterrows():
#  dependency_parser_ui(row["body"])

In [11]:
def extract_issues(data):
    stop_verbs = {"write", "say", "report", "request", "concern", "purchase"}  # Uninformative verbs to skip
    extracted_issues = []

    for entry in data:
        doc = nlp(entry["body"])
        product = entry["products_ner_bert_large"]
        issues = []

        for sent in doc.sents:
            # Check if the product is mentioned in the sentence
            if product.lower() in sent.text.lower():
                # Start with the main ROOT of the sentence
                root_token = sent.root

                # Skip generic verbs and search for better alternatives
                if root_token.lemma_ in stop_verbs:
                    # Look for alternative verbs or nouns in the subtree
                    for child in root_token.children:
                        if child.pos_ in {"VERB"}: # , "NOUN"
                            root_token = child
                            break  # Take the first meaningful token

                # Capture the issue if the token is meaningful
                if root_token.pos_ in {"VERB"}: # , "NOUN"
                    issue = {
                        "verb": root_token.lemma_,
                        "description": " ".join([token.text for token in root_token.subtree]),
                    }
                    issues.append(issue)

        extracted_issues.append({"product": product, "issues": issues})

    return extracted_issues

In [12]:
df_en["body"].str.lower().map(lambda x: "experience" in x).sum() / df_en.shape[0] * 100

7.669616519174041

In [14]:
# prompt: Main Project

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
import nltk
from nltk.corpus import stopwords
import string
from transformers import pipeline
import gspread
import spacy
from spacy import displacy

# ... (rest of your existing code) ...

# Example usage for extract_issues function (assuming df_en is defined)
# extracted_issues_list = extract_issues(df_en.head(10).to_dict('records')) # Process only the first 10 records for demonstration
# print(extracted_issues_list)


# Example usage for dependency_parser_ui (assuming df_en is defined)
# for index, row in df_en.head(2).iterrows():
#     dependency_parser_ui(row["body"])

In [16]:
import pandas as pd
from transformers import pipeline

# Initialize the pre-trained model for sentiment analysis
sentiment_pipeline = pipeline('sentiment-analysis')

def analyze_tickets(tickets):
    # Analyze sentiment of each ticket
    tickets['sentiment'] = tickets['message'].apply(lambda x: sentiment_pipeline(x)[0]['label'])
    return tickets

def proactive_prevention(tickets):
    for idx, row in tickets.iterrows():
        if row['sentiment'] == 'NEGATIVE':
            # Take proactive action (e.g., notify support team, offer discount)
            print(f"Proactive action needed for ticket {row['ticket_id']}")
        else:
            print(f"Ticket {row['ticket_id']} is resolved or neutral")

# Example tickets data
tickets = pd.DataFrame({
    'ticket_id': [1, 2, 3],
    'message': [
        'I am having trouble with my order. It was supposed to arrive yesterday but it hasn\'t.',
        'The product I received is defective and I need a replacement.',
        'Can you help me reset my password?'
    ]
})

# Analyze the tickets
analyzed_tickets = analyze_tickets(tickets)
print(analyzed_tickets)

# Call the proactive prevention function
proactive_prevention(analyzed_tickets)


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu


   ticket_id                                            message sentiment
0          1  I am having trouble with my order. It was supp...  NEGATIVE
1          2  The product I received is defective and I need...  NEGATIVE
2          3                 Can you help me reset my password?  NEGATIVE
Proactive action needed for ticket 1
Proactive action needed for ticket 2
Proactive action needed for ticket 3
