In [1]:
import pandas as pd
import re
import nltk
import spacy
from textblob import TextBlob
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize


In [2]:
# Fetch the NLTK data packages this notebook requests (tokenizer models and
# the stop-word corpus). The calls come first because the stop-word lookup
# below reads from the downloaded corpus.
nltk.download('punkt')
nltk.download('stopwords')

# English stop words, held in a set for fast membership checks.
stop_words = {*stopwords.words('english')}
# Small English spaCy pipeline, shared by the lemmatisation and NER helpers.
nlp = spacy.load("en_core_web_sm")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [4]:
# Read the raw support tickets into `df`; every later cell works on this frame.
df = pd.read_csv(filepath_or_buffer="/content/support_tickets.csv")
# Preview the first five rows as plain text.
print(df.head(5))

   ticket_id                                           raw_text
0        101  Hii, I cant login to my acount since yestarday...
1        102  Payment of 499rs deducted but subscrption not ...
2        103  App crashes when I open profile page in Samsun...
3        104  My order #ORD1234 not deliverd yet, its been 1...
4        105  I was charrged twice for the same service on 5...


In [5]:
def clean_text(text):
    """Normalise one ticket's text for downstream processing.

    The text is lower-cased, every character that is not an ASCII letter,
    digit or whitespace is removed, and runs of whitespace are collapsed to a
    single space with leading/trailing whitespace stripped.

    Args:
        text: The raw ticket text. Missing values (``None``, ``NaN``,
            ``pd.NA``) are accepted, as are other non-string scalars.

    Returns:
        The cleaned string; ``""`` for missing input.
    """
    if not isinstance(text, str):
        # Empty CSV cells arrive from pandas as NaN (a float), which has no
        # .lower(); treat any missing scalar as empty text instead of crashing.
        if pd.api.types.is_scalar(text) and pd.isna(text):
            return ""
        text = str(text)
    text = text.lower()
    # Characters are deleted, not replaced by a space: "can't" -> "cant".
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text


In [6]:
def correct_spelling(text):
    """Return ``text`` after TextBlob's spelling correction, as a plain ``str``."""
    blob = TextBlob(text)
    corrected = blob.correct()
    # .correct() returns a TextBlob; callers expect an ordinary string.
    return str(corrected)


In [7]:
def preprocess_text(text):
    """Lemmatise ``text`` and drop stop words and non-alphabetic tokens.

    Args:
        text: The text to process with the module-level spaCy pipeline ``nlp``.

    Returns:
        A single string of the kept tokens' lemmas joined by spaces, in their
        original order.
    """
    doc = nlp(text)
    # The stop-word list is lower-case (NLTK English corpus), so compare the
    # lower-cased token; otherwise "The" or "I" would slip through whenever
    # the caller passes text that has not been lower-cased first.
    return " ".join(
        token.lemma_
        for token in doc
        if token.is_alpha and token.text.lower() not in stop_words
    )


In [8]:
def extract_entities(text):
    """Collect the named entities spaCy finds in ``text``.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples, in the order the
        module-level ``nlp`` pipeline reports them.
    """
    entities = []
    for span in nlp(text).ents:
        entities.append((span.text, span.label_))
    return entities


In [9]:
def label_ticket(text):
    """Assign a coarse support category to a ticket by keyword matching.

    Matching is a case-insensitive substring test. Categories are checked in
    the order listed below and the first one with a matching keyword wins.

    Args:
        text: The ticket text. Non-string values (e.g. ``None`` or ``NaN``)
            are treated as having no keywords.

    Returns:
        One of ``"Login Issue"``, ``"Billing Issue"``, ``"App Issue"``,
        ``"Delivery Issue"`` or ``"General Query"``.
    """
    if not isinstance(text, str):
        return "General Query"
    text = text.lower()

    # Keywords are stems, not inflected forms: this function is fed lemmatised
    # text, where "charged" / "freezing" / "damaged" appear as "charge" /
    # "freeze" / "damage". Each stem is a substring of the inflected word it
    # replaces, so text that matched before still matches.
    rules = (
        ("Login Issue", ("login", "password", "account")),
        ("Billing Issue", ("payment", "charge", "refund")),
        ("App Issue", ("crash", "error", "freez")),
        ("Delivery Issue", ("order", "deliver", "damage")),
    )
    for category, keywords in rules:
        if any(keyword in text for keyword in keywords):
            return category
    return "General Query"


In [10]:
# Derive every processed column from the raw ticket text.
# Empty CSV cells load as NaN; replace them with "" so the string helpers
# below never receive a float.
raw_text = df['raw_text'].fillna('').astype(str)

df['cleaned_text'] = raw_text.apply(clean_text)
df['spell_corrected'] = df['cleaned_text'].apply(correct_spelling)
df['processed_text'] = df['spell_corrected'].apply(preprocess_text)
# NER runs on the original text: cleaning lower-cases it and strips
# punctuation, removing the casing cues the spaCy model uses to spot entities.
df['named_entities'] = raw_text.apply(extract_entities)
# Categories come from the spell-corrected text, not the lemmatised text:
# lemmatisation rewrites inflected keywords ("charged" -> "charge") so they
# would no longer match.
df['category'] = df['spell_corrected'].apply(label_ticket)


In [11]:
# Show the columns that summarise the pipeline's result for each ticket.
print(df.loc[:, ['ticket_id', 'raw_text', 'processed_text', 'named_entities', 'category']])


    ticket_id                                           raw_text  \
0         101  Hii, I cant login to my acount since yestarday...   
1         102  Payment of 499rs deducted but subscrption not ...   
2         103  App crashes when I open profile page in Samsun...   
3         104  My order #ORD1234 not deliverd yet, its been 1...   
4         105  I was charrged twice for the same service on 5...   
5         106  Password reset link not working, I tried 4 times.   
6         107     Refund still not processed after cancellation.   
7         108       The app is very slow and freezing sometimes.   
8         109  Unable to upload profile picture, shows error ...   
9         110           Why my premium plan suddenly downgraded?   
10        111          Got error code E502 while making payment.   
11        112      Chat support not responding for last 2 hours.   
12        113         Email verification link expired instantly.   
13        114          My account got locked wit

In [12]:
# Write the enriched frame next to the notebook, without the row index.
# Note: `named_entities` holds lists of tuples, which CSV stores as text.
df.to_csv(path_or_buf="processed_support_tickets.csv", index=False)
print("Processed dataset saved!")


Processed dataset saved!
