Import Libraries

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score
import nltk
from nltk.corpus import stopwords
import string

Load and Preprocess the Data

In [3]:
# Example dataset: a handful of hand-written support tickets, kept as
# (ticket text, category label) pairs so each message sits next to its label.
labelled_tickets = [
    ("My internet is not working.", "Technical Support"),
    ("I am unable to login to my account.", "Account Issues"),
    ("The website is down.", "Technical Support"),
    ("How do I reset my password?", "Account Issues"),
    ("I need help with billing.", "Billing"),
    ("I'm having trouble with the app.", "Technical Support"),
]

# Column-oriented view of the same pairs, in the original order.
data = {
    'text': [message for message, _ in labelled_tickets],
    'category': [label for _, label in labelled_tickets],
}

df = pd.DataFrame(data)

# Preprocessing
# Fetch the NLTK stop-word corpus, then keep the English entries in a set so
# that the per-token membership checks in preprocess_text are cheap.
nltk.download('stopwords')
stop_words = {word for word in stopwords.words('english')}

def preprocess_text(text, stopword_set=None):
    """Normalise a ticket: lowercase it, drop stop words, strip punctuation.

    Each whitespace-separated token is checked against the stop words twice:
    once with all punctuation removed, and once with only its leading and
    trailing punctuation trimmed. The second check is what catches
    contractions such as "don't": with punctuation fully stripped the token
    becomes "dont", which no longer equals the stop-word entry that still
    carries its apostrophe.

    Args:
        text: Raw ticket text.
        stopword_set: Optional collection of lowercase stop words. When
            omitted, the module-level ``stop_words`` set is used.

    Returns:
        The surviving tokens, free of ASCII punctuation and joined by single
        spaces. An input with no surviving tokens yields an empty string.
    """
    if stopword_set is None:
        stopword_set = stop_words

    # Only the ASCII characters in string.punctuation are removed; typographic
    # quotes and similar characters pass through unchanged.
    punctuation_table = str.maketrans('', '', string.punctuation)

    kept_tokens = []
    for raw_token in text.lower().split():
        bare_token = raw_token.translate(punctuation_table)
        if not bare_token:
            # The token was nothing but punctuation (e.g. a lone "-").
            continue
        if bare_token in stopword_set:
            continue
        if raw_token.strip(string.punctuation) in stopword_set:
            # Contraction-style stop word whose apostrophe must be kept for
            # the lookup to succeed.
            continue
        kept_tokens.append(bare_token)

    return ' '.join(kept_tokens)

# Clean every ticket element-wise; note that this overwrites the raw text
# column with its preprocessed form.
df['text'] = df['text'].map(preprocess_text)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Split the Data into Training and Test Sets

In [4]:
# Features are the preprocessed ticket texts; targets are their category labels.
X, y = df['text'], df['category']

# Hold out 20% of the rows for evaluation. The fixed seed makes the shuffle,
# and therefore the split, reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


Build and Train the Model

In [5]:
# Chain vectorisation and classification in one estimator so that raw strings
# can be passed straight to fit() and predict().
model = Pipeline(
    steps=[
        ('tfidf', TfidfVectorizer()),      # text -> TF-IDF weighted term matrix
        ('classifier', MultinomialNB()),   # multinomial Naive Bayes on those weights
    ]
)

# Fit both steps on the training portion only.
model.fit(X_train, y_train)


Evaluate the Model

In [6]:
# Predictions on the held-out tickets
y_pred = model.predict(X_test)

# Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))

# With so few test rows, some classes are never predicted and their precision
# is undefined. zero_division=0 reports those scores as 0.00 (the same numbers
# as the default) without scikit-learn emitting an UndefinedMetricWarning for
# each affected average.
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred, zero_division=0),
)


Accuracy: 0.5

Classification Report:
                    precision    recall  f1-score   support

   Account Issues       0.00      0.00      0.00         1
Technical Support       0.50      1.00      0.67         1

         accuracy                           0.50         2
        macro avg       0.25      0.50      0.33         2
     weighted avg       0.25      0.50      0.33         2



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Use the Model for Predictions

In [8]:
# Unseen tickets to classify with the fitted pipeline.
new_tickets = [
    "I can't access my account.",
    "The app crashes when I try to open it.",
    "How do I update my billing information?",
    "I need help with billing.",
]

# Apply the same cleaning used on the training data before predicting.
new_tickets_preprocessed = list(map(preprocess_text, new_tickets))
predicted_categories = model.predict(new_tickets_preprocessed)

# Report each original (uncleaned) ticket next to its predicted label.
for ticket, category in zip(new_tickets, predicted_categories):
    print(f"Ticket: {ticket} -> Predicted Category: {category}")


Ticket: I can't access my account. -> Predicted Category: Technical Support
Ticket: The app crashes when I try to open it. -> Predicted Category: Technical Support
Ticket: How do I update my billing information? -> Predicted Category: Technical Support
Ticket: I need help with billing. -> Predicted Category: Billing
