In [1]:
# Import necessary libraries
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Read the ticket dataset from a CSV file located in the working directory
dataset_path = 'Tickets Dataset.csv'
data = pd.read_csv(dataset_path)

# Preview the first five rows as the cell's rich output
data.head(n=5)

Unnamed: 0,S.No,Ticket Title,Description,Application Name
0,1,Software Bug Fix,Investigate and fix software bugs,Software
1,2,Email Migration,Migrate email accounts to a new platform,Email
2,3,Hardware Upgrade,Upgrade hardware components,Hardware
3,4,Security Incident,Respond to and investigate security incidents,Security
4,5,Financial Planning,Assist with financial planning and analysis,Accounting


In [3]:
# Build the model input by joining each ticket's title and description.
# Missing values are replaced with '' (and everything coerced to str) first:
# concatenating a string with NaN yields NaN, and the TF-IDF vectorizer used
# later rejects NaN entries as invalid documents.
X = (
    data['Ticket Title'].fillna('').astype(str)
    + ' '
    + data['Description'].fillna('').astype(str)
)

# Target label: the application each ticket belongs to
y = data['Application Name']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
# Convert the raw ticket text into TF-IDF features, keeping at most 5000 terms.
# The vectorizer is fitted on the training split only, so nothing from the
# test split influences the learned vocabulary or IDF weights.
tfidf_vectorizer = TfidfVectorizer(max_features=5000).fit(X_train)

# Project both splits onto the fitted vocabulary
X_train_tfidf = tfidf_vectorizer.transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [5]:
# Train a Multinomial Naive Bayes classifier on the TF-IDF training features
classifier = MultinomialNB()
classifier.fit(X_train_tfidf, y_train)

# Make predictions on the held-out test set
y_pred = classifier.predict(X_test_tfidf)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
# zero_division=0: a class that is never predicted has undefined precision.
# Report it explicitly as 0 instead of letting scikit-learn emit an
# UndefinedMetricWarning for each affected average.
report = classification_report(y_test, y_pred, zero_division=0)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.9
Classification Report:
               precision    recall  f1-score   support

  Accounting       1.00      1.00      1.00         2
         CRM       0.00      0.00      0.00         2
        Data       1.00      1.00      1.00         2
          HR       1.00      1.00      1.00         1
    Hardware       1.00      1.00      1.00         3
     Network       0.71      1.00      0.83         5
      Server       1.00      1.00      1.00         4
    Software       1.00      1.00      1.00         1

    accuracy                           0.90        20
   macro avg       0.84      0.88      0.85        20
weighted avg       0.83      0.90      0.86        20



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [6]:
import joblib

# Generic alias for the trained classifier, matching the artifact's file name
your_model = classifier

# Persist the trained classifier to disk; joblib.dump returns the list of
# file names it wrote, which the notebook shows as this cell's output
joblib.dump(your_model, 'your_model.pkl')


['your_model.pkl']

In [7]:
# Persist the fitted TF-IDF vectorizer next to the model so that new text can
# be transformed with the same vocabulary at prediction time
joblib.dump(value=tfidf_vectorizer, filename='vectorizer.pkl')

['vectorizer.pkl']

In [8]:
# NOTE(review): bare tuple expression; it is only echoed as the cell output and
# is not assigned to any name. Presumably leftover scratch; consider removing.
'email', 'software'

('email', 'software')

In [10]:
# Vectorize one unseen ticket sentence with the already-fitted TF-IDF vectorizer
new_statement = 'I need assistance with accounting reports and queries'
new_statement_tfidf = tfidf_vectorizer.transform([new_statement])

# Ask the trained classifier for a label; the result holds one entry per input row
predicted_class = classifier.predict(new_statement_tfidf)

# Show the label predicted for the single input sentence
print("Predicted Class Label:", predicted_class[0])

Predicted Class Label: Accounting
