# Import Libraries and Load Data


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Load and Explore Data


In [None]:
# Read the raw support-ticket export into a DataFrame.
ticket_data = pd.read_csv('customer_support_tickets.csv')

# Quick look at the data: the first rows, then the number of tickets
# recorded for each value of the 'Category' column.
preview_rows = ticket_data.head()
category_counts = ticket_data['Category'].value_counts()
print("Dataset Overview:", preview_rows, sep="\n")
print("\nCategories:", category_counts, sep="\n")

# Data Preprocessing


In [None]:
# Features are the 'Ticket' column, targets are the 'Category' column.
X, y = ticket_data['Ticket'], ticket_data['Category']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Bag-of-words counts. The vocabulary is learned from the training split
# only and then reused, unchanged, to encode the test split.
vectorizer = CountVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Model Training


In [None]:
# Fit a multinomial Naive Bayes classifier on the training word counts.
# fit() returns the estimator itself, so construction and training chain.
nb_model = MultinomialNB().fit(X_train_vectorized, y_train)

# Model Evaluation


In [None]:
# Predict a category for every held-out ticket.
y_pred = nb_model.predict(X_test_vectorized)

# Overall fraction of test tickets classified correctly.
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy: {accuracy:.2f}")

# Text report of per-class metrics.
class_report = classification_report(y_test, y_pred)
print("\nClassification Report:", class_report, sep="\n")

# Counts of (true label, predicted label) pairs.
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:", conf_matrix, sep="\n")

# Visualizations


In [None]:
from sklearn.utils.multiclass import unique_labels

# conf_matrix was built by confusion_matrix(y_test, y_pred) without an
# explicit `labels` argument, so its rows/columns follow the *sorted* labels
# that occur in y_test or y_pred. ticket_data['Category'].unique() returns
# labels in order of first appearance (and may include classes missing from
# the test split), which would put the wrong names on the heatmap axes.
# unique_labels() reproduces the ordering confusion_matrix uses.
categories = unique_labels(y_test, y_pred)

# Heatmap of the confusion matrix: rows are true labels, columns are
# predicted labels, each cell annotated with its integer count.
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=categories, yticklabels=categories)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

# Visualize Important Features
# CountVectorizer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2; get_feature_names_out() is its replacement. The fallback
# keeps this cell working on installations older than 1.0.
if hasattr(vectorizer, 'get_feature_names_out'):
    feature_names = vectorizer.get_feature_names_out()
else:
    feature_names = vectorizer.get_feature_names()
class_labels = nb_model.classes_

# Visualize Top Features for Each Category
# feature_log_prob_ holds one row of per-token log probabilities per class,
# in the same order as classes_, so the two can be walked in lockstep.
for class_label, features_prob in zip(class_labels,
                                      nb_model.feature_log_prob_):
    # argsort is ascending: take the last 10 indices and reverse them so the
    # most probable token comes first.
    top_features_indices = features_prob.argsort()[-10:][::-1]
    top_features = [feature_names[idx] for idx in top_features_indices]

    # One horizontal bar chart per class.
    plt.figure(figsize=(8, 6))
    plt.barh(top_features, features_prob[top_features_indices])
    plt.title(f'Top Features for {class_label}')
    plt.xlabel('Log Probability')
    plt.show()