# Step 1: Import Libraries

In [4]:
import sys
import os

# Make the project's src/ folder importable so `utils.data_utils` resolves.
# "../src" is resolved against the notebook's working directory. The membership
# check keeps a re-run of this cell from piling duplicate entries onto sys.path.
src_dir = os.path.abspath("../src")
if src_dir not in sys.path:
    sys.path.append(src_dir)

import pandas as pd
from utils.data_utils import load_data, save_sample


# Step 2: Load Dataset

In [5]:
# Raw ticket CSV, addressed relative to the notebook's working directory.
data_path = "../data/raw/all_tickets_processed_improved_v3.csv"
# load_data comes from utils.data_utils; the exploration output further down
# shows the result is a DataFrame with 'Document' and 'Topic_group' columns.
df = load_data(data_path)


# Step 3: Explore Dataset

In [6]:
# Dataset dimensions and column labels.
print("Shape:", df.shape)
print("Columns:", df.columns)

# First rows, then the per-category row counts. One print call with sep="\n"
# emits exactly what three consecutive prints would: each item on its own
# line, with the blank line supplied by the leading "\n" in the header string.
print(
    df.head(),
    "\nCategory counts:",
    df["Topic_group"].value_counts(),
    sep="\n",
)


Shape: (47837, 2)
Columns: Index(['Document', 'Topic_group'], dtype='object')
                                            Document    Topic_group
0  connection with icon icon dear please setup ic...       Hardware
1  work experience user work experience user hi w...         Access
2  requesting for meeting requesting meeting hi p...       Hardware
3  reset passwords for external accounts re expir...         Access

Category counts:
Topic_group
Hardware                 13617
HR Support               10915
Access                    7125
Miscellaneous             7060
Storage                   2777
Purchase                  2464
Internal Project          2119
Administrative rights     1760
Name: count, dtype: int64


# Step 4: Save Sample

In [7]:
# Write a sample of the dataset (n=500) to the processed-data folder.
# NOTE(review): how save_sample picks its rows (random vs. first-n, seeded or
# not) is not visible here — confirm before relying on a reproducible sample.
save_sample(df, "../data/processed/sample.csv", n=500)
# The message omits the leading "../" of the path actually written above.
print("Sample saved to data/processed/sample.csv")


Sample saved to data/processed/sample.csv


# 1. Import our model functions

In [5]:
import os
import sys

# Put the project root (parent of the notebook's working directory) on the
# import path so the `src` package resolves. Resolving to an absolute path
# keeps the entry valid even if the working directory changes later, and the
# membership check makes the cell safe to re-run without duplicating entries.
project_root = os.path.abspath("..")
if project_root not in sys.path:
    sys.path.append(project_root)

from src.models.tfidf_svm import load_dataset, train_tfidf_svm, save_model

# 2. Load dataset

In [9]:
# Path is relative to the notebook's working directory, the same convention
# used for the "../models" output directory and the ".." project-root entry,
# so the notebook runs from any checkout instead of one user's home directory.
csv_path = "../data/raw/all_tickets_processed_improved_v3.csv"
df = load_dataset(csv_path)

# 3. Train TF-IDF + SVM

In [10]:
# Train on the loaded tickets. The call returns two objects, unpacked here as
# `tfidf` and `svm` and later handed to save_model; it also prints the
# classification report captured in this cell's output.
# NOTE(review): the report's total support (9568) is roughly 20% of the 47837
# rows reported for this CSV, so an 80/20 train/test split inside
# train_tfidf_svm is likely — confirm, and check whether it is seeded.
tfidf, svm = train_tfidf_svm(df)

Classification report:
                        precision    recall  f1-score   support

               Access       0.89      0.89      0.89      1425
Administrative rights       0.85      0.70      0.77       352
           HR Support       0.86      0.86      0.86      2183
             Hardware       0.82      0.86      0.84      2724
     Internal Project       0.88      0.85      0.86       424
        Miscellaneous       0.83      0.82      0.83      1412
             Purchase       0.96      0.89      0.92       493
              Storage       0.92      0.88      0.90       555

             accuracy                           0.86      9568
            macro avg       0.88      0.85      0.86      9568
         weighted avg       0.86      0.86      0.86      9568



# 4. Save the trained models

In [8]:
# Persist both trained objects under ../models (relative to the notebook's
# working directory); the cell output confirms the save location.
# NOTE(review): this cell's execution count (8) is lower than those of the
# load/train cells (9, 10), so the saved artifacts may come from an earlier
# training run — restart and run all cells in order before trusting them.
save_model(tfidf, svm, output_dir="../models")

Models saved in ../models/
