In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder

In [3]:
df = pd.read_csv('helpdesk_customer_tickets.csv')
df.sample(3)

Unnamed: 0,id,subject,body,answer,type,queue,priority,language,business_type,tag_1,tag_2,tag_3,tag_4,tag_5,tag_6,tag_7,tag_8,tag_9
1119,6594379873163,Solicitud de cambio en la configuración de AWS...,"Estimado soporte de servicios de TI, \r\n\r\nE...",Asunto: Re: Solicitud de cambio en la configur...,Change,Technical Support,high,es,IT Services,Technical Support,IT Support,Service Notification,General Inquiry,Problem Resolution,Technical Guidance,,,
103,1382139133742,Modificaciones Requeridas para el Sistema de T...,"Estimado Soporte al Cliente,\r\n\r\nEstoy escr...","Estimado <name>,\r\n\r\nGracias por contactarn...",Change,Customer Service,high,es,IT Consulting Firm,Technical Support,Customer Service,Software Bug,Problem Resolution,Technical Guidance,Customer Feedback,Feature Request,,
934,5265289500180,Problèmes avec le routeur Cisco ISR4331,"Bonjour, j'éprouve des déconnexions fréquentes...","Bonjour, Merci de nous avoir contactés. Pour l...",Problem,Technical Support,high,fr,Tech Online Store,Technical Support,Network Issue,Problem Resolution,IT Support,,,,,


In [4]:
df.dropna(subset=['body', 'subject'], inplace=True)

In [5]:

le = LabelEncoder()
df['type_encoded'] = le.fit_transform(df['type'])
df['queue_encoded'] = le.fit_transform(df['queue'])

df.sample(3)

Unnamed: 0,id,subject,body,answer,type,queue,priority,language,business_type,tag_1,tag_2,tag_3,tag_4,tag_5,tag_6,tag_7,tag_8,tag_9,type_encoded,queue_encoded
913,5146461049383,Problem mit Dell XPS 13 Überhitzung,"Hallo Kundenservice,\r\n\r\nich habe Probleme ...",Betreff: Re: Problem mit Dell XPS 13 Überhitzu...,Incident,Sales and Pre-Sales,medium,de,Tech Online Store,Technical Support,Product Support,Hardware Failure,Performance Tuning,System Maintenance,,,,,1,7
1281,7734998931865,Preocupaciones sobre el rendimiento de la bate...,Estimado equipo de soporte de Tech Online Stor...,"Estimado <name>,\r\n\r\nGracias por comunicars...",Request,Customer Service,medium,es,Tech Online Store,Technical Support,Product Support,Performance Tuning,Software Bug,Problem Resolution,,,,,3,1
901,5093618534568,Urgente: Optimizar la infraestructura de AWS p...,"Estimado Servicio al Cliente, Me pongo en cont...","Estimado <name>,\r\n\r\nGracias por ponerte en...",Request,IT Support,high,es,IT Services,Technical Support,IT Support,Urgent Issue,Problem Resolution,Service Notification,Technical Guidance,Performance Tuning,,,3,4


In [6]:
X = df[['subject', 'body', 'type_encoded', 'queue_encoded']]
y = df['priority']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
text_transformer = Pipeline([
    ('tfidf', TfidfVectorizer())
])

preprocessor = ColumnTransformer(
    transformers=[
        ('subject_tfidf', text_transformer, 'subject'),
        ('body_tfidf', text_transformer, 'body')
    ],
    remainder='passthrough'
)

# Full pipeline with classifier
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])

In [8]:
pipeline.fit(X_train, y_train)

In [9]:
y_pred = pipeline.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

        high       0.76      0.92      0.83       157
         low       0.87      0.50      0.63        40
      medium       0.79      0.69      0.74       116

    accuracy                           0.78       313
   macro avg       0.81      0.70      0.73       313
weighted avg       0.79      0.78      0.77       313

