In [33]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
import numpy as np
import joblib

In [34]:
# Load the labelled utterances from the CSV next to the notebook.
df = pd.read_csv(filepath_or_buffer="./classification_dataset.csv")

In [35]:
# Show the loaded DataFrame as the cell output for a quick visual check.
df

Unnamed: 0,User Text,Intent
0,Show me the appointment status.,Show Status
1,Namaste,Greet
2,I want to change the time of my appointment.,Reschedule
3,Please move my reservation to a later time.,Reschedule
4,Could you please show me the status of my appo...,Show Status
...,...,...
165,Good Evening,Greet
166,I'm feeling down.,Not well
167,I'm not in great shape.,Not well
168,My current appointment conflicts with another ...,Reschedule


In [36]:
# Split the frame into model input (raw utterance text) and target (intent label).
x, y = df["User Text"], df["Intent"]

In [37]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified by intent — confirm whether stratify=y is wanted.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [38]:
# Learn the TF-IDF vocabulary and weights from the training text only, so nothing
# from the held-out rows influences the features.
vectorizer = TfidfVectorizer()
vectorizer.fit(x_train)

# Project both splits through the same fitted vectorizer.
X_train_tfidf = vectorizer.transform(x_train)
X_test_tfidf = vectorizer.transform(x_test)

In [39]:
# Persist the fitted vectorizer to the file "vectorizer" in the working directory;
# the returned list of written filenames becomes the cell output.
joblib.dump(value=vectorizer, filename="vectorizer")

['vectorizer']

In [40]:
# Support-vector classifier with scikit-learn's default settings.
svc_classifier = SVC()

# Train on the TF-IDF features of the training split.
svc_classifier.fit(X=X_train_tfidf, y=y_train)

In [41]:
# Predict an intent for every held-out utterance.
y_pred = svc_classifier.predict(X=X_test_tfidf)

In [42]:
# Persist the trained classifier to the file "svc_classifier" in the working directory;
# the returned list of written filenames becomes the cell output.
joblib.dump(value=svc_classifier, filename="svc_classifier")

['svc_classifier']

In [43]:
# Vectorize a single ad-hoc utterance with the fitted vectorizer for a smoke test.
sample_utterances = ["I want to reschedule"]
text = vectorizer.transform(sample_utterances)

In [44]:
# Echo the transformed sample; the cell output is the sparse matrix's repr.
text

<1x186 sparse matrix of type '<class 'numpy.float64'>'
	with 3 stored elements in Compressed Sparse Row format>

In [45]:
# Classify the vectorized sample utterance; the predicted label array is the cell output.
svc_classifier.predict(X=text)

array(['Reschedule'], dtype=object)

In [46]:
# Display the array of intents predicted for the held-out split.
y_pred

array(['Greet', 'Greet', 'Greet', 'Not well', 'Show Status', 'Greet',
       'Not well', 'Greet', 'Reschedule', 'Not well', 'Reschedule',
       'Not well', 'Show Status', 'Reschedule', 'Greet', 'Not well',
       'Schedule', 'Reschedule', 'Reschedule', 'Greet', 'Cancel',
       'Cancel', 'Show Status', 'Cancel', 'Reschedule', 'Schedule',
       'Schedule', 'Not well', 'Schedule', 'Reschedule', 'Reschedule',
       'Cancel', 'Reschedule', 'Not well'], dtype=object)

In [47]:
# Fraction of held-out utterances whose predicted intent matches the true one.
accuracy = accuracy_score(y_test, y_pred)

# Pin the report rows to the full set of intents. Passing only `target_names`
# makes scikit-learn infer the rows from y_test/y_pred; if any intent is missing
# from both, the name count no longer matches and the call raises ValueError.
# Supplying `labels` keeps rows and names aligned for any split.
intent_labels = np.unique(y)
classification_report_output = classification_report(
    y_test,
    y_pred,
    labels=intent_labels,
    target_names=intent_labels,
)

In [48]:

# Print each metric under its heading, in the order they were computed.
for heading, value in (
    ("Accuracy:", accuracy),
    ("Classification Report:\n", classification_report_output),
):
    print(heading, value)

Accuracy: 0.7941176470588235
Classification Report:
               precision    recall  f1-score   support

      Cancel       1.00      0.67      0.80         6
       Greet       0.29      1.00      0.44         2
    Not well       1.00      0.58      0.74        12
  Reschedule       0.78      1.00      0.88         7
    Schedule       1.00      1.00      1.00         4
 Show Status       1.00      1.00      1.00         3

    accuracy                           0.79        34
   macro avg       0.84      0.88      0.81        34
weighted avg       0.91      0.79      0.81        34

