# Loading Libraries

In [1]:
!pip install spacy
!python -m spacy download en_core_web_sm
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from sklearn.feature_extraction.text import TfidfVectorizer
import spacy
nlp = spacy.load('en_core_web_sm')

Collecting en-core-web-sm==3.6.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m89.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


# Loading the data, setting the path

In [41]:
train_path = "/kaggle/input/ihqid-1mg/IHQID-1mg/train.csv"
test_path = "/kaggle/input/ihqid-1mg/IHQID-1mg/test.csv"

# Source column -> generic name used by the rest of the notebook.
# Dict order also fixes the column order of the resulting frames.
column_names = {'question_english': 'input', 'Manual_Intent': 'target'}

# Read each split, keep only the question text and its intent label,
# and rename both columns in one chained expression.
train_df = pd.read_csv(train_path)[list(column_names)].rename(columns=column_names)
test_df = pd.read_csv(test_path)[list(column_names)].rename(columns=column_names)

# Preprocess the data: encode the labels and embed the text with spaCy

In [42]:
# Integer-encode the intent labels, numbering them in order of first appearance
# in the training split.
label_mapping = {label: idx for idx, label in enumerate(train_df['target'].unique())}

# Fail fast if the test split contains a label the training split never saw.
# `Series.map` would silently turn such labels into NaN, and the problem would
# only surface later inside the metric functions with an unrelated error message.
unseen_mask = ~test_df['target'].isin(list(label_mapping))
if unseen_mask.any():
    unseen_labels = sorted(test_df.loc[unseen_mask, 'target'].astype(str).unique())
    raise ValueError(
        f"Test labels not present in the training data: {unseen_labels}"
    )

train_df['target'] = train_df['target'].map(label_mapping)
test_df['target'] = test_df['target'].map(label_mapping)

y_train = train_df['target'].values
y_test = test_df['target'].values

# One fixed-length vector per question, taken from spaCy's `Doc.vector`.
# `nlp.pipe` processes the texts as a stream instead of one call per text.
# NOTE(review): `en_core_web_sm` ships without static word vectors, so per the
# spaCy docs `Doc.vector` falls back to context-sensitive tensors here —
# confirm this is intended (vs. a `md`/`lg` pipeline with real word vectors).
X_train = [doc.vector for doc in nlp.pipe(train_df['input'].values)]
X_test = [doc.vector for doc in nlp.pipe(test_df['input'].values)]

# Training

In [43]:
# Support vector classifier hyperparameters: RBF kernel, regularisation C=2.
svc_params = {'kernel': 'rbf', 'C': 2}

# Build the classifier and fit it on the document vectors and encoded labels.
clf = svm.SVC(**svc_params)
clf.fit(X_train, y_train)

# Testing

In [45]:
# Predict an encoded intent label for every test question.
y_pred = clf.predict(X_test)

# Three views of the same predictions: the unweighted mean of per-class F1,
# the confusion matrix, and sklearn's per-class text report.
macro_f1 = f1_score(y_test, y_pred, average='macro')
confusion = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Macro F1 Score:", macro_f1)
# Each remaining result is printed as a heading line followed by its body.
for heading, body in (
    ("Confusion Matrix:", confusion),
    ("Classification Report:", class_report),
):
    print(heading)
    print(body)

Macro F1 Score: 0.5392180925666199
Confusion Matrix:
[[54  0  0  0]
 [ 2  2  6  3]
 [ 1  1 20  2]
 [ 4  0 12  5]]
Classification Report:
              precision    recall  f1-score   support

           0       0.89      1.00      0.94        54
           1       0.67      0.15      0.25        13
           2       0.53      0.83      0.65        24
           3       0.50      0.24      0.32        21

    accuracy                           0.72       112
   macro avg       0.64      0.56      0.54       112
weighted avg       0.71      0.72      0.68       112

