In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from skopt import BayesSearchCV
from skopt.space import Categorical,Real,Integer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score,f1_score,recall_score
import pickle


In [2]:
import numpy as np
data_distil_bert =  np.load("features_distilbert.npz")
final_data_distil_bert = []
for each in data_distil_bert:
    for each_one in data_distil_bert[each]:
        final_data_distil_bert.append(each_one)

features = np.array(final_data_distil_bert)

In [3]:
df = pd.read_csv("labels_generated.csv")
labels = df["intent"]

In [4]:
train_features, test_features, train_labels, test_labels = train_test_split(features, labels,test_size=0.3,random_state=0,stratify=labels)
    

In [5]:
def train(train_features,train_labels):
    svm_classifier = SVC()
    params = {
    'C' : Real(0.1,10),
    'gamma' : Real(0.001,5),
    'max_iter' : Integer(100,500)
    }
    bayes_search = BayesSearchCV(svm_classifier,params, cv=5)
    bayes_search.fit(train_features,train_labels)
    return bayes_search

In [6]:
pickle.dump(train(train_features, train_labels),open('distil_bert/svm.sav','wb'))



In [7]:
model = pickle.load(open('distil_bert/svm.sav', 'rb'))
out=model.predict(test_features)
print("Accuracy : ",accuracy_score(out,test_labels)*100, flush=True)
print("Precision : ",precision_score(out,test_labels,average = 'weighted')*100, flush=True)
print("f1_score : ",f1_score(out,test_labels,average = "weighted")*100, flush=True)
print("recall score : ",recall_score(out,test_labels,average="weighted")*100, flush=True)
print("---------------------------------------------------------------------------", flush=True)

Accuracy :  92.23744292237443
Precision :  95.64329841525651
f1_score :  93.456457925636
recall score :  92.23744292237443
---------------------------------------------------------------------------


In [8]:
data_distil_bert =  np.load("features_roberta.npz")
final_data_distil_bert = []
for each in data_distil_bert:
    for each_one in data_distil_bert[each]:
        final_data_distil_bert.append(each_one)

features_roberta = np.array(final_data_distil_bert)

In [9]:
train_features, test_features, train_labels, test_labels = train_test_split(features_roberta, labels,test_size=0.3,random_state=0,stratify=labels)
    

In [10]:
pickle.dump(train(train_features, train_labels),open('roberta/svm.sav','wb'))




In [11]:
model = pickle.load(open('roberta/svm.sav', 'rb'))
out=model.predict(test_features)
print("Accuracy : ",accuracy_score(out,test_labels)*100, flush=True)
print("Precision : ",precision_score(out,test_labels,average = 'weighted')*100, flush=True)
print("f1_score : ",f1_score(out,test_labels,average = "weighted")*100, flush=True)
print("recall score : ",recall_score(out,test_labels,average="weighted")*100, flush=True)
print("---------------------------------------------------------------------------", flush=True)

Accuracy :  79.45205479452055
Precision :  82.13350941594292
f1_score :  80.12165699843969
recall score :  79.45205479452055
---------------------------------------------------------------------------
