In [2]:
#importing libraries
from sklearn import svm, datasets
import sklearn.model_selection as model_selection
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score,precision_score,recall_score
import pandas as pd

In [4]:
#read dataset
data=pd.read_csv("Manglish.csv")
data=data.astype(str)
len(data)


19122

In [5]:
#splitting the dataset to training and testing set
X = data["text"]
y = data["target"]
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, train_size=0.80, test_size=0.20, random_state=101)

In [4]:
print(X)

0        athanu athu matramanu..., udane mattam varunnund.
1                         sathyiamparyumbovayamoodiketunnu
2        Athanu ivdethee oruu rithiii so poo mone dines...
3        Appo kaannunavane appa ennu villikan pc ye kay...
4        Pattikka jathiyum hinduvum Vere vere anooo Eya...
                               ...                        
19117    Mammokka Ennum oru lahariyaanu.Annum Ennum ath...
19118     Acting ഒരു രക്ഷയും illa.... baki illathoke pinne
19119    Ivide Palakkad Jayettan Fans club nnu ashamsak...
19120    padam polikkum HBD mammokka       By Die hard ...
19121       Onnum parayan illa laletta u trigger again ...
Name: text, Length: 19122, dtype: object


In [6]:
#using Tfidf vectorizer to transform the text into a usable vector.
from sklearn.feature_extraction.text import TfidfVectorizer
Tfidf_Vect=TfidfVectorizer()
Tfidf_Vect.fit(X)
Train_X=Tfidf_Vect.transform(X_train)
Test_X=Tfidf_Vect.transform(X_test)


In [6]:
Train_X.shape

(15297, 46130)

In [8]:
Test_X.shape

(3825, 46130)

In [9]:
#fitting the model
rbf = svm.SVC(kernel='rbf', gamma=1,C=1.6).fit(Train_X, y_train)

In [10]:
#prediction for test set
bf_pred = rbf.predict(Test_X)

In [14]:
#calculating evaluation metrics
rbf_accuracy = accuracy_score(y_test, rbf_pred)
rbf_f1 = f1_score(y_test, rbf_pred, average='weighted')
rbf_precision=precision_score(y_test, rbf_pred, average='weighted')
rbf_recall=recall_score(y_test, rbf_pred, average='weighted')
print('Accuracy (RBF Kernel): ', "%.2f" % (rbf_accuracy*100))
print('F1 (RBF Kernel): ', "%.2f" % (rbf_f1*100))
print('Precision (RBF Kernel): ', "%.2f" % (rbf_precision*100))
print('Recall (RBF Kernel): ', "%.2f" % (rbf_recall*100))


Accuracy (RBF Kernel):  70.46
F1 (RBF Kernel):  70.80
Precision (RBF Kernel):  72.14
Recall (RBF Kernel):  70.46


  _warn_prf(average, modifier, msg_start, len(result))


In [8]:
#save model using pickle
import pickle
filename = 'svm_model.sav'
pickle.dump(rbf, open(filename, 'wb'))

In [10]:
#fitting model using different c and gamma values to find best value for c and gamma
import numpy as np
acc=[]
gamma1=[]
c1=[]
for i in np.arange(0.1,1,0.1):
    for j in np.arange(0.1,2.5,0.1):
        rbf = svm.SVC(kernel='rbf', gamma=i, C=j).fit(Train_X, y_train)
        rbf_pred1 = rbf.predict(Test_X)
        rbf_accuracy1 = accuracy_score(y_test, rbf_pred1)
        gamma1.append(i)
        c1.append(j)
        acc.append(rbf_accuracy1)
print(gamma1)
print(c1)
print(acc)

[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.6, 0.6, 0.6, 

In [10]:
#model predicting when user gives an input
import pandas as pd
import pickle
model = pickle.load(open("svm_model.sav", 'rb')) #load the model
text=input("Enter your Comment:")
lis=[]
lis.append(text)
dfr=pd.DataFrame(lis)
print(dfr)
dfr=dfr.iloc[:,0].str.lower()
test=Tfidf_Vect.transform(dfr)
pred= model.predict(test) #predicting the result
print(pred[0])

Enter your Comment:ith polikkum
Positive
