### Import libraries

In [1]:
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse import csr_matrix
from sklearn.metrics import classification_report, accuracy_score

### Function to build the document list and separate their labels

In [2]:
def initial_feature_vectors(filename):
    """Read a tab-separated review file into parallel sentence/label lists.

    Every usable line is split on TAB characters. The text before the first
    TAB is stored as the sentence, and the first character of the field that
    follows it is stored as the label.

    Lines that carry no label (no TAB at all, e.g. a blank trailing line, or
    nothing after the TAB) are skipped instead of raising ``IndexError``.

    Parameters
    ----------
    filename : str or path-like
        Path of the text file to read.

    Returns
    -------
    tuple[list[str], list[str]]
        ``(sentence, label)``; both lists have the same length and the
        entries at equal indices come from the same input line.
    """
    sentence = []
    label = []
    with open(filename, 'r') as file:
        for line in file:
            temp = line.split("\t")
            # No second field, or an empty one: there is no label to read.
            if len(temp) < 2 or not temp[1]:
                continue
            sentence.append(temp[0])
            # Only the first character of the label field is kept.
            label.append(temp[1][0])
    return sentence, label

### Perform PCA function

In [3]:
from sklearn.decomposition import PCA

def principal_component(features, k, random_state=0):
    """Fit a PCA on ``features`` and return the projected data with the model.

    Parameters
    ----------
    features : array-like
        Data the PCA is fitted on and that is then projected.
    k : int
        Passed to ``PCA`` as ``n_components``.
    random_state : int or None, default 0
        Passed to ``PCA``. It seeds the randomized/arpack SVD solvers that
        scikit-learn may choose for large inputs, so repeated runs give the
        same components. Pass ``None`` for the unseeded behaviour.

    Returns
    -------
    tuple
        ``(Y, pca)`` where ``Y`` is ``pca.transform(features)`` and ``pca``
        is the fitted estimator, so the same projection can be applied to
        other data.
    """
    pca = PCA(n_components=k, random_state=random_state)
    pca.fit(features)
    Y = pca.transform(features)
    return Y, pca

### Read File

In [4]:
# Review file, given relative to the notebook's working directory.
text_path = Path('movieReviews1000.txt')
# The loader receives the path as a POSIX-style string.
review_file = text_path.as_posix()
reviews, labels = initial_feature_vectors(review_file)

### Set Train and Test Data

In [5]:
# The first 750 samples are used for training; everything after is held out.
# NOTE(review): the split follows file order with no shuffling - confirm the
# file is not sorted by label.
split_at = 750
features_train, features_test = reviews[:split_at], reviews[split_at:]
labels_train, labels_test = labels[:split_at], labels[split_at:]

### Find TF-IDF

In [6]:
# Fit the TF-IDF vectorizer on the training reviews only, so the test
# reviews do not influence what the vectorizer learns.
vectorizer = TfidfVectorizer().fit(features_train)
# Encode both splits with the vectorizer fitted above.
Y, Y_test = vectorizer.transform(features_train), vectorizer.transform(features_test)

### Converting sparse matrix to dense matrix

In [7]:
# Convert the sparse TF-IDF matrices to dense NumPy arrays.
# toarray() returns numpy.ndarray, whereas todense() returns numpy.matrix,
# which scikit-learn >= 1.2 rejects with a TypeError.
# Wrapping in csr_matrix (imported in the first cell) keeps this cell
# re-runnable even when Y / Y_test are already dense.
Y = csr_matrix(Y).toarray()
Y_test = csr_matrix(Y_test).toarray()

### Learning and Prediction using SVM model

In [8]:
# Reduce the training features to 10 principal components and apply the
# same fitted projection to the test features.
# NOTE(review): Y and Y_test are overwritten here, so re-running this cell
# without re-running the cells above feeds already-reduced data into PCA.
Y, pca = principal_component(Y, 10)
Y_test = pca.transform(Y_test)

kernel = ["rbf"]
print(f'------------------------------------------------------------')
# Sweep C and tol in factor-of-10 steps (C: 0.1 .. 10000, tol: 0.0001 .. 1.0).
for k in kernel:
    for c in (0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0):
        for eps in (0.0001, 0.001, 0.01, 0.1, 1.0):
            model = SVC(kernel=k, C=c, tol=eps)
            model.fit(Y, labels_train)
            print(model)
            predictions = model.predict(Y_test)
            accuracy = accuracy_score(labels_test, predictions)
            print(f'Accuracy = {accuracy}')
            print(f'Number of support vectors for class 0 and class 1 = {model.n_support_}')
            print(f'Total Number of support vectors = {model.support_vectors_.shape[0]}')
            print(f'------------------------------------------------------------')

------------------------------------------------------------
SVC(C=0.1, tol=0.0001)
Accuracy = 0.532
Number of support vectors for class 0 and class 1 = [356 347]
Total Number of support vectors = 703
------------------------------------------------------------
SVC(C=0.1)
Accuracy = 0.532
Number of support vectors for class 0 and class 1 = [356 347]
Total Number of support vectors = 703
------------------------------------------------------------
SVC(C=0.1, tol=0.01)
Accuracy = 0.532
Number of support vectors for class 0 and class 1 = [357 347]
Total Number of support vectors = 704
------------------------------------------------------------
SVC(C=0.1, tol=0.1)
Accuracy = 0.528
Number of support vectors for class 0 and class 1 = [347 346]
Total Number of support vectors = 693
------------------------------------------------------------
SVC(C=0.1, tol=1.0)
Accuracy = 0.596
Number of support vectors for class 0 and class 1 = [336 336]
Total Number of support vectors = 672
---------------

In [9]:
kernel = ["poly"]
print(f'------------------------------------------------------------')
# Grid of (C, tol) pairs in the original visiting order: C is the outer
# loop (0.1 .. 10000), tol the inner loop (0.0001 .. 1.0), both x10 steps.
c_values = (0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0)
tol_values = (0.0001, 0.001, 0.01, 0.1, 1.0)
for k in kernel:
    for c, eps in [(c_val, tol_val) for c_val in c_values for tol_val in tol_values]:
        # Train a polynomial-kernel SVM on the reduced training features.
        model = SVC(kernel=k, C=c, tol=eps)
        model.fit(Y, labels_train)
        print(model)
        # Score on the held-out split and report the support-vector counts.
        predictions = model.predict(Y_test)
        print(f'Accuracy = {accuracy_score(labels_test,predictions)}')
        print(f'Number of support vectors for class 0 and class 1 = {model.n_support_}')
        print(f'Total Number of support vectors = {model.support_vectors_.shape[0]}')
        print(f'------------------------------------------------------------')

------------------------------------------------------------
SVC(C=0.1, kernel='poly', tol=0.0001)
Accuracy = 0.412
Number of support vectors for class 0 and class 1 = [342 333]
Total Number of support vectors = 675
------------------------------------------------------------
SVC(C=0.1, kernel='poly')
Accuracy = 0.412
Number of support vectors for class 0 and class 1 = [342 333]
Total Number of support vectors = 675
------------------------------------------------------------
SVC(C=0.1, kernel='poly', tol=0.01)
Accuracy = 0.412
Number of support vectors for class 0 and class 1 = [342 333]
Total Number of support vectors = 675
------------------------------------------------------------
SVC(C=0.1, kernel='poly', tol=0.1)
Accuracy = 0.412
Number of support vectors for class 0 and class 1 = [342 332]
Total Number of support vectors = 674
------------------------------------------------------------
SVC(C=0.1, kernel='poly', tol=1.0)
Accuracy = 0.42
Number of support vectors for class 0 and

In [10]:
kernel = ["linear"]
print(f'------------------------------------------------------------')
# Linear-kernel sweep: every C in 0.1 .. 10000 is combined with every tol
# in 0.0001 .. 1.0 (both in factor-of-10 steps), tol varying fastest.
for k in kernel:
    for c in (0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0):
        for eps in (0.0001, 0.001, 0.01, 0.1, 1.0):
            model = SVC(kernel=k, C=c, tol=eps)
            model.fit(Y, labels_train)
            print(model)
            predictions = model.predict(Y_test)
            test_accuracy = accuracy_score(labels_test, predictions)
            per_class_sv = model.n_support_
            total_sv = model.support_vectors_.shape[0]
            print(f'Accuracy = {test_accuracy}')
            print(f'Number of support vectors for class 0 and class 1 = {per_class_sv}')
            print(f'Total Number of support vectors = {total_sv}')
            print(f'------------------------------------------------------------')

------------------------------------------------------------
SVC(C=0.1, kernel='linear', tol=0.0001)
Accuracy = 0.388
Number of support vectors for class 0 and class 1 = [350 347]
Total Number of support vectors = 697
------------------------------------------------------------
SVC(C=0.1, kernel='linear')
Accuracy = 0.388
Number of support vectors for class 0 and class 1 = [351 347]
Total Number of support vectors = 698
------------------------------------------------------------
SVC(C=0.1, kernel='linear', tol=0.01)
Accuracy = 0.388
Number of support vectors for class 0 and class 1 = [350 347]
Total Number of support vectors = 697
------------------------------------------------------------
SVC(C=0.1, kernel='linear', tol=0.1)
Accuracy = 0.388
Number of support vectors for class 0 and class 1 = [347 347]
Total Number of support vectors = 694
------------------------------------------------------------
SVC(C=0.1, kernel='linear', tol=1.0)
Accuracy = 0.388
Number of support vectors for 