In [11]:
import numpy as np
import pandas as pd
from sklearn import feature_extraction, model_selection, naive_bayes, metrics, svm

import warnings
warnings.filterwarnings("ignore")

data = pd.read_csv('data2.csv')

# init models 
f = feature_extraction.text.CountVectorizer(stop_words='english')
X = f.fit_transform(data["v2"].values.astype('U'))

X_train, X_test, y_train, y_test = model_selection.train_test_split(X, data['v1'], test_size=0.33, random_state=42)


# init arrays 
Cs = np.arange(500, 2000, 100)  
score_train = np.zeros(len(Cs))
score_test = np.zeros(len(Cs))
recall_test = np.zeros(len(Cs))
precision_test= np.zeros(len(Cs))


def train_svm(C, at_index):
    svc = svm.SVC(C=C)
    svc.fit(X_train, y_train)
    score_train[at_index] = svc.score(X_train, y_train)
    score_test[at_index]= svc.score(X_test, y_test)
    recall_test[at_index] = metrics.recall_score(y_test, svc.predict(X_test))
    precision_test[at_index] = metrics.precision_score(y_test, svc.predict(X_test))

# test SVM C 
for idx in range(len(Cs)):
    train_svm(Cs[idx], idx)


matrix = np.matrix(np.c_[Cs, score_train, score_test, recall_test, precision_test])
models = pd.DataFrame(data = matrix, columns =
             ['C', 'Train Accuracy', 'Test Accuracy', 'Test Recall', 'Test Precision'])

# top 10 
models.head(n=10)

        C  Train Accuracy  Test Accuracy  Test Recall  Test Precision
0   500.0        0.998507       0.892424     0.897590        0.889552
1   600.0        0.998507       0.892424     0.897590        0.889552
2   700.0        0.998507       0.892424     0.897590        0.889552
3   800.0        0.998507       0.890909     0.897590        0.886905
4   900.0        0.998507       0.889394     0.897590        0.884273
5  1000.0        0.998507       0.889394     0.897590        0.884273
6  1100.0        0.999254       0.889394     0.897590        0.884273
7  1200.0        0.999254       0.887879     0.897590        0.881657
8  1300.0        0.999254       0.887879     0.897590        0.881657
9  1400.0        0.999254       0.887879     0.894578        0.883929
