In [1]:
import logging
import random
import re
from pathlib import Path
from urllib import request

import MeCab
import numpy as np
import pandas as pd
from gensim import corpora, models, matutils
from sklearn import model_selection
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from tqdm import tqdm_notebook as tqdm

%load_ext jupyternotify

<IPython.core.display.Javascript object>

In [2]:
# Candidate install locations of the mecab-ipadic-neologd dictionary, in order
# of preference. The first entry is the path this notebook ended up using
# (presumably a Linux install); the second was marked "mac" by the author.
DIC_DIR_CANDIDATES = [
    "/usr/lib/mecab/dic/mecab-ipadic-neologd",
    "/usr/local/lib/mecab/dic/mecab-ipadic-neologd/",  # mac
]
# Use the first directory that exists. When none exists, fall back to the first
# candidate so that MeCab itself reports the missing dictionary.
dic_dir = next(
    (candidate for candidate in DIC_DIR_CANDIDATES if Path(candidate).is_dir()),
    DIC_DIR_CANDIDATES[0],
)
mecab = MeCab.Tagger("-Ochasen -d {}".format(dic_dir))

In [3]:
# Download the SlothLib Japanese stop-word list (one word per line, UTF-8).
# The response is used as a context manager so the connection is always closed.
with request.urlopen("http://svn.sourceforge.jp/svnroot/slothlib/CSharp/Version1/SlothLib/NLP/Filter/StopWord/word/Japanese.txt") as res:
    stopwords = [line.decode("utf-8").strip() for line in res]
print(stopwords[:3])

['あそこ', 'あたり', 'あちら']


In [4]:
# Append the SlothLib English stop-word list to the Japanese one.
# NOTE: re-running this cell appends the English words again; duplicates do not
# change membership tests but do grow the list.
with request.urlopen("http://svn.sourceforge.jp/svnroot/slothlib/CSharp/Version1/SlothLib/NLP/Filter/StopWord/word/English.txt") as res:
    stopwords += [line.decode("utf-8").strip() for line in res]
print(stopwords[-3:])

["you've", 'z', 'zero']


In [5]:
class Tokenizer:
    """Split text into filtered tokens using a MeCab-style parser.

    The parser is any callable that takes a string and returns tab-separated
    analysis lines joined by newlines (the notebook passes ``mecab.parse`` of a
    tagger created with ``-Ochasen``). For every line with at least four
    fields, field 2 is used as the token and field 3 as the part-of-speech
    (POS) tag of the form ``major[-detail[-...]]``.

    Args:
        stopwords: Iterable of tokens to drop. A snapshot is taken at
            construction time for fast lookup, so later in-place changes to
            the iterable are not seen by ``tokenize``.
        parser: Callable ``str -> str``. When omitted, a ``MeCab.Tagger`` is
            created with a hard-coded neologd dictionary path.
        include_pos: Major POS categories to keep
            (default ``["名詞", "動詞", "形容詞"]``).
        exclude_posdetail: POS sub-categories to drop
            (default ``["接尾", "数"]``).
        exclude_reg: Regular expression; tokens it matches are dropped.
    """

    # Patterns removed from the raw text before it is handed to the parser.
    _URL_PATTERN = r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+"
    _JP_DOMAIN_PATTERN = r"\"?([-a-zA-Z0-9.`?{}]+\.jp)\"?"
    # Tokens containing a minus sign (ASCII or U+2212) followed by a digit are dropped.
    _SIGNED_DIGIT_PATTERN = r"(-|−)\d"

    def __init__(self, stopwords, parser=None, include_pos=None, exclude_posdetail=None, exclude_reg=None):
        self.stopwords = stopwords
        # Set-based copy: membership tests run once per token, and a list scan
        # there dominates the cost on large corpora.
        self._stopword_set = frozenset(stopwords)
        self.include_pos = include_pos if include_pos else ["名詞", "動詞", "形容詞"]
        self.exclude_posdetail = exclude_posdetail if exclude_posdetail else ["接尾", "数"]
        self.exclude_reg = exclude_reg if exclude_reg else r"$^"  # no matching reg
        if parser:
            self.parser = parser
        else:
            # NOTE(review): this default dictionary path is hard-coded; pass
            # ``parser`` explicitly when the dictionary lives elsewhere.
            mecab = MeCab.Tagger("-Ochasen -d /usr/local/lib/mecab/dic/mecab-ipadic-neologd/")
            self.parser = mecab.parse

    def tokenize(self, text, show_pos=False):
        """Return the filtered tokens of ``text``.

        Args:
            text: Input string. URLs and ``*.jp`` host names are removed and
                the text is lower-cased before parsing.
            show_pos: When true, return ``(token, pos)`` tuples instead of
                bare tokens.

        Returns:
            List of tokens (or ``(token, pos)`` tuples) in parser order.
        """
        text = re.sub(self._URL_PATTERN, "", text)    # URL
        text = re.sub(self._JP_DOMAIN_PATTERN, "", text)  # xxx.jp
        text = text.lower()

        tokens = []
        for line in self.parser(text).split("\n"):
            fields = line.split("\t")
            if len(fields) < 4:  # no POS column (e.g. the EOS line or a blank line)
                continue
            token, pos = fields[2], fields[3]
            pos_parts = pos.split("-")
            if pos_parts[0] not in self.include_pos:
                continue
            # A POS tag may have no sub-category at all; such a tag can never
            # be in the excluded sub-categories, so only test when present.
            if len(pos_parts) > 1 and pos_parts[1] in self.exclude_posdetail:
                continue
            if re.search(self._SIGNED_DIGIT_PATTERN, token):
                continue
            if re.search(self.exclude_reg, token):
                continue
            if token in self._stopword_set:
                continue
            tokens.append((token, pos) if show_pos else token)
        return tokens

In [6]:
t = Tokenizer(stopwords, mecab.parse)

In [7]:
t.tokenize("認めたくないものだな。自分自身の若さ故の過ちというものを。")

['認める', '自分自身', '若さ故の過ち']

In [8]:
def load_data_and_labels(positive_data_file, negative_data_file, level="char", lang="En", tokenizer=None):
    """Load a two-class text data set and build one-hot labels.

    Each line of a file is one example. Positive examples come first in the
    returned text list, followed by the negative ones.

    Args:
        positive_data_file: Path of the file holding positive examples.
        negative_data_file: Path of the file holding negative examples.
        level: ``"char"`` removes spaces, puts a space around every remaining
            character and lower-cases the line; ``"word"`` tokenizes
            (``lang == "Ja"``) or strips surrounding whitespace (otherwise).
        lang: ``"Ja"`` selects the tokenizer for word level; any other value
            leaves the words as they are.
        tokenizer: Object with a ``tokenize(str)`` method used for
            ``level="word", lang="Ja"``. When omitted, a ``Tokenizer`` is built
            from the notebook-level ``stopwords`` list and ``mecab`` tagger.

    Returns:
        ``(x_text, y, ratio)`` where ``x_text`` is the list of processed
        examples, ``y`` is an integer array of shape ``(n_examples, 2)`` with
        ``[0, 1]`` for positive and ``[1, 0]`` for negative rows, and ``ratio``
        is ``n_pos / n_neg``.

    Raises:
        ValueError: If ``level`` is neither ``"char"`` nor ``"word"``.
        ZeroDivisionError: If the negative file contains no lines.
    """
    if level not in ("char", "word"):
        # Fail loudly: continuing would silently return unprocessed lines.
        raise ValueError("invalid value of 'level'. ('char' or 'word') ")

    # Context managers make sure both file handles are closed.
    with open(positive_data_file, "r") as pos_file:
        positive_examples = pos_file.readlines()
    with open(negative_data_file, "r") as neg_file:
        negative_examples = neg_file.readlines()

    if level == "char":
        # replace("", " ") inserts a space before every character and at the end.
        positive_examples = [s.replace(" ", "").replace("", " ").lower() for s in positive_examples]
        negative_examples = [s.replace(" ", "").replace("", " ").lower() for s in negative_examples]
    elif lang == "Ja":
        if tokenizer is None:
            # Tokenizer requires a stop-word list; reuse the notebook globals.
            tokenizer = Tokenizer(stopwords, mecab.parse)
        positive_examples = [tokenizer.tokenize(s) for s in positive_examples]
        negative_examples = [tokenizer.tokenize(s) for s in negative_examples]
    else:
        positive_examples = [s.strip() for s in positive_examples]
        negative_examples = [s.strip() for s in negative_examples]

    n_pos = len(positive_examples)
    n_neg = len(negative_examples)
    ratio = n_pos / n_neg
    print("# pos: ", n_pos)
    print("# neg: ", n_neg)
    print("pos/neg:", ratio)
    x_text = positive_examples + negative_examples

    # Build all rows at once and force two columns so that an empty positive
    # file still yields a well-formed (n, 2) array.
    y = np.array([[0, 1]] * n_pos + [[1, 0]] * n_neg, dtype=int).reshape(-1, 2)

    return x_text, y, ratio

In [9]:
def load_data_and_labels_multiclass(files, level="char", lang="En", tokenizer=None):
    """Load a multi-class text data set, one file per class, with one-hot labels.

    Each line of a file is one example; the position of the file in ``files``
    is its class index.

    Args:
        files: Sequence of file paths, one per class.
        level: ``"char"`` removes spaces, puts a space around every remaining
            character and lower-cases the line; ``"word"`` tokenizes
            (``lang == "Ja"``) or strips surrounding whitespace (otherwise).
        lang: ``"Ja"`` selects the tokenizer for word level; any other value
            leaves the words as they are.
        tokenizer: Object with a ``tokenize(str)`` method used for
            ``level="word", lang="Ja"``. When omitted, a ``Tokenizer`` is built
            from the notebook-level ``stopwords`` list and ``mecab`` tagger.

    Returns:
        ``(x_text, y)`` where ``x_text`` is the list of processed examples in
        file order and ``y`` is a float array of shape
        ``(n_examples, len(files))`` holding one-hot rows.

    Raises:
        ValueError: If ``level`` is neither ``"char"`` nor ``"word"``, or if
            ``files`` is empty (nothing to concatenate).
    """
    if level not in ("char", "word"):
        # Fail loudly: continuing would silently return unprocessed lines.
        raise ValueError("invalid value of 'level'. ('char' or 'word') ")

    n_classes = len(files)
    one_hot_rows = np.identity(n_classes)  # built once instead of once per example
    use_tokenizer = level == "word" and lang == "Ja"
    if use_tokenizer and tokenizer is None:
        # Tokenizer requires a stop-word list; reuse the notebook globals.
        tokenizer = Tokenizer(stopwords, mecab.parse)

    labels = []
    x_text = []
    for class_index, path in enumerate(files):
        with open(path, "r") as class_file:
            examples = class_file.readlines()
        if level == "char":
            # replace("", " ") inserts a space before every character and at the end.
            examples = [s.replace(" ", "").replace("", " ").lower() for s in examples]
        elif use_tokenizer:
            examples = [tokenizer.tokenize(s) for s in examples]
        else:
            examples = [s.strip() for s in examples]
        print(len(examples))
        x_text += examples
        # np.tile keeps the (n, n_classes) shape even for an empty file, so the
        # final concatenation does not fail on a class without examples.
        labels.append(np.tile(one_hot_rows[class_index], (len(examples), 1)))

    y = np.concatenate(labels, 0)

    return x_text, y

# Character level + random forest

In [None]:
positive_data_file = "data/amazon_ja/pos.txt"
negative_data_file = "data/amazon_ja/neg.txt"

In [None]:
files = ["data/amazon_ja/r_{}.txt".format(i) for i in range(1,6)]

In [None]:
%%notify
x_text, y, ratio = load_data_and_labels(positive_data_file, negative_data_file, level="char", lang="Ja")
#level="char"
#x_text, y = load_data_and_labels_multiclass(files, level=level, lang="Ja")

In [None]:
x_text[0]

In [None]:
# Split each space-separated character document into a list of characters.
# str.split() with no argument already discards the trailing newline and
# spaces; slicing off the last two characters first would drop a real character
# from a final line that has no trailing newline.
x_text_sp = [doc.split() for doc in x_text]

In [None]:
%%notify
d = corpora.Dictionary(x_text_sp)

In [None]:
d[100]

In [None]:
# bag of char
boc = [d.doc2bow(doc) for doc  in tqdm(x_text_sp)]

In [None]:
df = pd.DataFrame([len(b) for b in boc],columns=["length"])

In [None]:
df.head()

In [None]:
df.describe()

In [None]:
len(d)

In [None]:
%%notify
# corpus2dense already returns a 2-D array with one row per term and one column
# per document; wrapping it in list() only created a throw-away list of rows.
dense = matutils.corpus2dense(boc, num_terms=len(d))

In [None]:
dense = np.array(dense)

In [None]:
dense.shape

In [None]:
%%notify
data_train_s, data_test_s, label_train_s, label_test_s = model_selection.train_test_split(dense.T, y, test_size=0.05)

In [None]:
estimator = RandomForestClassifier(verbose=10)

In [None]:
%%notify
estimator.fit(data_train_s, label_train_s)

In [None]:
estimator.score(data_test_s, label_test_s)

In [None]:
tuned_parameters = [
    {
        "n_estimators": [50, 70, 90, 110, 130, 150]
    }
]

clf = GridSearchCV(RandomForestClassifier(), tuned_parameters, cv=2, scoring='accuracy', n_jobs=1,verbose=10)

In [None]:
%%notify
clf.fit(data_train_s, label_train_s)

In [None]:
# Accuracy of the grid-searched random forest on the held-out split.
# accuracy_score has no `target_names` parameter (that belongs to
# classification_report), so passing it raised a TypeError.
y_true, y_pred = label_test_s, clf.predict(data_test_s)
print(accuracy_score(y_true, y_pred))

# Character level + LogisticRegression

In [None]:
y_ = [v.argmax() for v in y]

In [None]:
y_[:10]

In [None]:
data_train_s, data_test_s, label_train_s, label_test_s = model_selection.train_test_split(dense.T, y_, test_size=0.05)

In [None]:
estimator = LogisticRegression(verbose=10)

In [None]:
estimator.fit(data_train_s, label_train_s)

In [None]:
estimator.score(data_test_s, label_test_s)

In [None]:
diparameter={"C": [10**i for i in range(-2,4)]}
licv=GridSearchCV(LogisticRegression(),param_grid=diparameter, cv=2, scoring='accuracy', n_jobs=1,verbose=10)
licv.fit(data_train_s, label_train_s)
predictor=licv.best_estimator_

In [None]:
y_pred[:10].tolist()

In [None]:
y_true[:10]

In [None]:
y_true, y_pred = label_test_s, licv.predict(data_test_s)
#print(classification_report(y_true, y_pred, target_names=["nag","pos"], digits=4))

# BOW 

In [10]:
pos_doc = []
neg_doc = []

In [11]:
with open("data/amazon_ja/pos.txt") as f:
    pos_doc = [t.tokenize(doc) for doc in tqdm(f.readlines())]
print(pos_doc[:2])

HBox(children=(IntProgress(value=0, max=62402), HTML(value='')))


[['書き込む', '読み出し', '転送速度', 'いずれ', '満足', '画素', 'コンパクトカメラ', 'タイプ', 'デジカメ', '入れる', '撮影', '使う', '撮影後', 'カード', 'リーダ', '接続', 'する', '撮影', 'する', '膨大', '量', '画像', 'データ', 'サムネイル', '表示', 'する', 'ピックアップ', 'する', '画像', 'コピペ', 'する', 'する', 'いる', 'ストレス', '感じる', 'ない', '快適', '使える', 'いる', '限定', '個体', 'SDカード', '本体', 'シンプル', '小さい', 'ボール紙', '挟む', 'いる', '梱包', 'シンプル', '売価', '安い', '性能', '満足', '出来る', 'いる', '買う', '良い', '思う', 'いる', '耐久性', 'わかる', '経過', '観察'], ['D6', '使う', '初心者', '1つ', '問題', '使える']]


In [12]:
with open("data/amazon_ja/neg.txt") as f:
    neg_doc = [t.tokenize(doc) for doc in tqdm(f.readlines())]
print(neg_doc[:5])

HBox(children=(IntProgress(value=0, max=9060), HTML(value='')))


[['購入', 'ニコン', '使用', 'する', 'いる', 'エラー', '出る', 'いる', '子供', '運動会', 'エラー', '出る', 'ニコン', 'メーカー', 'SD', '今後', '使う', 'する', 'Panasonic', 'LUMIX', 'H3', '問題', '使える', 'PC', '認識', 'する', '返品', 'する', 'いる', '書く', 'いる', 'ニコン', '相性', '悪い'], ['トランセンド', '32GB', 'ニコン', 'デジタル一眼レフ', '問題', '使用', 'する', 'いる', '容量', '足りる', '感じる', 'メーカー', '64GB', '購入', '使用', 'する', '方々', 'エラー', '出し手', '抜き差し', '繰り返す', '直る', 'レビュー', '見る', '買う'], ['nikond', '使用', 'する', 'メモリーカード', '壊れる', 'いる', '可能性', 'ある', '使用', 'できる', '子供', '試合当日', '表示', 'する', '妻', '怒る', 'ｐｃ', '使う', '問題', 'の', '良い', 'エラー', 'カメラ', '受け付ける', '残念'], ['Amazon', '問い合わせ', '表示', 'する', '商品', '画像', 'ある', '復旧', 'ソフト', 'つなぎ', '一切', '説明', 'ない', '見る', '注文', 'する', '不満', '買う', '商品', 'バックアップ', '取る', 'パソコン', '繋ぐ', '画像', '消える', 'しまう', 'いる', '復旧', 'ソフト', '入手', 'する', '良い', 'の', '教える', '欲しい'], ['taking', 'PLUS', 'shot', 'ニコン', 'memory', 'card', 'USED', 'card', 'MAY', 'damaged', 'insert', 'Another', 'card', 'error', 'display', 'The Touch', 'panel', 'good', 'thing', 'IS', 'Found', 'It',

In [13]:
d = corpora.Dictionary(pos_doc+neg_doc)

In [14]:
pos_bow = [d.doc2bow(doc) for doc  in tqdm(pos_doc)]

HBox(children=(IntProgress(value=0, max=62402), HTML(value='')))




In [15]:
neg_bow = [d.doc2bow(doc) for doc  in tqdm(neg_doc)]

HBox(children=(IntProgress(value=0, max=9060), HTML(value='')))




In [16]:
df = pd.DataFrame([len(b) for b in pos_bow+neg_bow],columns=["length"])

In [17]:
df.head()

Unnamed: 0,length
0,51
1,6
2,17
3,27
4,29


In [18]:
df.describe()

Unnamed: 0,length
count,71462.0
mean,21.826075
std,21.068648
min,0.0
25%,10.0
50%,16.0
75%,26.0
max,803.0


In [19]:
len(d)

56482

In [20]:
pos_label = [1 for b in pos_doc]
neg_label = [0 for b in neg_doc]
print(len(pos_label))
print(len(neg_label))

62402
9060


In [21]:
label = pos_label + neg_label

In [22]:
len(label)

71462

# BOW + random forest

In [None]:
# corpus2dense already returns a 2-D array with one row per term and one column
# per document; wrapping it in list() only created a throw-away list of rows.
dense = matutils.corpus2dense(pos_bow+neg_bow, num_terms=len(d))

In [None]:
del pos_bow
del neg_bow
del pos_label
del neg_label

In [None]:
dense = np.array(dense)

In [None]:
dense.shape

In [None]:
data_train_s, data_test_s, label_train_s, label_test_s = model_selection.train_test_split(dense.T, label, test_size=0.05)

In [None]:
estimator = RandomForestClassifier(verbose=10)

In [None]:
estimator.fit(data_train_s, label_train_s)

In [None]:
estimator.score(data_test_s, label_test_s)

In [None]:
tuned_parameters = [{'n_estimators': [50, 70, 90, 110, 130, 150]}]#, 'max_features': ['auto', 'sqrt', 'log2', None]}]

clf = GridSearchCV(RandomForestClassifier(), tuned_parameters, cv=2, scoring='accuracy', n_jobs=1,verbose=10)

In [None]:
clf.fit(data_train_s, label_train_s)

In [None]:
print("best param")
print(clf.best_estimator_)

In [None]:
# Per-candidate cross-validation summary. `cv_results_` replaces the
# `grid_scores_` attribute, which newer scikit-learn releases no longer provide.
cv_results = clf.cv_results_
for mean_score, std_score, params in zip(
    cv_results["mean_test_score"], cv_results["std_test_score"], cv_results["params"]
):
    print("{:.3f} (+/- {:.3f}) for {}".format(mean_score, std_score / 2, params))

In [None]:
# Per-class precision/recall of the grid-searched random forest on the held-out
# split. Class 0 is negative and class 1 is positive; the first display name is
# spelled "neg" (it was misspelled "nag").
y_true, y_pred = label_test_s, clf.predict(data_test_s)
print(classification_report(y_true, y_pred, target_names=["neg", "pos"]))

# BOW + LogisticRegression

In [None]:
# `label` already holds integer class ids (1 = positive, 0 = negative), not
# one-hot rows. np.argmax of a scalar is always 0, which would turn every label
# into class 0, so the ids are copied as they are.
y_ = list(label)

In [None]:
y_[:10]

In [None]:
data_train_s, data_test_s, label_train_s, label_test_s = model_selection.train_test_split(dense.T, y_, test_size=0.05)

In [None]:
estimator = LogisticRegression(verbose=10)

In [None]:
estimator.fit(data_train_s, label_train_s)

In [None]:
estimator.score(data_test_s, label_test_s)

In [None]:
diparameter={"C": [10**i for i in range(-2,4)]}
licv=GridSearchCV(LogisticRegression(),param_grid=diparameter, cv=2, scoring='accuracy', n_jobs=1,verbose=10)
licv.fit(data_train_s, label_train_s)
predictor=licv.best_estimator_

# Tf-Idf + random forest

In [23]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [24]:
with open("data/amazon_ja/pos.txt") as f:
    raw_doc = f.readlines()
with open("data/amazon_ja/neg.txt") as f:
    raw_doc += f.readlines()
len(raw_doc)

71462

In [25]:
# Build TF-IDF features for every raw review line, tokenized by `t.tokenize`.
# NOTE(review): the vectorizer is fitted on all of raw_doc and the train/test
# split is only made afterwards, so the vocabulary and IDF weights are computed
# from the held-out documents as well. Fit on the training part only if an
# unbiased test score is needed.
vectorizer = TfidfVectorizer(tokenizer=t.tokenize)
train_matrix = vectorizer.fit_transform(raw_doc)

In [26]:
#data_train_s, data_test_s, label_train_s, label_test_s = model_selection.train_test_split(dense.T, label, test_size=0.1)
data_train_s, data_test_s, label_train_s, label_test_s = model_selection.train_test_split(train_matrix, label, test_size=0.05)

In [27]:
estimator = RandomForestClassifier(verbose=10)

In [28]:
estimator.fit(data_train_s, label_train_s)

building tree 1 of 10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.9s remaining:    0.0s


building tree 2 of 10


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    5.7s remaining:    0.0s


building tree 3 of 10


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    8.7s remaining:    0.0s


building tree 4 of 10


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:   11.4s remaining:    0.0s


building tree 5 of 10


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   14.0s remaining:    0.0s


building tree 6 of 10


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:   17.2s remaining:    0.0s


building tree 7 of 10


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:   20.0s remaining:    0.0s


building tree 8 of 10


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:   22.9s remaining:    0.0s


building tree 9 of 10


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:   25.7s remaining:    0.0s


building tree 10 of 10


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   28.6s finished


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=10,
            warm_start=False)

In [29]:
estimator.score(data_test_s, label_test_s)

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.1s finished


0.9286513710128708

In [30]:
tuned_parameters = [{'n_estimators': [50, 70, 90, 110, 130, 150]}]#, 'max_features': ['auto', 'sqrt', 'log2', None]}]

clf = GridSearchCV(RandomForestClassifier(), tuned_parameters, cv=2, scoring='accuracy', n_jobs=3,verbose=10)

In [31]:
clf.fit(data_train_s, label_train_s)

Fitting 2 folds for each of 6 candidates, totalling 12 fits
[CV] n_estimators=50 .................................................
[CV] n_estimators=50 .................................................
[CV] n_estimators=70 .................................................
[CV] ........ n_estimators=50, score=0.9112040774239165, total= 1.3min
[CV] n_estimators=70 .................................................
[CV] ........ n_estimators=50, score=0.9104433642657239, total= 1.3min
[CV] n_estimators=90 .................................................


[Parallel(n_jobs=3)]: Done   2 tasks      | elapsed:  1.3min


[CV] ........ n_estimators=70, score=0.9104139048460745, total= 1.8min
[CV] n_estimators=90 .................................................
[CV] ........ n_estimators=70, score=0.9090239519193942, total= 1.9min
[CV] n_estimators=110 ................................................
[CV] ........ n_estimators=90, score=0.9099425541316836, total= 2.6min
[CV] n_estimators=110 ................................................
[CV] ........ n_estimators=90, score=0.9087293403647291, total= 2.6min
[CV] n_estimators=130 ................................................
[CV] ........ n_estimators=110, score=0.910384445426425, total= 3.3min
[CV] n_estimators=130 ................................................


[Parallel(n_jobs=3)]: Done   7 tasks      | elapsed:  6.7min


[CV] ....... n_estimators=110, score=0.9096720973396577, total= 3.4min
[CV] n_estimators=150 ................................................
[CV] ....... n_estimators=130, score=0.9105022831050228, total= 3.9min
[CV] n_estimators=150 ................................................


[Parallel(n_jobs=3)]: Done   9 out of  12 | elapsed:  8.5min remaining:  2.8min


[CV] ....... n_estimators=130, score=0.9099961700497894, total= 3.9min
[CV] ....... n_estimators=150, score=0.9097952570334364, total= 4.5min
[CV] ....... n_estimators=150, score=0.9090239519193942, total= 4.3min


[Parallel(n_jobs=3)]: Done  12 out of  12 | elapsed: 12.9min finished


GridSearchCV(cv=2, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid=True, n_jobs=3,
       param_grid=[{'n_estimators': [50, 70, 90, 110, 130, 150]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=10)

In [None]:
print("best param")
print(clf.best_estimator_)

In [None]:
# Per-candidate cross-validation summary. `cv_results_` replaces the
# `grid_scores_` attribute, which newer scikit-learn releases no longer provide.
cv_results = clf.cv_results_
for mean_score, std_score, params in zip(
    cv_results["mean_test_score"], cv_results["std_test_score"], cv_results["params"]
):
    print("{:.3f} (+/- {:.3f}) for {}".format(mean_score, std_score / 2, params))

In [None]:
# Per-class precision/recall of the grid-searched random forest on the held-out
# split. Class 0 is negative and class 1 is positive; the first display name is
# spelled "neg" (it was misspelled "nag").
y_true, y_pred = label_test_s, clf.predict(data_test_s)
print(classification_report(y_true, y_pred, target_names=["neg", "pos"]))

# Tf-Idf + LogisticRegression

In [32]:
# `label` already holds integer class ids (1 = positive, 0 = negative), not
# one-hot rows. np.argmax of a scalar is always 0, which would turn every label
# into class 0, so the ids are copied as they are.
y_ = list(label)

In [34]:
data_train_s, data_test_s, label_train_s, label_test_s = model_selection.train_test_split(train_matrix, label, test_size=0.05)

In [35]:
estimator = LogisticRegression(verbose=10)

In [36]:
estimator.fit(data_train_s, label_train_s)

[LibLinear]

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=10, warm_start=False)

In [37]:
estimator.score(data_test_s, label_test_s)

0.9289311695579183

In [38]:
diparameter={"C": [10**i for i in range(-2,4)]}
licv=GridSearchCV(LogisticRegression(),param_grid=diparameter, cv=2, scoring='accuracy', n_jobs=1,verbose=10)
licv.fit(data_train_s, label_train_s)
predictor=licv.best_estimator_

Fitting 2 folds for each of 6 candidates, totalling 12 fits
[CV] C=0.01 ..........................................................
[CV] ................. C=0.01, score=0.8730004418912948, total=   0.2s
[CV] C=0.01 ..........................................................
[CV] ................. C=0.01, score=0.8730224199393101, total=   0.2s
[CV] C=0.1 ...........................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.4s remaining:    0.0s


[CV] .................. C=0.1, score=0.8837825894829872, total=   0.2s
[CV] C=0.1 ...........................................................


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.6s remaining:    0.0s


[CV] .................. C=0.1, score=0.8822732227557964, total=   0.2s
[CV] C=1 .............................................................


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.8s remaining:    0.0s


[CV] .................... C=1, score=0.9163057887759611, total=   0.3s
[CV] C=1 .............................................................


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    1.2s remaining:    0.0s


[CV] ..................... C=1, score=0.916978463895354, total=   0.4s
[CV] C=10 ............................................................


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    1.6s remaining:    0.0s


[CV] ................... C=10, score=0.9298865812343496, total=   0.5s
[CV] C=10 ............................................................


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    2.1s remaining:    0.0s


[CV] ................... C=10, score=0.9275550187078337, total=   0.4s
[CV] C=100 ...........................................................


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    2.5s remaining:    0.0s


[CV] .................. C=100, score=0.9232582118132273, total=   0.6s
[CV] C=100 ...........................................................


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    3.2s remaining:    0.0s


[CV] .................. C=100, score=0.9197772736646731, total=   0.6s
[CV] C=1000 ..........................................................
[CV] ................. C=1000, score=0.9143025482397997, total=   1.0s
[CV] C=1000 ..........................................................
[CV] ................. C=1000, score=0.9121173732433786, total=   1.0s


[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    5.8s finished
