In [1]:
from neucube import Reservoir
from neucube.validation import Pipeline
from neucube.sampler import SpikeCount
import torch

from sklearn.metrics import accuracy_score as accuracy
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from xgboost import XGBClassifier
from tqdm import tqdm

from sklearn.datasets import fetch_20newsgroups
from experiments.preprocess import TextPrep


In [2]:
# Select the compute device: the GPU when PyTorch can see one, otherwise the CPU.
# is_available has to be *called* -- the bare function object is always truthy,
# which would pick "cuda" even on a machine without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
# Newsgroup topics used as the three target classes.
# (rec.motorcycles and soc.religion.christian are currently left out.)
cats = [
    'comp.graphics',
    'sci.med',
    'talk.politics.guns',
]

# Fetch every post (subset='all') of the selected topics and hand the raw
# dataset to the project preprocessor, which returns the inputs X and labels y.
preprocessor = TextPrep()
newsgroups_train = fetch_20newsgroups(categories=cats, subset='all')
X, y = preprocessor.preprocess_dataset(newsgroups_train)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\aleks\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [4]:
# NOTE(review): Tensor.to() returns a tensor instead of modifying its receiver,
# and neither result below is assigned, so the names X and y are not rebound by
# this cell. If a device transfer is intended, the results must be assigned --
# but y-derived labels are later passed to scikit-learn metrics in the
# cross-validation cell, so confirm a CUDA-resident y is acceptable there first.
X.to(device)
# As the last expression of the cell, this value is what the notebook echoes.
y.to(device)

tensor([0., 0., 2.,  ..., 1., 1., 2.], device='cuda:0')

In [5]:
# 4-fold cross-validation of the reservoir -> spike-count -> classifier pipeline.
# shuffle=True with a fixed random_state keeps the fold membership repeatable.
kf = KFold(n_splits=4, shuffle=True, random_state=123)

# Ground-truth labels and predictions pooled over all held-out folds.
y_total, pred_total = [], []

# kf.split() is a generator without a length, so tqdm is given the fold count
# explicitly; the bar can then show k/4 and an ETA for this long-running loop.
for train_index, test_index in tqdm(kf.split(X), total=kf.get_n_splits(), desc="CV folds"):
  X_train, X_test = X[train_index], X[test_index]
  y_train, y_test = y[train_index], y[test_index]

  # Everything is rebuilt inside the loop so no fitted state carries over
  # from one fold to the next.
  res = Reservoir(inputs=X.shape[2], cube_shape=(5, 5, 5))
  sam = SpikeCount()
  # Read-out classifier. Alternatives imported at the top of the notebook:
  # LogisticRegression(solver='liblinear'), RandomForestClassifier(),
  # XGBClassifier(), MultinomialNB().
  clf = SVC(kernel='linear')
  pipe = Pipeline(res, sam, clf)

  # NOTE(review): train=True presumably enables learning inside the reservoir
  # during fit -- confirm against neucube's Pipeline.fit.
  pipe.fit(X_train, y_train, train=True)
  pred = pipe.predict(X_test)

  y_total.extend(y_test)
  pred_total.extend(pred)

# Metrics are computed once over the pooled out-of-fold predictions.
print(accuracy(y_total, pred_total))
print(confusion_matrix(y_total, pred_total))

4it [1:46:44, 1601.16s/it]


0.702749738948834
[[717 171  85]
 [219 618 153]
 [101 125 684]]
