In [1]:
import gensim.downloader as api
from sklearn.datasets import fetch_20newsgroups
import torch

from experiments.experiment import snn_experiment, lsa_experiment
from experiments.preprocess import Word2VecPrep

In [2]:
# Pick the compute device once: the GPU when PyTorch reports CUDA as
# available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [3]:
# Load the pretrained 'word2vec-google-news-300' vectors through the gensim
# downloader API.
word2vec = api.load('word2vec-google-news-300')
# Wrap the vectors in the project's preprocessing helper; it is used by later
# cells via `text_prep.preprocess_dataset(...)`.
# NOTE(review): the saved output of this cell shows an NLTK `punkt` check —
# presumably triggered inside Word2VecPrep; confirm.
text_prep = Word2VecPrep(word2vec)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\aleks\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [4]:
# Fetch the 'train' subset of the 20 Newsgroups corpus via scikit-learn.
# NOTE(review): no `remove=` argument is passed, so headers, footers and
# quoted replies stay in the text — confirm that is intended for these
# experiments.
newsgroups_train = fetch_20newsgroups(
    subset='train',
)

### Averaged Word2Vec

In [5]:
# Build the averaged (avg=True) Word2Vec representation of the training
# split, with the preprocessing helper's max_size set to 150.
train_x, train_y = text_prep.preprocess_dataset(newsgroups_train, avg=True, max_size=150)
# `Tensor.to()` returns the moved tensor rather than modifying it in place,
# so the result has to be re-bound; a bare `train_x.to(device)` leaves the
# variable pointing at the CPU copy.
# NOTE(review): assumes lsa_experiment accepts tensors living on `device` —
# confirm before relying on this.
train_x = train_x.to(device)
train_y = train_y.to(device)
print(device)

cuda


In [6]:
# Averaged Word2Vec features, "regression" classifier, splits=4.
lsa_experiment(
    train_x,
    train_y,
    clf_type="regression",
    splits=4,
)

4it [00:30,  7.57s/it]



---- CLASSIFIER: regression ----
acc: 0.6415944847092099
[[187   7   1   0   0   3   7   2  13  18   1   4   0   9   4 159  12  38
   15   0]
 [  1 327  45  24   8  53  66   2   5   8   1  10   9   0  20   5   0   0
    0   0]
 [  0  51 323  51  17  52  73   6   5   1   0   8   3   0   1   0   0   0
    0   0]
 [  0  31  42 307  35  19  91  11   3   8   0  15  23   2   1   1   1   0
    0   0]
 [  0  35  31 120 193  21 112   8  10   3   1  11  29   0   2   2   0   0
    0   0]
 [  0  64  46  11   9 366  70   1   1   6   0   9   2   0   5   2   1   0
    0   0]
 [  2   3  11  25   4   2 456  16   7   9   5  11  22   1   7   0   1   1
    2   0]
 [  1  10   1   0   1   2  65 386  68  10   2   6  10   3   3   1  21   1
    3   0]
 [  2   7   0   2   0   2  55  71 407  10   2   5   8   5   4   0  13   1
    4   0]
 [  0   3   1   0   0   0  30   0   5 498  50   0   0   1   2   4   0   1
    2   0]
 [  2   2   0   0   0   1  27   0   5  48 512   0   0   0   0   0   1   0
    2   0]
 [  6  

In [7]:
# Averaged Word2Vec features, "random_forest" classifier, splits=4.
lsa_experiment(
    train_x,
    train_y,
    clf_type="random_forest",
    splits=4,
)

4it [02:37, 39.38s/it]



---- CLASSIFIER: random_forest ----
acc: 0.6168463850097224
[[245   4   1   0   2   5   1   6  13  10   3   2   0   8   5 105  12  26
   16  16]
 [  4 280  68  32  23  69  32   7   9   8   3   8  16   3  18   0   1   2
    1   0]
 [  0  70 329  50  25  50  35   7   6   3   1   8   6   1   0   0   0   0
    0   0]
 [  0  43  40 293  73  27  52  13   6   3   0   6  26   3   4   0   1   0
    0   0]
 [  3  55  43 132 186  28  52  17  11   2   1  10  28   4   5   0   0   0
    1   0]
 [  0  86  60  30  29 321  34   6   1   2   0  12   4   1   5   1   1   0
    0   0]
 [  2  11  17  29  11  11 425  12  10  10  11   7  16   0   5   0   3   2
    1   2]
 [  2  12   4   6   8   7  34 383  74  12   1  10  18   2   3   0  13   1
    3   1]
 [  5   5   3   4   6   6  38  92 375  18   7   3   7   6   6   0  12   1
    3   1]
 [  4  10   5   1   1   1  17   7   7 455  72   0   0   0   4   2   4   2
    4   1]
 [  2   3   0   1   3   2   9  10  10  77 475   0   0   1   3   0   2   0
    1   1]
 [  

In [8]:
# Averaged Word2Vec features, "xgboost" classifier, splits=4.
lsa_experiment(
    train_x,
    train_y,
    clf_type="xgboost",
    splits=4,
)

4it [04:41, 70.34s/it]



---- CLASSIFIER: xgboost ----
acc: 0.6900300512639208
[[287   2   1   0   1   3   1   6   8   6   2   3   0   6   2  78  10  12
   20  32]
 [  5 326  54  27  20  65  28   2   5   7   0   8  15   2  14   2   2   0
    0   2]
 [  0  49 363  53  42  51  11   2   4   2   0   7   3   0   1   0   2   1
    0   0]
 [  0  26  39 311  91  26  30  10   7   2   3   8  33   1   1   0   2   0
    0   0]
 [  1  29  24 100 301  20  30   5   7   2   2   8  38   5   3   1   1   0
    1   0]
 [  0  63  55  19  33 370  11   0   1   4   2   9   7   7   5   3   1   2
    1   0]
 [  1  10  16  26  21   7 411  15  11   9   6   4  33   4   5   1   1   2
    2   0]
 [  1   9   8   8   4   3  24 416  64   6   2   2  19   0   3   3  17   1
    3   1]
 [  4   2   4   7   6   3  23  56 426   6   5   4   8   4  12   0  14   2
    9   3]
 [  0   5   5   2   2   3   6   3   3 480  65   0   3   2   1   3   4   3
    7   0]
 [  2   4   1   1   1   3   8   1   6  59 497   0   2   0   3   1   2   3
    6   0]
 [  6   6 

### Spiking Word2Vec

In [9]:
# Non-averaged (avg=False) Word2Vec representation of the same training
# split, again with max_size=150.
spike_train_x, spike_train_y = text_prep.preprocess_dataset(
    newsgroups_train,
    avg=False,
    max_size=150,
)
# Binarise the features: entries >= 0.25 become 1, everything else 0.
# The result is an int64 tensor with the same shape as the mask
# (presumably the spike encoding consumed by the SNN cells — TODO confirm).
mask = spike_train_x >= 0.25
data_x = mask.to(torch.int64)

In [10]:
# `Tensor.to()` hands back the moved tensor instead of changing it in place,
# so the results must be re-bound for the data to actually land on `device`;
# the bare calls left both variables on the CPU.
# NOTE(review): assumes snn_experiment accepts inputs already on `device` —
# confirm.
data_x = data_x.to(device)
spike_train_y = spike_train_y.to(device)
print(device)

cuda


#### Regression

In [11]:
# Binarised input, "regression" classifier, shape=(6, 6, 6), splits=4.
snn_experiment(
    data_x,
    spike_train_y,
    clf_type="regression",
    splits=4,
    shape=(6, 6, 6),
    res_train=True,
)

4it [1:57:20, 1760.11s/it]


---- CLASSIFIER: regression ----
acc: 0.4888633551352307
[[ 33   0   2   0   2   4   6   4   5  11  15  10   2  28   7 272  14  65
    0   0]
 [  0 149  93  20  14  98  67   5   6   8  16  24  28  16  19  19   2   0
    0   0]
 [  0  19 336  27  18  76  42  13  10  10   6   7   7   9   6   5   0   0
    0   0]
 [  0  14  84 168  65  59  76   9  10   8  15  26  28  11   5   9   0   3
    0   0]
 [  0  21  76  77 145  46  57  10   8   7  12  33  30  16  18  19   2   1
    0   0]
 [  0  36  72  24   5 318  54   6   5   8  11  24   6   8   9   6   0   1
    0   0]
 [  0   1  19  17   9  15 412  20   7  14  23  15  13   5   8   5   0   2
    0   0]
 [  3   4  17   5   2   9  48 308  62  18  25  14  13  19   8  22  11   6
    0   0]
 [  0   4  10   2   2  13  56  63 296  24  38  10  15  24  11  18   6   6
    0   0]
 [  2   5   6   1   3   2  26   4   8 368 141   5   2   4   1  15   1   3
    0   0]
 [  0   2   1   2   0   0  23   3   8  49 487   3   1   5   4   9   1   2
    0   0]
 [  0   

In [12]:
# Binarised input, "regression" classifier, shape=(7, 7, 7), splits=4.
snn_experiment(
    data_x,
    spike_train_y,
    clf_type="regression",
    splits=4,
    shape=(7, 7, 7),
    res_train=True,
)

4it [1:52:22, 1685.60s/it]


---- CLASSIFIER: regression ----
acc: 0.5204171822520771
[[ 42   2   1   0   0   5   6   2   5  15  13  10   0  19   3 281  10  66
    0   0]
 [  0 143 103  15  13 102  65  10   7  18  10  16  20  17  25  18   1   1
    0   0]
 [  0  17 371  23  16  69  31  13   3   6  11   9   4   8   3   6   0   1
    0   0]
 [  1  10  87 188  44  55  78  17   4  11  12  28  31   4   6  10   0   4
    0   0]
 [  0  18  66  94 163  38  67  13  11   3  13  19  30  16   7  15   4   1
    0   0]
 [  0  31  74  17   8 344  49   2   3   4  10  24   3   5   7  10   1   1
    0   0]
 [  0   2  12  15  12  10 423  22   8  11  24   8  13   6   8   7   1   3
    0   0]
 [  1   1  15   1   2  17  43 324  60  21  30   5  11  14  10  14  19   6
    0   0]
 [  0   3   7   3   4   7  44  89 291  33  43   6  12  14  14  14  10   4
    0   0]
 [  1   5   8   0   1   1  20   5   3 395 129   1   1   4   2  11   4   6
    0   0]
 [  1   0   0   0   0   2  16   2   4  42 519   2   1   4   4   3   0   0
    0   0]
 [  2   

In [13]:
# Binarised input, "regression" classifier, shape=(8, 8, 8), splits=4.
snn_experiment(
    data_x,
    spike_train_y,
    clf_type="regression",
    splits=4,
    shape=(8, 8, 8),
    res_train=True,
)

4it [1:58:45, 1781.42s/it]


---- CLASSIFIER: regression ----
acc: 0.5350008838607035
[[ 54   0   2   0   0   2   5   3   3  12  16  11   0  18   2 285  10  57
    0   0]
 [  0 151 110  15  11  99  66   4  10   9  11  23  16   9  30  19   1   0
    0   0]
 [  0  16 370  27  13  67  32  12   2   8  17   7   3   5   3   6   1   2
    0   0]
 [  0  15  83 219  36  51  81  10   5   4  10  29  28   6   6   6   1   0
    0   0]
 [  0  19  66  88 152  47  77  15   7   6   9  28  34  11   4  13   0   2
    0   0]
 [  0  36  83  22   4 334  46   3   3   6  11  22   4   6   4   8   1   0
    0   0]
 [  0   6  13  12  10   9 446  16   8  10  14  11  14   2   8   5   1   0
    0   0]
 [  0   2   7   4   1   9  52 340  63  17  19  13  10  13   5  12  23   4
    0   0]
 [  0   3   9   3   2   5  52  69 323  31  29   5   9  16  12  18   8   4
    0   0]
 [  2   2   2   1   1   1  27   2   3 391 138   3   1   4   3  11   2   3
    0   0]
 [  0   0   1   1   0   0  16   1   3  33 527   3   0   2   3   9   1   0
    0   0]
 [  2   

In [14]:
# Binarised input, "regression" classifier, shape=(10, 10, 10), splits=4.
snn_experiment(
    data_x,
    spike_train_y,
    clf_type="regression",
    splits=4,
    shape=(10, 10, 10),
    res_train=True,
)

4it [2:15:52, 2038.21s/it]


---- CLASSIFIER: regression ----
acc: 0.5578928760827294
[[ 50   1   1   0   0   2   6   2   2  15  13  10   0  26   2 270  10  70
    0   0]
 [  0 167 100  19  14  98  67   4  12   5   7  21  17  15  21  16   0   1
    0   0]
 [  0  20 387  28  13  56  31  17   3   5  13   7   3   4   1   2   0   1
    0   0]
 [  0  16  88 223  41  49  79   7   7   9   9  20  25   3   6   5   0   3
    0   0]
 [  0  16  58 102 191  33  66  12   8   8   7  15  36   9   5  10   1   1
    0   0]
 [  0  32  73  21   6 351  44   1   1   7   6  24   3  12   5   5   1   1
    0   0]
 [  0   4  13  12  13   9 442  15   9  11  18  14  14   3   3   3   2   0
    0   0]
 [  0   1   7   4   4  11  44 340  67  10  21   8  14  15  10  12  21   5
    0   0]
 [  0   4  12   0   2   7  46  69 356  17  18   6   8  14  11  12  11   5
    0   0]
 [  2   6   4   0   0   2  17   4   6 409 124   0   1   4   2  10   0   6
    0   0]
 [  1   3   0   1   0   0  17   1   2  39 528   5   0   1   0   1   1   0
    0   0]
 [  0   

#### XGBoost

In [15]:
# Binarised input, "xgboost" classifier, shape=(6, 6, 6), splits=4.
snn_experiment(
    data_x,
    spike_train_y,
    clf_type="xgboost",
    splits=4,
    shape=(6, 6, 6),
    res_train=True,
)

4it [1:56:27, 1746.93s/it]


---- CLASSIFIER: xgboost ----
acc: 0.5448117376701432
[[215   9   1   1   2   2   4   3  13   9   1  10   2  17   7  86  18  30
   19  31]
 [  2 223  64  31  31  62  21  12  11  13   5  15  28   6  30  12   5   3
    5   5]
 [  1  40 319  44  31  55  24   9   9   7   3  13  14   4   6   6   0   1
    3   2]
 [  0  46  55 225  70  31  29  16   8   7   6  21  40   9   9   8   4   2
    1   3]
 [  3  34  30  90 231  33  36  13   9   7   2  16  42   6  10   4   2   3
    4   3]
 [  0  68  66  38  29 275  13  10   4   8  11  18  20   9  10   6   1   2
    2   3]
 [  2  17  14  29  31   7 355  21  21  12  11   7  25   7  10   2   4   3
    5   2]
 [  7  10  15   9  12  13  21 343  60  13   5  12  27   4  10   7   6   6
   10   4]
 [  6  14   5   4   9  11  21  67 334  26  10   7  19   8  15   5  17   5
   11   4]
 [  5  11   4   4   3   2  11  13  13 391  80   7   6   9   8   0  10  11
    9   0]
 [  1   3   2   4   5   4   8   5  13  75 445   5   6   3   4   1   2   6
    7   1]
 [  7  11  

In [None]:
# Binarised input, "xgboost" classifier, shape=(7, 7, 7), splits=4.
# NOTE(review): the saved notebook holds no execution count or output for
# this cell — re-run it so the (7, 7, 7) result is recorded.
snn_experiment(
    data_x,
    spike_train_y,
    clf_type="xgboost",
    splits=4,
    shape=(7, 7, 7),
    res_train=True,
)

In [None]:
# Binarised input, "xgboost" classifier, shape=(8, 8, 8), splits=4.
# NOTE(review): this cell has no execution count, and the output saved under
# it is a 3x3 confusion matrix, whereas every other cell in the notebook
# reports 20 classes. That output looks stale (from a different dataset) —
# re-run the cell before quoting its accuracy.
snn_experiment(
    data_x,
    spike_train_y,
    clf_type="xgboost",
    splits=4,
    shape=(8, 8, 8),
    res_train=True,
)

4it [18:12, 273.23s/it]

---- CLASSIFIER: xgboost ----
acc: 0.9147331786542924
[[545  26  13]
 [ 36 529  29]
 [ 20  23 503]]





In [17]:
# Binarised input, "xgboost" classifier, shape=(10, 10, 10), splits=4.
snn_experiment(
    data_x,
    spike_train_y,
    clf_type="xgboost",
    splits=4,
    shape=(10, 10, 10),
    res_train=True,
)

4it [2:35:02, 2325.74s/it]


---- CLASSIFIER: xgboost ----
acc: 0.6352306876436273
[[237   4   3   0   0   2   0   4   9   5   2   4   1  14   6  84  19  28
   23  35]
 [  3 296  71  16  18  56  18   6  11  12   2  15  21   2  25   7   1   1
    2   1]
 [  0  35 376  37  23  54  17   6   6   6   1   3  15   2   2   6   0   0
    0   2]
 [  3  34  47 306  66  24  33   6   8   4   3  12  32   7   2   1   0   0
    2   0]
 [  2  30  23  80 281  22  29  10   3   7   4  15  48   6   8   3   2   2
    2   1]
 [  0  68  47  31  20 349  18   8   3   5   3  12   8   5   4   6   1   1
    3   1]
 [  0  11   8  24  25   7 386  21  18  13  10   5  33   2  11   2   3   3
    1   2]
 [  5   6   2   8  10   4  23 389  64   7   6   4  19  10   2   2  23   2
    5   3]
 [  5  12   4   6   7   3  21  76 380  12   8   5  13  11  12   3  11   2
    5   2]
 [  4  10   1   1   4   3  12   3  11 450  64   0   2   2   9   3   6   2
    7   3]
 [  2   8   2   2   4   1   9   4  14  54 484   0   1   3   3   2   0   1
    6   0]
 [  5   9  