# AIR Project

## Imports and specific settings

In [271]:
import ast
import string

import torch
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
from gensim.models import Word2Vec
import multiprocessing
import time
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset

In [272]:
# Lift every pandas display limit so frames are rendered in full.
for _display_option in ('display.max_rows',
                        'display.max_columns',
                        'display.width',
                        'display.max_colwidth'):
    pd.set_option(_display_option, None)
del _display_option  # do not leave the loop variable behind in the notebook namespace

# Enable cuDNN's benchmark mode.
torch.backends.cudnn.benchmark = True

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("running models with {}".format(device))

running models with cuda:0


## Loading of dataset and nan-removal

In [273]:
def pre_process(source_path='WELFake_Dataset.csv', output_path="data/data.csv",
                short_text_limit=10, preview_rows=300):
    """Load the raw dataset, drop unusable rows and save the cleaned table.

    A "text" or "title" value whose string form has ``short_text_limit``
    characters or fewer is treated as missing (a real NaN stringifies to
    "nan" and is therefore caught by the same rule). Every row with a missing
    value in any column is then dropped and the index is renumbered from 0.

    Args:
        source_path: CSV file to read; its first column is used as the index.
        output_path: Where the cleaned CSV is written. Missing parent
            directories are created.
        short_text_limit: Longest string length that is still discarded.
        preview_rows: Number of leading rows shown with ``display`` after
            saving; ``0`` or ``None`` skips the preview.

    Returns:
        None. The result is written to ``output_path``; the shapes before and
        after cleaning are printed.
    """
    # Local import: the notebook's import cell does not provide pathlib.
    from pathlib import Path

    data = pd.read_csv(source_path, index_col=0)
    print(data.shape)

    # One mask per column instead of a Python loop with per-cell .loc writes.
    for column in ("text", "title"):
        too_short = data[column].map(
            lambda value: len(str(value)) <= short_text_limit
        ).astype(bool)
        data.loc[too_short, column] = np.nan

    data = data.dropna().reset_index(drop=True)
    print(data.shape)

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    data.to_csv(output_path)
    if preview_rows:
        display(data[:preview_rows])

## Tokenization

In [274]:
def tokenize():
    """Tokenize, stopword-filter and stem every title and text of the cleaned dataset.

    Reads 'data/data.csv', and for both the "title" and the "text" column of
    each row:
      1. replaces every ASCII punctuation character and a few Unicode
         quote/dot characters with a space,
      2. splits the result with nltk's ``word_tokenize``,
      3. lower-cases the tokens and drops English stopwords,
      4. stems the remaining tokens with the English Snowball stemmer.

    The token lists are written, together with the unchanged "label" column,
    to "data/data_token.csv". Every 5000th row index is printed as progress.

    Returns:
        None.
    """
    # A set gives constant-time stopword lookups (the nltk list is scanned linearly).
    stop = set(stopwords.words('english'))
    stemmer = SnowballStemmer('english')
    # Curly quotes and dot leaders that string.punctuation does not contain.
    punc = [u'\u201c',u'\u201d',u'\u2018',u'\u2019',u'\u2024',u'\u2025',u'\u2026',u'\u2027']
    # One translation table maps each unwanted character to a single space.
    to_space = str.maketrans({char: " " for char in string.punctuation + "".join(punc)})

    def _clean(raw):
        """Return the stemmed, stopword-free, lower-cased tokens of one cell."""
        tokens = nltk.tokenize.word_tokenize(str(raw).translate(to_space))
        lowered = (token.lower() for token in tokens)
        return [stemmer.stem(token) for token in lowered if token not in stop]

    data = pd.read_csv('data/data.csv', index_col=0)
    titles = list()
    texts = list()
    for i, row in data.iterrows():
        titles.append(_clean(row["title"]))
        texts.append(_clean(row["text"]))
        if i % 5000 == 0:
            print(i)

    d = {"title":titles, "text":texts, "label":data["label"]}
    data_cleaned = pd.DataFrame(data=d)
    # NOTE(review): the Word2Vec cell of this notebook reads
    # 'data_tokenized/data_token.csv', not this path - confirm whether the
    # file is moved by hand or one of the two paths is outdated.
    data_cleaned.to_csv("data/data_token.csv")
# tokenize()

## Creating and Training Word2Vec Model

In [275]:
# Set to False to force retraining even when a saved model exists.
load_model_from_disc = True
w2v_model = None
# NOTE(review): tokenize() in this notebook writes "data/data_token.csv", while
# this cell reads from "data_tokenized/" - confirm which path is current.
data = pd.read_csv('data_tokenized/data_token.csv', index_col=0)
# The "title" and "text" cells are token lists stored as strings; they are
# parsed on demand with ast.literal_eval instead of being converted up front.

w2v_load_error = None
if load_model_from_disc:
    try:
        w2v_model = Word2Vec.load("word2vec.model")
    except Exception as exc:
        # Best effort: any load failure falls back to retraining below, but the
        # reason is kept so it can be reported. Unlike a bare `except`, this
        # lets KeyboardInterrupt through.
        w2v_load_error = exc

if w2v_model is None or not load_model_from_disc:
    if load_model_from_disc:
        print("Could not load model from disc. Training model...")
        print("Reason: {!r}".format(w2v_load_error))
    else:
        print("Loading from disc deactivated. Training model...")

    class MySentences(object):
        """Re-iterable corpus: yields the token list of every text, then of every title."""

        def __init__(self, data):
            self.data = data

        def __iter__(self):
            # Iterate the frame handed to the constructor, not the notebook global.
            for doc in pd.concat([self.data["text"], self.data["title"]]):
                yield ast.literal_eval(doc)

    sentences = MySentences(data)

    cores = multiprocessing.cpu_count()
    w2v_model = Word2Vec(min_count=20,
                         window=2,
                         sample=6e-5,
                         alpha=0.03,
                         min_alpha=0.0007,
                         negative=20,
                         # cores - 1 would be 0 on a single-core machine.
                         workers=max(1, cores - 1))

    # The corpus is streamed twice: once to build the vocabulary, once to train.
    w2v_model.build_vocab(sentences, progress_per=10000)
    t = time.time()
    w2v_model.train(sentences, total_examples=w2v_model.corpus_count, epochs=3, report_delay=1)
    print('Time to train the model: {} mins'.format(round((time.time() - t) / 60, 2)))
    w2v_model.save("word2vec.model")
else:
    print("Model loaded from disc.")

Model loaded from disc.


In [276]:
# Sanity check: embedding similarity for a pair of presumably unrelated tokens.
w2v_model.wv.similarity("amazon", 'nazi')

-0.11904364

In [277]:
# Sanity check: embedding similarity for a pair of presumably related tokens.
w2v_model.wv.similarity("obama", 'trump')

0.60530704

In [278]:
# Ask the model which token of the list fits least with the others.
w2v_model.wv.doesnt_match(['amazon', 'obama', 'trump'])

'amazon'

In [279]:
# Analogy query: which word is to "obama" as "georg" is to "bush"?
# The tokens were stemmed during tokenization, hence "georg" rather than "george".
w2v_model.wv.most_similar(positive=["obama", "georg"], negative=["bush"], topn=3)

[('barack', 0.607993483543396),
 ('presid', 0.4728606641292572),
 ('behest', 0.4636874198913574)]

In [280]:
# Tokens whose embeddings are most similar to "obama".
w2v_model.wv.most_similar(positive=["obama"])

[('barack', 0.832769513130188),
 ('administr', 0.6584988832473755),
 ('presid', 0.6397863626480103),
 ('predecessor', 0.625878632068634),
 ('trump', 0.6053071022033691),
 ('bush', 0.557529628276825),
 ('outgo', 0.5535241961479187),
 ('undo', 0.5471165180206299),
 ('holdov', 0.5300476551055908),
 ('clinton', 0.5224902629852295)]

In [281]:
# Tokens whose embeddings are most similar to "presid" (a stemmed token, e.g. of "president").
w2v_model.wv.most_similar(positive=["presid"])

[('barack', 0.7051833868026733),
 ('45th', 0.6659713387489319),
 ('successor', 0.6399291157722473),
 ('obama', 0.6397863626480103),
 ('administr', 0.6288126111030579),
 ('trump', 0.6117547750473022),
 ('donald', 0.6109979748725891),
 ('predecessor', 0.6096833348274231),
 ('pres', 0.594482958316803),
 ('presidenti', 0.5868942737579346)]

## Creating Doc2Vec
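Each document (title and text) is represented by the average of the word2vec vectors of its in-vocabulary tokens. Note that this is a mean-of-word-vectors embedding, not gensim's `Doc2Vec` model.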

In [282]:
# creates w2v representation for all documents and titles
def doc2vec(frame=None, model=None, output_path="data/data_w2v.csv", preview_rows=100):
    """Embed every title and text as the mean of its tokens' word vectors.

    For each row, the stringified token lists in "title" and "text" are parsed
    and the vectors of all tokens known to the model are averaged. Tokens
    missing from the vocabulary are skipped; a document without any known
    token keeps the all-zero vector. The vectors (as lists) are written with
    the "label" column to ``output_path``.

    Args:
        frame: DataFrame with "title", "text" and "label" columns. Defaults
            to the notebook-level ``data``.
        model: Object exposing ``vector_size`` and a ``wv`` lookup that raises
            ``KeyError`` for unknown tokens. Defaults to the notebook-level
            ``w2v_model``.
        output_path: Destination CSV. Missing parent directories are created.
        preview_rows: Number of leading rows shown with ``display`` after
            saving; ``0`` or ``None`` skips the preview.

    Returns:
        None. Progress is printed for every 5000th row index, followed by the
        total run time.
    """
    # Local import: the notebook's import cell does not provide pathlib.
    from pathlib import Path

    frame = data if frame is None else frame
    model = w2v_model if model is None else model

    def _mean_embedding(tokens):
        """Average the vectors of the known tokens (zeros if none are known)."""
        total = np.zeros(shape=model.vector_size)
        known = 0
        for token in tokens:
            try:
                total += model.wv[token]
            except KeyError:
                continue  # out-of-vocabulary token: leave it out of the mean
            known += 1
        if known:
            total /= known
        return total

    titles = list()
    texts = list()
    start = time.time()
    for i, row in frame.iterrows():
        titles.append(_mean_embedding(ast.literal_eval(row["title"])).tolist())
        texts.append(_mean_embedding(ast.literal_eval(row["text"])).tolist())
        if i % 5000 == 0:
            print("[{}/{}] - {:.1f}s".format(i, len(frame.index), time.time() - start))
    end = time.time()
    print("creating doc2vec took {:.1f}s".format(end - start))

    d = {"title":titles, "text":texts, "label":frame["label"]}
    data_w2v = pd.DataFrame(data=d)
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    data_w2v.to_csv(output_path)
    if preview_rows:
        display(data_w2v[:preview_rows])
# Build the averaged-embedding table from the notebook-level `data` and `w2v_model` and save it.
doc2vec()

[0/70735] - 0.0s
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0.]
[ 0.99748569  0.2247427   0.4354149   0.19536692 -0.2018244   0.09970292
  0.55635818  0.61088441  0.65475376 -0.27401805 -0.61830201 -1.27911472
 -0.45447664 -0.01037367 -0.39624905 -0.71380613 -0.2468091  -1.00076924
  0.50735812  0.12101652  0.85197233 -0.63821731 -0.23425124  0.15814104
  0.64235136 -0.17906579  0.43393953  0.62042567 -0.9378583   0.10757749
 -0.49557299  0.36399874 -0.55203386 -0.11273773  0.57342574 -0.16634555
  0.70404322  0.92241394  0.55667547 -0.20816373 -0.25437867 -0.02401135
 -1.15483778 -0.62305382  0.13446344  0.76675004 -0.39184166  1.03597337
 -0.56808898 -0.11320374  0.56671156 -0.22944351 -0.58812296 -1.10388675
 -0.75314739  0.3267

Unnamed: 0,title,text,label
0,"[-0.17737161740660667, 0.23486142853895822, 0.12445201848944028, -0.36334328601757687, 0.3301913384348154, -0.028877111462255318, 0.3542606706420581, -0.1827598474919796, -0.36685079149901867, 0.17834910843521357, 0.2996298174063365, -0.34684255595008534, -0.13291914388537407, 0.5066589800020059, 0.19267991185188293, -0.2708385114868482, -0.017944110557436943, 0.28307173525293666, -0.11388187358776729, -0.30716870352625847, 0.41513919038698077, 0.7277844523390135, -0.06439176139732201, 0.09227927153309186, -0.5425457172095776, 0.14019572952141365, 0.029552965114514034, -0.4386397817482551, -0.42108624925216037, -0.413217222938935, -0.13197220209985971, 0.21986744770159325, -0.2825335699599236, -0.16889805952087045, -0.4155910698076089, 0.2879291175243755, -0.1103774116684993, -0.8907897769434688, -0.3223227026561896, 0.10792573584088434, -0.1445204878691584, -0.2927070196407537, 0.2897471431642771, 0.10879019275307655, 0.47844405348102254, -0.44211805953333777, 0.5719812869404753, -0.4012464658978085, 0.7594906856926779, -0.1151490987977013, 0.14371274524213126, 0.22255045330772796, -0.1222666905572017, 0.4955827960123618, -0.31929607689380646, 0.017945933466156323, 0.16614588738108674, -0.01917497751613458, -0.07837012064798425, -0.35003646835684776, 0.03681470391650995, 0.2148221828926277, 0.5458240484197935, -0.37602727115154266, -0.15628574431563416, 0.0708180017148455, -0.18581471219658852, 0.08098900131881237, -0.539757615265747, 0.5942123178392649, 0.3621960940460364, 0.18527882150374353, 0.20178120645383993, -0.012844864279031754, -0.5178411702315012, 0.5746897514909506, -0.21704354463145137, -0.5987081391115984, -0.42566451740761596, -0.23372547763089338, 0.31736567585418624, -0.20182241996129355, -0.7591941518088182, 0.20308093664546809, 0.03316183264056841, 0.42153524021462846, -0.08880513596038024, -0.13754549156874418, 0.5791262040535609, -0.3480027320717151, 0.28539641325672466, 0.2626928749183814, 0.11944863498987009, 0.2772822293142478, 
0.15702432145675024, 0.49627114459872246, 0.4761998246734341, -0.2544484054669738, -0.09271977220972379, -0.5678285965695977]","[-0.40317140313859323, 0.17912740358198212, 0.050562859989638054, 0.055954877193714135, 0.0934258276801718, -0.023519680137824826, 0.24632954259287898, 0.1434498220579113, -0.23731825938003862, 0.017518557575046452, 0.02849149547639523, -0.33533329706499654, -0.2550358505249872, 0.37727573531560615, -0.23844510968698798, -0.0501059377303351, 0.00435799360805577, -0.042552340270672596, -0.07153317957728648, -0.28219471891853076, 0.316290018046194, 0.3761055734737851, 0.07512606074023434, 0.12298925005307587, -0.2549261285887057, 0.14509551490984307, -0.13024391400400012, -0.3689671294372771, -0.086444049350384, -0.15495215342814456, 0.29306362660242136, 0.28684928263699205, -0.2653720033543948, -0.1925096292161825, -0.4215396416248297, -0.043265524127373146, -0.1471679153010742, -0.6697407869658475, -0.12966115223728686, -0.30731571397677576, -0.08219364198473224, -0.20401955047391765, 0.10506988533594748, 0.12025182841066877, 0.12918642091053717, -0.21510587996700192, 0.37280161471516216, -0.4425061703104033, 0.2538979571554335, 0.11037910568357169, -0.007000621740045653, -0.08913956716329156, 0.08515967715328555, 0.27757835686267424, -0.22118830430898295, -0.07811686954735789, 0.05627476818376744, 0.08087808955834527, -0.0853087707212318, -0.1422012611342808, 0.17385939709284623, 0.2476592813871009, -0.041187109410764804, 0.1138095081755548, -0.05519140118691634, -0.014071714172418554, -0.0672952887194823, 0.12840113050306626, -0.3247153297708551, 0.10708220859983464, 0.07283706835182202, 0.0888161897662332, 0.24702526510182016, 0.09409497735250151, -0.11882258266556014, 0.2885739945335851, 0.11462983908889302, -0.11191882241876905, -0.21861281287083664, -0.01776596985215931, -0.06559378575228575, 0.03072875509858778, -0.548379166946704, 0.3558431576434654, -0.08911928602538745, 0.007791372722645094, -0.0045858592184113015, 
-0.25692187547845335, 0.12068631859403306, 0.04987181828775465, -0.023052691821404885, 0.09747564121414129, -0.0005873091000691864, -0.2433421427602203, -0.08566320109647778, 0.3659985110734654, 0.19498569313881195, -0.10852157450715431, 0.05691196544347391, -0.48680588649080786]",1
1,"[0.02511770012123244, 0.40957435380135265, -0.2369934692978859, -0.2121798044868878, 0.295932443014213, 0.18076643484528176, 0.4779385652925287, 0.3257415433015142, -0.22047330299392343, 0.3010874473090683, -0.1983625101191657, -0.21182628494820424, -0.0414447837642261, 0.6450525932013988, -0.09116567777735847, -0.08050191402435303, 0.17859128064342908, 0.31608657113143374, 0.049787470272609165, -0.1692362836495574, 0.6455044937985284, 0.8739587222891194, 0.262510446831584, 0.36309332280818907, 0.08998486293213707, 0.2517449166625738, 0.09113263977425438, -0.7348068416384714, -0.41157201983566793, -0.15110740810632706, 0.30817642119447036, 0.2632670695228236, -0.0579057793532099, -0.33890827372670174, -0.31094187358394265, 0.3398670011333057, -0.32165802024038775, -0.6064861507288047, -0.30095257610082626, -0.018423321523836682, -0.38184844156993286, -0.044527502464396615, 0.16442051649625813, 0.4289762872670378, 0.2865532035274165, 0.17759397359830992, 0.032378499529191425, -0.36855629325977396, 0.29892829965267864, 0.1841155386396817, -0.16069440077990294, -0.10719483212700912, 0.16666697724057095, 0.5896546255264964, -0.09820036430444036, -0.07983925193548203, -0.13275530561804771, 0.17426305477108275, -0.334408112136381, -0.23612641995506628, 0.24975493470472948, 0.24453023235712731, 0.1777616087347269, 0.015296310252909149, 0.04892375799162047, 0.05117394868284464, -0.3913779492889132, -0.0648576489516667, -0.4870030917227268, -0.0635907846902098, 0.04670811377997909, 0.06017912106056299, 0.251803882420063, 0.21904887518446362, -0.3944479356952278, 0.5487774695668902, -0.4252366375710283, -0.08660394618553775, 0.004315790321145739, 0.13724701319422042, 0.11732097848185472, -0.055962118187120984, -0.6011411058051246, -0.03905760496854782, -0.09006714687815734, 0.02149829893772091, -0.21998694139931882, -0.4404819578464542, 0.47398302650877405, 0.044731500425509045, 0.06565682802881513, 0.2270090545394591, 0.2898753397166729, -0.21483414457595376, 
-0.1501339924122606, 0.42123646422156263, 0.42888625524938107, -0.2244745253452233, -0.05343824907738183, -0.5750141973673765]","[-0.497278791187065, 0.03649287129796687, -0.1987687081896833, -0.19662800440121264, 0.45575732702300664, 0.04849873837970552, 0.3563722971649397, 0.36176267940373646, -0.13906860670873097, 0.1458419416809366, -0.020719180898075656, -0.5276746885584933, -0.3513494843528384, 0.20575122322354997, -0.046303607523441315, -0.12611781610619455, 0.461240967469556, 0.04482300934337434, 0.09319010447888147, -0.3598102162636462, 0.46420255800088245, 0.4922039510593528, -0.020578479305619284, 0.20740097960723297, 0.18391937483102083, -0.02730514162353107, -0.12997925097477578, -0.4340501227416098, -0.2169292058263506, -0.4355809491659914, 0.09206717239604109, 0.23843712741065592, -0.20490295351261184, 0.12552743334145772, -0.15149599313735962, 0.3048461294910383, -0.26265874097035047, -0.4785894396759215, -0.23121458300877185, 0.01634822394477115, -0.061882946108068736, -0.06164681556678954, 0.23766593313554213, 0.2686609716287681, 0.2596390242023127, 0.15531175033677191, -0.13292797486342134, -0.3364946667016262, 0.702708037126632, 0.23629601743249667, 0.2561979822459675, -0.3964966136429991, 0.2380670133445944, 0.16033858992159367, -0.20699800783768296, 0.0041048335177557805, 0.1579347246636947, 0.3818398253282621, -0.25901801577614, -0.16539799333328292, -0.025897580420687086, -0.18331807783050907, 0.2959791941656953, 0.0695751394544329, 0.1307861952421566, -0.34561815041871297, -0.440458602138928, -0.10328851214476995, -0.3142488510126159, 0.20889647790629948, 0.11008048944530033, 0.17952004137138525, 0.0849093296343372, 0.0879834255292302, -0.07758655842571031, 0.21252255975490525, -0.36786546433965367, -0.02464901762349265, 0.047688860446214676, 0.1468259837655794, -0.06758021385896773, -0.04540533572435379, -0.8026305020210289, 0.45696018024214674, 0.04316397845035508, 0.37423078520666986, -0.35045326394694193, -0.16606391398679643, 
0.07041762716003827, 0.12300118928154309, -0.006529001607781365, 0.16815829197210924, 0.3648508200865416, -0.3041465441651997, -0.5159245558703939, 0.8508004340831012, 0.3297534906083629, -0.37297812068746206, -0.014513468458538964, -0.2911757016110988]",1
2,"[-0.18137966755491036, -0.08797292411327362, 0.004238184845934694, -0.06382937568168227, 0.08547974277574283, 0.2589952452824666, 0.22129459063021037, 0.25196134184415525, -0.2422392342526179, -0.13630880014254496, -0.11156080462611638, -0.38076543267099905, 0.134959817935641, 0.26355569523114425, -0.3214533179998398, -0.21079164585815027, -0.08585174313674752, -0.3563975439621852, 0.10659908732542625, -0.43123240367724347, 0.1774470702959941, 0.2940484600571486, 0.08247136367628208, 0.24300733201492292, 0.041820862545416906, 0.03976757853077008, -0.6618830980016635, -0.1365467434605727, -0.11109876661346509, -0.11948445095465733, -0.08143085642502858, -0.06569484882773115, 0.08370052908475582, -0.378593758894847, -0.40621226796737087, -0.01743231971676533, -0.06335598402298413, -0.303757173797259, -0.016869221742336567, -0.08337448365413226, -0.015659298317936752, 0.15938145867907083, -0.17413212616856283, 0.03617507104690258, -0.02500384759444457, 0.05648108686392124, 0.1591061889552153, -0.5922373473071135, -0.31322664065429795, 0.2221854369227703, 0.2875592728646902, -0.3265175700116043, 0.5509033534102715, 0.3069908047238222, 0.10612561066563313, 0.12667898991360113, 0.38340479135513306, -0.3291937857866287, -0.46791852437532866, -0.46339404582977295, 0.012443413241551472, 0.053425877140118525, -0.10459177196025848, 0.2082559848920657, -0.23522640506808573, -0.07775621753759109, -0.16612495023470658, -0.08938026141661864, -0.1947321380273654, 0.5730185067424407, 0.3680623317710482, 0.18277243003607369, 0.30557176671349084, 0.1420518457889557, 0.3605213096508613, 0.23373183070753628, 0.44539543642447543, 0.06247592001007153, -0.659414354711771, -0.35927286027715755, 0.0060245366050646855, -0.3471457471068089, -0.5816022214981226, 0.23935032062805617, 0.20156793353649285, -0.4132844725480446, 0.3141246381430672, -0.1349649830506398, 0.07739886326285508, 0.12102583767129825, -0.2510844274209096, -0.07175569465527168, 0.1519274006669338, 0.08404981402250436, 
-0.2402304708957672, 0.3519807518101655, 0.16797188726755288, -0.3577107128042441, 0.06567777693271637, -0.207927713313928]","[-0.37401702814492627, 0.01021529059536049, 0.0583857167424554, -0.003212013227429481, 0.15185035363311306, 0.07765557302854598, 0.2228348764310208, 0.24602372170959835, -0.06093494008797201, 0.03614821958708294, -0.12133492547660372, -0.3439180725229083, -0.026513522595773647, 0.27287333407573855, -0.323424114918218, -0.11326015142940475, -0.00688493457702314, -0.08372576336607322, -0.09365730446460406, -0.36111546168448055, 0.1932504929607075, 0.5558657895914317, 0.06488337551690487, 0.1354018378616636, 0.007129442439554134, -0.10116524946085999, -0.4146002467849382, -0.29514255560527713, -0.11033002779379697, -0.14232612011623405, 0.20528716517434586, 0.14028659507061758, -0.004018140745521974, -0.46916682433877743, -0.33439555349672134, -0.11028293092792693, -0.1777788767320735, -0.43268595958787004, -0.24369445153867567, -0.19841109082585962, -0.04248716580801674, -0.08759286376035531, 0.13180060064389904, 0.15034417731447086, 0.12110694788705548, -0.03298926206558482, -0.0715983877174734, -0.44766530591357057, 0.042486977203929376, 0.16794337613491833, 0.08747038679028128, -0.3505256360355121, 0.3420960733088695, 0.33542610426378305, -0.002321105170619464, -0.04109417470575746, 0.13721910352732294, -0.032653560078019395, -0.13513579237652237, -0.30720000219987004, 0.06983962582748679, 0.16747963983159994, 0.0076759286003903315, 0.08500361946781956, -0.004077154279139484, -0.08116800274862032, -0.14947569441515482, 0.048568738659425084, -0.2908690543751608, 0.26286621511896935, 0.03500523486253505, 0.16545969230031993, 0.13713310826662115, 0.2025975823935662, 0.11116380398994141, 0.33414532766916527, 0.19185157175180853, -0.03611860177867735, -0.10354832942547185, -0.12848277354124127, 0.09250807800911985, -0.07588974642223098, -0.5085612345232299, 0.28742239211535114, 0.008071045815614202, -0.2215275489412228, 0.0784908719442194, 
-0.08756495078009183, 0.0823031904556731, 0.08906284166482448, -0.044941430265152446, 0.044114616903869475, 0.19857628070879213, -0.05399021906236823, -0.17584589377711576, 0.44220216935221984, 0.07681490083945469, -0.284344514304824, 0.15452063533005564, -0.30147859623457895]",0
3,"[-0.2642590190199288, 0.18431492420760068, -0.004173693331805142, -0.08160627430135553, 0.3693362725051967, 0.20644220845265823, -0.06657807528972626, 0.13510967046022415, -0.11797136237675493, 0.011056811633435163, -0.3754494183442809, -0.4107481491836635, -0.40144664726474066, 0.5172222873026674, -0.22154958410696549, 0.07390706342729655, 0.3951555386778306, 0.031829367984424935, 0.0855154511016074, -0.3723175403746692, 0.11586081846193834, 0.22346809370951218, 0.01367721435698596, -0.039258331060409546, -0.026297368109226227, 0.41141973503611307, -0.3329925558275797, -0.19606724787842145, -0.3221280009231784, -0.17582778361710635, 0.16931176016276533, 0.46778412061658775, 0.009829487482255156, 0.11231744805858894, -0.2085000200287058, 0.05621220188384706, -0.43309165435758507, -0.37143130210990255, 0.02378679405559193, -0.266397164626555, 0.02834791453047232, 0.07281629605726762, -0.21397724070332266, 0.007080760191787373, 0.2069225934418765, -0.3282147768715566, 0.7753153768452731, -0.7080797214399684, 0.37580903551795264, -0.12297952547669411, 0.05992150035771457, 0.24169358882037076, 0.30442121489481494, -0.04538221369412812, -0.3845638463442976, -0.20098050412806598, 0.13691604476083408, -0.19212760784747926, -0.06015936488454992, -0.36407587880438025, 0.020762421275404366, 0.1408748985691504, -0.0009223011068322442, 0.11645675145766952, -0.3075380968776616, 0.07381115388125181, 0.060015821660106834, -0.22016516818918966, -0.06000577190636911, -0.025444644418629734, -0.14039127731865103, 0.11827410757541656, -0.15340043739839035, -0.28438339856537903, -0.07852726497433403, 0.43617054549130524, 0.23373026747933842, -0.2301648243923079, -0.14382378215139563, 0.020209653811021286, -0.14420620622960004, -0.18718280012465335, -0.06205486133694649, 0.47237536311149597, 0.281853585080667, 0.1166432989941148, 0.2228167341514067, -0.2571713717823679, 0.34105781228704884, 0.3005536344241012, 0.06508973850445314, -0.0954068973660469, 0.4357104196731763, 
-0.1383892771872607, 0.11755724032197824, 0.38674153285947716, -0.05463681116022847, -0.17485560629178176, 0.6582787805660204, -0.3565672961148349]","[-0.1523945692428191, 0.4155647132956716, 0.12508653209680262, -0.10937462223282722, 0.11890088880823776, 0.1527263335577549, 0.02524663142842446, 0.19970529909713516, 0.343479214437271, 0.2619680100073601, -0.414583243593626, -0.23618812531127897, -0.07411711706731801, 0.37265191652632795, -0.012985314162745372, -0.15364850668931826, 0.2348918002164385, -0.168207285835975, 0.3309294135944301, -0.3717227481926481, 0.2200056361340581, 0.3903890362953846, -0.19480527773829606, 0.18026932988516953, -0.3189576552763249, 0.2318269316142014, -0.29740466260429943, -0.18275629500410315, -0.5636565491323198, -0.0702428449614262, 0.0002941918240186754, 0.14881544078136205, -0.37409912939008516, -0.15087991428644584, -0.07740960814968241, 0.18255272952778517, -0.029801395655054373, -0.27125301226501314, 0.057592289649090016, -0.17192256921561253, -0.07652049854877645, -0.14035342276717225, -0.12802932161330616, 0.19028489913861377, 0.38413410288819544, -0.3029009117968949, 0.03658308769180549, -0.23222143857132102, 0.1797585276735973, -0.026376844484341536, -0.007294439269063618, 0.2695048904678229, 0.17597551505024514, -0.3850120923529978, -0.47075469561798133, -0.41420356638537187, -0.1177765683821914, 0.025530789287961986, -0.01606993935127773, -0.35823968865038575, 0.14779181193621577, 0.10069114305166431, 0.023512076042120307, -0.14942893304997626, -0.16639394601785137, 0.2760339765612718, -0.26924055058753965, -0.2415769680298235, -0.13299523699184784, 0.13938848418730176, -0.15147593782060373, 0.20419450845796738, 0.3216086747955548, -0.11485704792414228, -0.1793143993446892, 0.4774172451608637, 0.28942773864766314, -0.08424536760091922, 0.21866973663086975, -0.1926106653250579, -0.26044585163587, -0.07815616393047538, -0.02783836354938786, 0.2830627674024832, 0.5428321675305635, 0.24468504893101595, 0.29537480466609967, 
-0.017822564817842602, 0.08789550888454847, -0.08769223588068441, 0.04479590536667987, 0.02293812354125887, 0.3163710894172972, -0.21586206569522182, 0.17310601723010918, 0.32137678695313565, 0.17864511260585728, -0.26160586735740865, 0.41451517293369616, -0.3265126918097127]",1
4,"[-0.5060462322500017, 0.16238908298934498, 0.07844466798835331, 0.09976751605669658, 0.1598321882645703, 0.3847079144583808, 0.13245125290834242, -0.04704207926988602, -0.4749842435121536, 0.13600958832022217, 0.05589736873904864, -0.05574198646677865, -0.018646299839019775, 0.1396078603963057, -0.07274273037910461, 0.15557260397407743, -0.04658369057708316, -0.071958150404195, 0.3446362896098031, -0.9283482312328286, 0.38946086582210326, 0.20335614846812355, -0.4282994833257463, -0.10116092032856411, -0.48652802283565205, -0.1823082168896993, -0.15999158968528113, -0.24553197746475539, -0.23813714418146345, -0.14271863632731968, -0.20474654022190306, 0.17098930933409268, 0.24390742927789688, -0.15544326768981087, -1.001678021831645, -0.014688954999049505, -0.24724640402322015, -0.27984052730931175, -0.1126230541202757, 0.2850455534127023, -0.3443741798400879, -0.044408157053920955, 0.45622486621141434, -0.09197049753533469, 0.15281234681606293, -0.27372196316719055, 0.27540500462055206, -0.5865538203054004, 0.29200326402982074, 0.4259008432014121, 0.039456659307082496, 0.14910412564252815, 0.0980038775338067, 0.5071614616447024, -0.26272785291075706, -0.5330787060989274, -0.07618523565017515, -0.1325905401673582, -0.14126232928699917, -0.18790204326311746, 0.14082881280531487, -0.06605948838922712, 0.29855062771174645, -0.5412313722901874, -0.16857649717066023, -0.7469736602571275, -0.255659282207489, -0.16502751948104966, -0.2960519129410386, -0.07252014180024464, 0.027840335439476702, 0.08286263095214963, -0.15864028533299765, 0.1276298397117191, 0.04916425308005677, 0.1403314121481445, 0.18153706482715076, 0.02480941845311059, -0.2450822964310646, -0.2898458176188999, 0.11876122570700115, -0.5216472649739848, -0.7757940722836388, 0.8458635906378428, 0.20464875140330857, -0.2484126811226209, 0.014261298916406102, 0.11114664334389898, 0.2596695212026437, 0.29221633531981045, -0.025930719243155584, 0.07040850321451823, 0.019603905785414908, 
-0.027835371748854715, -0.09051524764961666, 0.7103466325336032, 0.3833734995375077, -0.17731430298752254, 0.17610270529985428, -0.16509462727440727]","[-0.5834946689782319, 0.07412079007502784, -0.012929324364220654, 0.009188179405733805, 0.22871015129947206, 0.17586108816278823, 0.21321013626401072, 0.05577067820251816, -0.10473354571947345, 0.039660830975130755, -0.0045938808376107506, -0.2971374513288201, -0.3008080259707101, 0.17565957685518596, -0.17163864012669633, -0.09771830631351029, 0.02456178423707132, 0.18208097417459443, 0.3058615208439598, -0.5493343668679397, 0.36969712114444486, 0.39201114350688404, -0.04834927135940503, 0.06526608631290771, -0.309522407702429, -0.19581293319869372, -0.3449497576949566, -0.3890112104239287, -0.3003995149775788, -0.2878107019631123, 0.14281808375208466, 0.05221273837162665, 0.006590775609516574, -0.37635868117903115, -0.48462091057940765, -0.13257115987577925, -0.3313681541114218, -0.38044092524106854, -0.12066028973570576, 0.12363728909856743, 0.04239645665166555, -0.06342031976005442, 0.18987778977887637, 0.1567634211163278, 0.3480140145040221, -0.059719035857253605, -0.1420120881939376, -0.5134277017718112, 0.1769865412523763, 0.2815591857544388, 0.09879485938905014, -0.3274979095664565, 0.30421351299931604, 0.41706501822228786, -0.26180120271251156, -0.11910667019309822, 0.08760057372144527, 0.03308714690307776, -0.2249296935585638, -0.2545392587505005, -0.06313018134484688, 0.0502066233643779, 0.4904786852740303, -0.12929894905537367, -0.18023770739738312, -0.4847095892592161, -0.1815072696194639, 0.12037101925385219, -0.20236743670646792, 0.09129134486256926, 0.03916346870914653, -0.05736159909930494, 0.00828415485167945, 0.05599363691483935, -0.13947600540877492, 0.1358052453716044, -0.033564742916712055, -0.04944842320142521, -0.13001849313990937, -0.31532502218873965, 0.14023080247247385, -0.2310680731770979, -0.4434276216973861, 0.3480836471542716, 0.08894529774595328, -0.0058033318138094965, 
-0.12304804663829229, 0.04304550240264723, 0.1775522820651531, 0.07681073265805564, -0.02731811736744863, -0.2526373333918552, 0.25559642324224113, -0.05425362805146034, -0.25186651711248687, 0.6090087413235947, 0.28404392805640344, -0.2393316967430076, 0.250030359391261, -0.2398383832226197]",1
5,"[-0.4345634603364901, 0.03110724856907671, 0.070881841534918, 0.2514308164404197, 0.19904472340237012, -0.12602896226400678, 0.44978704235770484, 0.5240842991254546, -0.28908256780017505, -0.2282320267774842, -0.13019368797540665, -0.2528469115495682, -0.10450625080953944, 0.16565403139049356, -0.40142975645986473, -0.3073820380324667, -0.5972572939724408, -0.2761146200990135, -0.058627045967362144, -0.5727885920893062, -0.002593553201718764, 0.1778185602954843, -0.4347634322263978, 0.32991714775562286, -0.24731175059621985, -0.821602157571099, -0.3745123341002248, -0.25506955182010477, 0.20973461722447115, -0.14258210716599767, 0.36710194836963306, 0.36751546372066846, -0.17230970751155505, -0.586358037523248, -0.05348386920311234, -0.08337806622413071, -0.1171985925598578, -0.8825970861044797, -0.1150516230951656, -0.3618280455808748, -0.35241406749595294, 0.10004565966400233, -0.009200857444242998, 0.17069128833033823, 0.2008809338916432, -0.23005869713696567, -0.37585733221335843, -0.4948955103754997, -0.10284142225811427, 0.3913230841810053, -0.01293650625104254, -0.6821498518640344, 0.067881115288897, 0.23031532696702264, -0.03335567089644345, 0.27852244539694354, 0.2793550282208757, 0.04032923856919462, -0.44827028770338406, -0.2999427123841914, -0.17450412870808082, 0.2122109505263242, 0.08036085218191147, 0.031810295884497464, 0.10442286283201115, 0.3327938284386288, 0.25084327161312103, 0.03902901844544844, -0.5823412889784033, 0.5538576228374784, -0.18790999308905817, -0.17153493179516358, 0.14858503775163132, 0.4051003647460179, -0.07742166654630141, 0.2510787102986466, -0.0683289844204079, -0.11202185533263466, -0.13966658948497337, 0.030765514651482754, 0.07558219676667993, -0.004064879634163596, -0.4658291925760833, 0.748881302096627, 0.20767938119189983, -0.3420738171447407, 0.3834585601633245, -0.15913612056862225, -0.27888031236150046, 0.23845061151818794, 0.12449173697016457, 0.04742150076410987, -0.1330383907664906, -0.010947860354049639, 
-0.10212657126513394, 0.26155801729129796, 0.446807342835448, -0.2874213013459336, 0.025830502537163822, 0.2171870917081833]","[-0.4951883386820555, 0.23439266439527273, 0.011399977374821901, -0.007779659703373909, 0.3296380444953684, -0.34304431453347206, 0.4828171543776989, 0.9613597095012665, -0.4387990552932024, -0.22119089774787426, -0.24458504118956625, -0.3594708004966378, -0.41361766518093646, 0.44469256419688463, -0.5452623385936022, 0.21975418739020824, -0.5877222009003162, -0.21515219286084175, -0.65245003066957, -0.37691121082752943, 0.007868298591347411, -0.36659403913654387, -0.49706105585210025, 0.2138334922492504, -0.44775467924773693, -0.3805770929902792, -0.4284392446279526, -0.6749394200742245, 0.3762284335680306, -0.18942233454436064, 0.6884261909872293, -0.0013303402811288834, -0.3443786781281233, -0.5243081371299922, -0.1522175958380103, -0.37381623149849474, -0.40836679690983146, -0.787804233841598, 0.22020279616117477, -0.511302966857329, -0.1131128128618002, -0.07327909581363201, -0.17267666850239038, 0.32269744016230106, 0.08100159489549696, -0.5211298335343599, -0.1133220037445426, -0.5350655047222972, -0.3839136175811291, 0.246925993822515, 0.24377784226089716, -0.910967078525573, 0.07603597640991211, 0.6732962355017662, 0.0492690484970808, 0.41104588750749826, 0.1577030960470438, -0.07241347152739763, -0.3087329436093569, 0.07280447450466454, 0.08777685143286362, 0.5398184105579276, 0.033635037019848824, 0.2800473886745749, 0.06361858228046913, 0.17549632291775197, 0.6261077672243118, 0.0665851915255189, -0.4866741532459855, 0.7159687278326601, -0.14773628627881408, -0.04893295676447451, 0.12340201297774911, 0.4275046996772289, -0.0254310704767704, 0.5345167354680598, -0.3258698619902134, 0.027769983746111393, -0.2468455508351326, 0.10411240626126528, -0.3111966084688902, 0.42268234491348267, -0.25318427104502916, 0.15735292620956898, 0.20756256268941797, -0.5008826581761241, 0.27773212641477585, -0.04808429069817066, 
-0.20332918921485543, 0.2466777302324772, 0.22072303481400013, 0.14400846138596535, -0.015177306719124317, -0.12061397079378366, 0.2999998489394784, 0.07604863308370113, 0.4421570096164942, -0.0796653083525598, 0.3575498014688492, 0.18157797120511532]",1
6,"[-0.3203091447552045, 0.062434068756798904, 0.04198690727353096, -0.05447138249874115, -0.09821036060651143, 0.0013584340612093607, 0.3161282200366259, 0.005258682866891225, -0.6230211858948072, 0.059414570592343806, -0.41385622024536134, -0.45878320386012394, 0.08106632878383001, 0.8518103619416555, -0.23202855587005616, 0.02663200572133064, -0.043056329960624376, 0.4689646398027738, -0.29273104164749386, -0.20873126685619353, 0.3498362004136046, 0.7702416923517982, -0.266246297955513, -0.24625053654114407, -0.3485632743996879, -0.03461065838734309, -0.12424808144569396, 0.11196001172065735, -0.022933268547058107, -0.252680483708779, 0.5962298187116782, 0.4625658541917801, 0.22749874020616215, -0.44409229308366777, -0.6797065953413646, 0.3421906061470509, -0.24296493530273439, -0.14940485656261443, 0.31535180086890857, -0.30451740473508837, -0.2071854265096287, -0.14088225762049358, 0.5085643784608692, 0.48848772048950195, 0.18593820333480834, -0.22521930038928986, 0.29638738424206773, -0.5825266242027283, 0.13673131143053371, -0.2811710047225157, -0.13147403476759792, -0.4342070569594701, 0.028175886995935193, 0.2191340567233662, -0.16055049468607951, 0.158378596107165, 0.21483568971355757, -0.008965437610944113, 0.1088790679971377, -0.34133687019348147, 0.33166398828228316, -0.14304437184085447, -0.19113364666700364, 0.14039361774921416, -0.2840621337294579, -0.3276220877965291, -0.2208055853843689, -0.02038272519906362, -0.36895836939414345, -0.19006482704232136, 0.03995685031016668, -0.10796817429363728, 0.2616151519740621, 0.21507784550388653, -0.08715749581654866, 0.44260898331801096, 0.3233365575472514, -0.011222367997591694, -0.1353724433109164, -0.12358759120106697, 0.32865413998564086, 0.24144874985019366, -0.5848562583327294, 0.48377212981383005, -0.39703264037768043, 0.058962263787786164, 0.15381984810034435, 0.16152281959851583, 0.386186450223128, -0.18025917758544285, -0.42426563929766414, -0.1630218207836151, -0.05830309900144736, 
-0.09749095855901639, -0.20645865201950073, 0.4579912222921848, 0.1689738576610883, -0.2764179406066736, 0.41889740029970807, -0.2888958012064298]","[-0.5228719540466448, 0.13794000419780952, 0.03974026645611943, 0.00921471867059848, 0.00422197233402092, 0.13442116710606195, 0.1802173884752569, 0.2542306600895689, -0.26451301938704863, -0.0592299293840432, -0.10100221208062109, -0.3474658985332522, -0.13744123673194464, 0.476662443367807, -0.21792664769805883, -0.15807067051188636, 0.09849544026611642, 0.04168042571831296, -0.026810626847712137, -0.2515864760280793, 0.2815559139503664, 0.5817851979011769, -0.06738828666657494, -0.09582056936928789, -0.1706104838495491, -0.01057223273967286, -0.07618293201707246, -0.2908428561951937, -0.12005149722205945, -0.21871892516750074, 0.5377943639906536, 0.1875131767072523, 0.02864384013476945, -0.23379956155958523, -0.4107766980038714, 0.08466417974444064, -0.1604434049951325, -0.5135991325360219, -0.08689063670867272, -0.3556653998564673, -0.1548292195910538, -0.061294568347111916, 0.27455450840585843, 0.36575457370459663, 0.19120577995075297, -0.2726338769998828, 0.09337545822211007, -0.5380077140249369, 0.15179259863945135, 0.09245306673950253, 0.034551401078729456, -0.26818078048242866, 0.07586639690459955, 0.1904334974091301, -0.23298776729604698, -0.07709078959950054, 0.10715809571805573, 0.0014797933470770603, -0.12800584842969898, -0.06510455858758615, 0.20716538082609195, 0.045992649414484166, 0.061971228189396724, 0.10633410966976453, -0.14695585799538566, -0.023139877522094104, -0.11017734659498003, 0.09148478147552441, -0.2656152384313008, 0.07817851313330297, 0.23140095472815694, 0.14896845642676335, 0.12378642533173542, 0.13221603229229573, -0.054819237511917834, 0.38136065530569385, 0.06512554381255999, 0.037316248782391426, -0.05959497313974923, 0.06472836981726568, 0.07971953386654153, 0.13364649858709624, -0.4705427416541635, 0.3986755975500773, -0.11589830950833857, -0.014702926062843727, 
0.06947897459941967, 0.015458436455321675, 0.294086710805827, 0.12754467495837968, -0.004610137310135, 0.030124895317295126, -0.13263089158252336, -0.21977193992698693, -0.07958413469996162, 0.3611457438207274, 0.10236179281580653, -0.2607382662693849, 0.2101962002179095, -0.33855546939094333]",1
7,"[-1.0495268143713474, -0.1235731327906251, 0.2819864168923232, -0.5038860235363245, -0.318421377800405, 0.3034155871719122, 0.42171848216094077, 0.4377747233957052, 0.9890235662460327, 0.10819182703562547, 0.08536760089918971, 0.13585876114666462, -0.3749775029718876, 0.08754067867994308, -0.17455408722162247, 0.12164779612794518, 0.5165180750191212, 0.043539219070225954, 0.5492247412912548, 0.1816327841952443, 0.8377616107463837, 1.0568745285272598, -0.8932957105571404, -0.27250404097139835, -0.13656306895427406, 0.23159224353730679, 0.22363757528364658, -0.5945023843087256, -0.5130792483687401, 0.3065408952534199, -0.15938279731199145, -0.1203853627666831, 0.06097659468650818, 0.5521865747869015, 0.4619122571311891, 1.4686044175177813, -0.9057357832789421, -0.48783778538927436, -0.003780001774430275, -0.35262518376111984, -0.7971615623682737, -0.15670982727533556, -0.3994526853784919, 0.5245678266510367, 0.38925211504101753, -0.12818973883986473, 0.49997422378510237, -0.47410563565790653, 0.04982629045844078, 0.09674724366050214, 0.20864574424922466, 0.02529848739504814, 0.11318511981517076, 0.5329800071194768, -0.06820166995748878, -0.07550293952226639, -0.44137564301490784, -0.16056421678513288, -0.6251152120530605, -0.3498628418892622, -0.49672001972794533, 0.3742825463414192, -0.14227847568690777, 0.17637839308008552, -0.05974683165550232, 0.4051858567399904, -1.0507031977176666, 0.33684146296582185, -0.2563084587454796, 0.32555239275097847, 0.47318214271217585, 0.5741334275808185, -0.1503256019204855, -0.2016248437575996, -0.5478971864795312, 0.884532552678138, -0.3715978041291237, -0.014852989464998245, 0.09669292334001511, -0.7210515327751637, -0.29578572418540716, 0.24212712328881025, -0.5229287995025516, -0.2272256501019001, 0.22502120025455952, -0.11162929004058242, 0.12668683711672202, 0.5272279074415565, 1.494704145938158, 0.3981695072725415, -0.03349606692790985, 0.33424077834933996, 0.6605135388672352, -0.4019664917141199, -0.19229156663641334, 
0.7405716627836227, 1.2636023797094822, -0.49319202080368996, 0.2771922694519162, -0.3016895258333534]","[-0.6091955998291848, 0.17129203270842383, 0.31931807129897405, -0.1856928317703214, -0.14761120854260473, 0.21523071860491044, 0.14381429859398934, 0.31403403058714396, 0.2666888267333782, 0.20764696157564952, -0.006612810329036966, -0.16560743375452652, -0.10363833352780878, 0.21787241548283218, 0.0056551417312667674, 0.1193144967627473, 0.12001323445738016, -0.08159324055408324, 0.24606386866402694, -0.20485550310509112, 0.3620739440720369, 0.5038337094967794, -0.3521226845711496, -0.0668876018255843, -0.10600360880068957, -0.05008651998376282, -0.09667484800611355, -0.13420404990003162, -0.19444254176522144, 0.11883817950853567, 0.1413494589476717, 0.06857056051303775, -0.07384895751653803, -0.08927241888218741, 0.1764996314050093, 0.30734918386844445, -0.2825879420182693, -0.434625091911973, -0.22379503401453704, -0.24647680887856332, -0.49378997433721006, -0.24828922989340316, -0.10536988227200625, 0.3155769115017463, 0.2767518592908899, -0.10610713079584981, 0.03793051244286668, -0.2666336207839518, 0.12663840307867374, 0.23287476303496688, -0.030483646631519246, -0.02197466740712815, 0.1936910059750478, 0.16696794308536417, -0.3476463411976621, -0.30581893313517033, -0.17590240927602022, 0.0968305583914006, -0.19760479184291807, -0.20708201373318288, -0.08184002606973742, 0.2774071378684752, -0.18196707417744334, 0.06502326408706048, -0.16015317800763026, 0.2203606077970347, -0.4416524088097193, 0.1624169294339989, -0.22408529960639853, 0.06837753528891483, 0.10331356020153788, 0.1068850370949247, -0.004361430123395343, -0.009361490417517111, -0.14696618727593952, 0.4904696767713901, 0.040492659967038054, -0.09848905558651479, 0.03794532482225607, -0.22545516365458376, -0.06712657758747344, -0.06450413829345401, -0.33645756909908775, 0.14643245485273199, 0.2730208164192007, 0.2527597077999569, 0.23816985933497892, 0.2196940845864563, 0.3454546925014803, 
0.23908407874637386, -0.08734108062272103, 0.31847888542160235, 0.15470989291026718, -0.2733899772256692, -0.22407858911031706, 0.28927994965413234, 0.42986676100230514, -0.4511616289552659, 0.2815137282842472, -0.28169217877408154]",1
8,"[-0.6033503727189132, 0.0461021571287087, -0.6321255468896457, -0.18726828960435732, 0.5585254239184516, 0.2566856454525675, 0.49361908435821533, -0.13766407647303172, -0.7355249949863979, 0.03423522199903216, -0.081573930020178, -0.7504103556275368, -0.4540862108447722, 0.12862548232078552, -1.1477538006646293, 0.05194741913250515, 0.05730938911437988, -0.10741604225976127, -0.09316372299300772, -0.38446412341935293, 0.48763355932065416, 0.40809757315686773, -0.37978250107594896, -0.3794169681412833, -0.2011138079687953, 0.10086054887090411, -0.41271149686404635, -1.1343013516494207, 0.07608214978660856, 0.11415164305695466, 0.17729181902749197, 0.3212976615343775, -0.008749922471387046, -0.6137382537126541, -0.6069174323763166, 0.28547431209257673, 0.66741786471435, -0.5283346080354282, -0.2973297876971109, -0.17451063343988998, -0.14141004106828145, 0.24085316061973572, -0.019342918481145586, -0.39064832031726837, -0.1027858704328537, -0.5404766907117197, 0.8565078015838351, -0.6748914186443601, -0.11716315908623594, 0.44060257502964567, 0.665304354258946, 0.2449394507067544, 0.21729043019669397, 0.6255791421447482, -0.17766919519220079, 0.21471127920917102, 0.5651345700025558, 0.3490778263658285, -0.6031776049307415, -0.40054167341440916, -0.2924921666937215, 0.08048130571842194, 0.048332800822598596, -0.2379354898418699, 0.196705447775977, -0.18045144528150558, -0.5088436039430755, -0.4750708043575287, 0.4210987218788692, 0.14102853408881597, 0.42222478134291513, -0.04014483599790505, -0.001182595533984048, 0.13255884312093258, -0.3812179858131068, 0.025299815194947377, 0.0308569712298257, -0.7013385955776487, -0.7316401015434947, 0.09554830965186868, -0.22719149930136545, -0.3747189585119486, 0.009489265935761588, 0.8300103928361621, -0.2921708758388247, -0.49642541472400936, 0.007901291495987348, 0.11333531753293105, 0.5673001280852726, 0.4076962800962584, 0.16067600569554738, -0.17690502373235567, 0.14545132964849472, -0.031464731586830955, 
-0.5666770168713161, 0.008314481803349085, 0.18418542402131216, 0.24248551657157286, -0.02919502237013408, -0.16055147562708175]","[-0.1839346239035331, 0.13127130849636692, -0.1356771286785841, 0.17602018686117993, 0.060517061218295407, 0.19611137007332238, 0.2559217445049392, 0.35610627094486164, -0.43020148033774896, 0.14320415379325016, -0.13102175549255551, -0.43296226350754685, -0.20369687841382533, 0.5680606317934441, -0.24011881969596346, -0.08132759942161666, 0.09056469584757662, -0.10298674165013351, 0.022523679250523072, -0.34357141425755316, 0.36126140040053517, 0.13344422385423685, -0.18894556256808187, -0.07232562075656769, -0.2918183625200486, 0.09359181528320659, -0.41559383134545175, -0.43864209527092257, -0.15408172461713582, -0.06890031402332956, 0.32484257827540625, 0.2023803205147758, -0.04622064373142659, -0.29726703494574896, -0.38473396365738843, -0.025230940874640402, -0.03612636769184037, -0.5074227977761449, -0.03160250791960163, -0.3403871110107673, -0.01851047969361164, 0.11760275313248154, -0.08332240851318945, 0.1352270906482404, 0.19808092192817028, -0.22268851583043553, 0.2386113689552771, -0.23153468109273034, 0.10207273456954251, 0.1602269537962421, 0.23188150943151956, -0.28866576066867233, 0.1353416018762351, 0.3532034485681873, -0.14072312895385486, 0.19533140748713668, 0.19343609581818672, -0.022196408065496272, -0.24366993533161807, -0.13659998897070946, 0.1898333327844739, 0.21608950376448816, 0.03966849708281781, -0.007848199163893881, 0.0647470462812618, -0.12022700675699788, -0.10212885474323165, -0.011306068029036763, -0.07072813907787774, 0.17836126258408436, 0.1908896855639136, 0.09979611503821319, -0.14018973081374808, -0.003882930334071358, -0.0881525509606075, 0.2689453276663857, -0.0805433372385135, -0.150876309578275, -0.24951752548578068, 0.09955544940044664, 0.005005497685729503, -0.03389030020019078, -0.31446267627403396, 0.16808110119154765, 0.0755028999232084, -0.26039689188520476, 0.02074017814169819, 
-0.05154892523967217, 0.17050772157867464, 0.005214909755984999, 0.21690615421331283, 0.0790417054263146, 0.07320333498762388, -0.1601818860700307, -0.15775402902904684, 0.21790403362525546, 0.2793763592643173, -0.09371436862052564, 0.20022844188330904, -0.1450456951780019]",1
9,"[-0.0547459751367569, -0.13416900895535946, 0.08928947448730469, -0.022425709664821623, -0.4576897084712982, -0.520653854496777, 0.46627447176724673, -0.6593525528907775, 0.3555509330704808, 0.20610465332865716, -0.4348417186178267, -0.7003322094678879, -1.1874925538897514, -0.2777964428067207, 0.10971724614501, -0.35415113493800165, 0.7926171600818634, -0.09146013185381889, 0.10018038153648376, -0.43554632402956484, 0.4350946806371212, 0.3864199732430279, -0.7798486690968275, 0.13406431004405023, -0.23651724606752395, 0.3821557879447937, -0.44385805740021167, -0.24117325395345687, -0.11455382220447063, -0.01115102767944336, 0.539281764626503, 0.03704383969306946, -0.20874225422739984, -0.4494802855886519, 0.08090612441301345, 0.27229015100747345, -0.1830044962465763, -0.8600373841822148, -0.2074089042842388, 0.09040125533938408, -0.024976528150727973, 0.32768638879060746, -0.33242964521050455, 0.8505683990195394, -0.014578241109848022, -0.31054347604513166, 0.0064014442265033725, -0.7232331246137619, 0.6467852963134646, 0.7440771739929914, -0.22634644880890847, -0.208548029884696, 0.5527902387082577, -0.2805007047951221, -0.7470273204147816, -0.09996693170833168, 0.2462200105190277, 0.35396058633923533, -0.32896660938858985, -0.039340542443096635, -0.3101439541205764, -0.30781607285607604, -0.13697691303677856, 0.007772108912467957, -0.8294541101902724, -0.9253068871796131, -1.05674889087677, 0.1089417289942503, 0.07457835935056209, 0.6726604503579438, 0.39324132949113844, 0.7579876810312272, -0.09231373891234398, 0.23089040946215392, 0.07801012359559537, 0.30416237339377405, 0.12926269117742778, 0.11524028703570366, 0.5245615843683481, -0.11438198611140252, -0.37241877131164075, -0.49415768100880086, -0.5111625015735626, 0.81343739554286, -0.37204451858997345, 0.25494259130209684, 0.5343873634934425, 0.3687250554561615, -0.04177801888436079, -0.10018607713282109, 0.41635181605815885, -0.03452350171282888, 0.22162079848349095, -0.3437894806265831, 
0.013357138726860285, 0.5150769431143999, -0.7511874109506607, -0.5433776393532753, 0.5169665968976915, -0.7472526580095291]","[-0.23708437482050992, 0.11292388521151264, 0.18555903019555864, -0.05577072337483126, -0.15445137713007046, -0.21616880828514695, 0.30711058405533614, -0.26924135104962316, 0.13685590213283666, 0.2876476726678327, -0.1712896864684832, -0.5101950634968373, -0.6440174269832523, 0.07368144151931096, -0.06345454789698124, -0.290457552329674, 0.3373238681241181, -0.027774333592816072, 0.24691991574676617, -0.44979066810763485, 0.23796513201956052, 0.22931440471925893, -0.21211997612834896, -0.003410843484070858, -0.1671178538828169, 0.1899898837736935, -0.4830810038869306, -0.28871413334713847, -0.2997924559626581, -0.04592913229609245, 0.2456480175223907, -0.038996933659924286, -0.20508243982671936, -0.3403098085245922, -0.07422445253735728, 0.17535800093760312, -0.3282151618933445, -0.4421574713227376, -0.23663203704010877, -0.06006224324828268, 0.02428320659342989, 0.14766478276194078, -0.0073443152593030275, 0.4604163745926931, 0.23355513384836493, -0.33373880365018277, -0.10004105761037967, -0.6495175027711079, 0.4284218005089311, 0.4853182542260884, -0.12123246505314952, -0.22342245361464105, 0.45844231964404886, 0.005440187163544318, -0.4295193534754709, -0.1418288161640034, 0.08721271716878803, 0.2413955486137502, -0.1697232162534764, 0.012719554147613337, -0.18746996275046718, -0.0053204447769514785, 0.015775765450302067, -0.07877872980277706, -0.3174288557695701, -0.4470390724633628, -0.5336075826285978, 0.27737806704601004, -0.135500125792316, 0.4551452275371745, 0.1262475995199205, 0.5034188982386487, 0.17322162466968866, 0.19811490555560538, 0.00291604419866585, 0.33083044349355417, 0.0005055028265178006, 0.06770264506668246, 0.36654279528376504, -0.25895424723745186, -0.17653483478967838, -0.18719780209550166, -0.49067122404697444, 0.44519921391074124, -0.09390697412660534, 0.10019700575490699, 0.1936364181492055, 
0.1527006465363181, 0.05124983459758256, 0.0018405438716716177, 0.24987152760419154, 0.09394740269546056, 0.21784166041618042, -0.25548669932172163, -0.13184868411001, 0.41556719063662223, -0.2856678317715352, -0.4367107475168248, 0.3100752800688689, -0.5526145889550973]",0


## Train-Test Split and DataLoader Creation

In [283]:
import ast
import string

import torch
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
from gensim.models import Word2Vec
import multiprocessing
import time
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset

# Keyword arguments forwarded to every DataLoader built in this notebook.
params = {'batch_size': 10,
          'shuffle': True,
          'num_workers': 0}
max_epochs = 100


# The "title" and "text" cells are read back as strings; ast.literal_eval
# turns each one into the Python object it spells out. Iterating the two
# columns directly avoids materialising a Series per row via iterrows().
data_d2v = pd.read_csv("data/data_w2v.csv", index_col=0)
titles = [ast.literal_eval(cell) for cell in data_d2v["title"]]
texts = [ast.literal_eval(cell) for cell in data_d2v["text"]]

data_d2v["title"] = titles
data_d2v["text"] = texts


# One (feature, label) frame per input kind.
data_d2v_title = data_d2v[["title", "label"]].copy()
data_d2v_text = data_d2v[["text", "label"]].copy()

# Both splits use the same test_size / random_state / shuffle settings.
# Every returned piece is re-indexed from 0 as it is unpacked, so position
# and label coincide for the consumers further down.
X_train_title, X_test_title, y_train_title, y_test_title = (
    piece.reset_index(drop=True)
    for piece in train_test_split(
        data_d2v_title["title"], data_d2v_title["label"],
        test_size=0.15, random_state=42, shuffle=True)
)
X_train_text, X_test_text, y_train_text, y_test_text = (
    piece.reset_index(drop=True)
    for piece in train_test_split(
        data_d2v_text["text"], data_d2v_text["label"],
        test_size=0.15, random_state=42, shuffle=True)
)


class data_set(Dataset):
    """Map-style dataset that pairs each feature entry with its label.

    Parameters
    ----------
    X : pandas.Series
        Feature entries, one element per sample (anything exposing
        ``.index`` and ``.iloc`` works).
    y : pandas.Series
        Labels; must contain exactly as many entries as ``X``.

    Raises
    ------
    AssertionError
        If ``X`` and ``y`` do not have the same number of entries.
    """

    def __init__(self, X, y):
        # Zero-argument super() resolves from this class. The previous
        # super(Dataset, self) started the MRO lookup *after* Dataset and
        # therefore skipped the parent's initialiser.
        super().__init__()
        assert len(X.index) == len(y.index)
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.X.index)

    def __getitem__(self, index):
        """Return ``(features, label)`` for the sample at position ``index``."""
        # Positional lookup keeps __getitem__ consistent with __len__: it is
        # correct even if the caller did not reset the Series index, where a
        # label-based X[index] would raise KeyError or return another row.
        return self.X.iloc[index], self.y.iloc[index]

# Title embeddings: wrap each split in a Dataset, then in a DataLoader.
# NOTE(review): the same `params` dict configures the test loaders too, so
# its shuffle setting also applies to evaluation batches.
train_dataset_title = data_set(X_train_title, y_train_title)
train_dataloader_title = DataLoader(train_dataset_title, **params)
test_dataset_title = data_set(X_test_title, y_test_title)
test_dataloader_title = DataLoader(test_dataset_title, **params)

# Text embeddings: same construction.
train_dataset_text = data_set(X_train_text, y_train_text)
train_dataloader_text = DataLoader(train_dataset_text, **params)
test_dataset_text = data_set(X_test_text, y_test_text)
test_dataloader_text = DataLoader(test_dataset_text, **params)


# Smoke check: fetch a single batch from the title training loader and print
# it. Nothing is printed between the two markers if the loader is empty.
print("bf")
first_batch = next(iter(train_dataloader_title), None)
if first_batch is not None:
    X, y = first_batch

    print(X)
    print(y)

print("done")


1923
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
[0.9974856872594994, 0.22474270370852856, 0.4354148988562231, 0.19536691727756544, -0.20182440283473665, 0.09970291618627446, 0.5563581765465836, 0.6108844055019262, 0.6547537642886675, -0.2740180529654026, -0.6183020116420079, -1.2791147189968415, -0.4544766436522926, -0.010373667276416415, -0.39624904681935563, -0.7138061252714113, -0.24680909886956215, -1.0007692363880973, 0.5073581156925163, 0.12101651515495317, 0.8519723341437696, -0.6382173126890459, -0.23425124359551733, 0.158141043