In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy
import copy
import matplotlib
import os
from matplotlib import pyplot as plt
import seaborn as sns
import torch.nn.functional as F
sns.set(style="ticks")
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence
import numpy as np
from utils import Str2id
from collections import Counter
from utils import NodeClassifier,get_word_embeddings
from tqdm import tqdm_notebook as tqdm
# from tqdm import tqdm
from tqdm import tnrange
from datasets import NormalEdgeDataset, NodeTextEdgeDataset, MultipleLableDataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Normalizer
# %config InlineBackend.figure_format = 'retina'
from scoring import evaluate
import pickle
from model import NodeRepresentation, SemiSkipGram
import time

In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up before each cell executes, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
def collate_fn(data):
    """Assemble a mini-batch from ``(u, v, negatives)`` samples.

    Args:
        data: sequence of samples; element 0 is the source item, element 1
            the target item and element 2 an iterable of negative items.

    Returns:
        A tuple ``(sources, targets, negatives)`` where the first two are
        plain lists (one entry per sample) and ``negatives`` is a numpy
        array holding every sample's negatives concatenated in sample order.
    """
    sources = [sample[0] for sample in data]
    targets = [sample[1] for sample in data]
    # Flatten the per-sample negatives into one sequence.
    negatives = [neg for sample in data for neg in sample[2]]
    return sources, targets, numpy.array(negatives)

In [15]:
# Hyper-parameters shared by the dataset and training cells.
# use_cuda = torch.cuda.is_available()
batch_size = 100  # samples per mini-batch handed to the DataLoaders
neg_sampling_size = 10  # forwarded to NormalEdgeDataset
emb_size = 256  # dimensionality of the learned node embeddings

In [5]:
# Root directories: raw datasets, and artefacts produced by other models
# (random walks, label matrices) that the later cells read.
base_data_path = "/mnt/store1/plus1lab/multilabel-data"
base_model_data_path = "/mnt/store1/plus1lab/multilabel-model"

In [24]:
# Which dataset to train on and which structural model supplies the edges.
# The dataset-construction cell accepts 'deepwalk' or 'skip-gram' for ne_model.
dataset = "v1_aminer_5k"
ne_model = "deepwalk"

In [25]:
# Input files for the selected dataset (the "_5" suffix is part of the
# on-disk directory name).
edge_list_path = "{}/{}_5/original-edgelist.txt".format(base_data_path, dataset)
stats_file = "{}/{}_5/stats.txt".format(base_data_path, dataset)
node_labels_file = "{}/{}_5/node-labels.txt".format(base_data_path, dataset)
# Pre-generated random walks, read when ne_model == 'deepwalk'.
original_deepwalk_path = "{}/deepwalk-node/{}_5/original.walks".format(
    base_model_data_path, dataset)

In [26]:
# Build the node vocabulary and the structural (edge) training set.
# The same `node_map` is passed for both mapping arguments of NormalEdgeDataset.
node_map = Str2id()
label_map = Str2id()
if ne_model == 'deepwalk':
    # Edges are derived from the pre-generated random-walk file.
    edge_dataset = NormalEdgeDataset(
        node_map,
        node_map,
        original_deepwalk_path,
        neg_sampling_size,
        from_random_walk=True,
        window_size=10)
elif ne_model == 'skip-gram':
    # Edges are read from the plain edge list.
    edge_dataset = NormalEdgeDataset(node_map, node_map, edge_list_path,
                                     neg_sampling_size)
else:
    # ValueError is still an Exception subclass, so existing handlers keep
    # working; the offending value is included to make the failure actionable.
    raise ValueError('No such network embedding model: %r' % (ne_model,))
# Presumably draws the initial negative samples for every edge -- the training
# cell calls this again each time the edge loader is exhausted.
edge_dataset.negative_sampling1()

20150it [00:05, 3485.57it/s]


In [27]:
# Settings read by the training cell.
edge_label_ratio = 5  # selects the aegcn/<dataset>_<ratio>/label-mat.txt file
edge_train_prob = 1.0  # per-iteration chance of a structure-loss step
# Per-iteration chance of an edge-label-loss step. NOTE: the training cell's
# `for edge_label_train_prob in [...]` loop rebinds this name.
edge_label_train_prob = 0.2

In [29]:
# Train node embeddings with the structural skip-gram loss and, with
# probability `edge_label_train_prob` per step, the edge-label loss.
# The embeddings are evaluated every `eval_every` steps and the collected
# scores are pickled at the end of each configuration.
print("batch_size", batch_size)
print("neg_sampling_size", neg_sampling_size)
print("dataset", dataset)
print("ne_model", ne_model)
# Timestamp-named scratch file used to hand embeddings to evaluate().
filename = str(time.time())
print("filename", filename)

num_batches = 100000  # optimisation steps per configuration
eval_every = 5000     # evaluate the current embeddings every this many steps
directed = True

# Swap the active `for` line to sweep a different hyper-parameter.
# for edge_label_train_prob in [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]:
for edge_label_train_prob in [0.0]:
# for edge_label_ratio in [1, 5, 10]:
# for emb_size in [16, 32, 64, 128, 256, 512]:
# for emb_size in [256]:
    # Printed inside the loop so the value shown is the one actually used
    # (the loop variable shadows the value set in the config cell).
    print("edge_label_train_prob", edge_label_train_prob)
    print("edge_label_ratio", edge_label_ratio)
    print("edge_train_prob", edge_train_prob)
    print("emb_size", emb_size)

    edge_labels_path = "%s/aegcn/%s_%d/label-mat.txt" % (base_model_data_path,
                                                         dataset,
                                                         edge_label_ratio)
    all_scores = []
    node_map.freeze = True
    # The label count is the third space-separated field of the first line.
    with open(stats_file) as stats_fh:
        total_label_count = int(stats_fh.readline().strip().split(" ")[2])
    edge_label_dataset = MultipleLableDataset(
        node_map,
        node_map,
        edge_labels_path,
        total_label_count,
        ratio=1.0,
        use_all_zeros=False)
    print(len(node_map.str2id_dict))
    use_cuda = False
    print(len(edge_label_dataset))

    # Seed every RNG this cell draws from so a configuration is repeatable.
    torch.cuda.manual_seed_all(13)
    torch.manual_seed(13)
    numpy.random.seed(13)
    model = SemiSkipGram(emb_size, total_label_count, use_cuda)
    node_representation = NodeRepresentation(
        len(node_map.str2id_dict), emb_size, use_cuda)
    node_optimizer = torch.optim.SGD(node_representation.parameters(), 1)
    model_optimizer = torch.optim.SGD(model.parameters(), 1)

    edge_loader = DataLoader(
        edge_dataset,
        batch_size=batch_size,
        collate_fn=collate_fn,
        shuffle=True,
        drop_last=True)
    edge_label_loader = DataLoader(
        edge_label_dataset,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True)

    def lr_decay(step):
        """LR multiplier: 0.05 decayed with the scheduler step count, floored at 1e-5."""
        return max(0.00001, 0.05 / (1.0 + step * batch_size / 10000 * 0.01))

    # Both optimizers use base lr 1, so the multiplier is the effective lr.
    scheduler0 = optim.lr_scheduler.LambdaLR(
        node_optimizer, lr_lambda=[lr_decay])
    # NOTE(review): scheduler1 is created but never stepped below, so the
    # model optimizer keeps its initial learning rate. Left unchanged to
    # preserve training behaviour -- confirm whether this is intended.
    scheduler1 = optim.lr_scheduler.LambdaLR(
        model_optimizer, lr_lambda=[lr_decay])

    edge_errs = [0]
    edge_label_errs = [0]
    edge_loader_train = iter(edge_loader)
    edge_label_loader_train = iter(edge_label_loader)
    batch_process_bar = tqdm(range(num_batches))

    for idx in batch_process_bar:
        if idx % eval_every == 0:
            # Dump the current embeddings, score them, and always clean up
            # the scratch file even if evaluation fails.
            node_representation.save_embedding(node_map, filename, False)
            try:
                score = evaluate(
                    filename, node_labels_file, stats_file, num_shuffles=1, use_std_out=True)
            finally:
                os.remove(filename)
            all_scores.append([idx, score])

        if numpy.random.rand() < edge_train_prob:
            try:
                uids, vids, neg_vids = next(edge_loader_train)
            except StopIteration:
                # Loader exhausted: restart it and redraw negative samples.
                edge_loader_train = iter(edge_loader)
                edge_dataset.negative_sampling1()
                uids, vids, neg_vids = next(edge_loader_train)
            node_optimizer.zero_grad()
            emb_u = node_representation.forward(
                uids, is_start=True, directed=directed)
            emb_v = node_representation.forward(
                vids, is_start=False, directed=directed)
            neg_emb_v = node_representation.forward(
                neg_vids, is_start=False, directed=directed)
            # Regroup the flat negatives per positive pair; drop_last=True
            # guarantees every batch holds exactly batch_size samples.
            neg_emb_v = neg_emb_v.view(batch_size, -1, emb_size)
            err = model.stucture_loss(emb_u, emb_v, neg_emb_v)
            # backward() without arguments requires a scalar, so item() is safe.
            edge_errs.append(err.item())
            err.backward()
            node_optimizer.step()
            scheduler0.step()

        if numpy.random.rand() < edge_label_train_prob:
            try:
                uids, vids, edge_labels = next(edge_label_loader_train)
            except StopIteration:
                edge_label_loader_train = iter(edge_label_loader)
                uids, vids, edge_labels = next(edge_label_loader_train)

            edge_labels = Variable(edge_labels.type(torch.FloatTensor))
            node_optimizer.zero_grad()
            model_optimizer.zero_grad()
            emb_u = node_representation.forward(
                uids, is_start=True, directed=directed)
            emb_v = node_representation.forward(
                vids, is_start=False, directed=directed)
            err = model.edge_label_loss(emb_u, emb_v, edge_labels)
            edge_label_errs.append(err.item())
            err.backward()
            model_optimizer.step()
            node_optimizer.step()

        batch_process_bar.set_postfix_str("Loss: %0.2f | %0.2f, lr1: %0.6f" % (
            edge_errs[-1],
            edge_label_errs[-1],
            node_optimizer.param_groups[0]["lr"],
        ))

    result_file_name = "../../results/our_model/%s/%s/%d/training_prob_%0.2f_%0.2f_batch_%d_dim_%d_neg_%d.pkl" % (
        dataset, ne_model, edge_label_ratio, edge_train_prob,
        edge_label_train_prob, batch_size, emb_size, neg_sampling_size)
    # Create the output directory up front so a long run is not lost at the
    # final write, and close the file deterministically.
    os.makedirs(os.path.dirname(result_file_name), exist_ok=True)
    with open(result_file_name, "wb") as result_fh:
        pickle.dump(all_scores, result_fh)

edge_label_train_prob 0.0
batch_size 100
neg_sampling_size 10
dataset v1_aminer_5k
ne_model deepwalk
filename 1534902302.492674
edge_label_ratio 5
edge_train_prob 1.0
emb_size 256
Use 1.00 labels
not count topk 0
2015
147


2015 lines [00:00, 275536.22 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.11000433087916847, 'accuracy': 0.004699738903394256, 'macro': 0.03447154983441913}
Average score: {'micro': 0.11000433087916847, 'accuracy': 0.004699738903394256, 'macro': 0.03447154983441913}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.11861564262844945, 'accuracy': 0.0027563395810363835, 'macro': 0.03801074142944303}
Average score: {'micro': 0.11861564262844945, 'accuracy': 0.0027563395810363835, 'macro': 0.03801074142944303}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.12575262343024257, 'accuracy': 0.0024813895781637717, 'macro': 0.038161649351920686}
Average score: {'micro': 0.12575262343024257, 'accuracy': 0.0024813895781637717, 'macro': 0.038161649351920686}
-------------------


2015 lines [00:00, 378873.11 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.20757180156657964, 'accuracy': 0.037075718015665796, 'macro': 0.14927719058997954}
Average score: {'micro': 0.20757180156657964, 'accuracy': 0.037075718015665796, 'macro': 0.14927719058997954}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.24667583677212288, 'accuracy': 0.04851157662624035, 'macro': 0.2073346160612755}
Average score: {'micro': 0.24667583677212288, 'accuracy': 0.04851157662624035, 'macro': 0.2073346160612755}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.302329564919493, 'accuracy': 0.06327543424317618, 'macro': 0.2688602551888197}
Average score: {'micro': 0.302329564919493, 'accuracy': 0.06327543424317618, 'macro': 0.2688602551888197}
-------------------


2015 lines [00:00, 377064.45 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.17661547377571718, 'accuracy': 0.033942558746736295, 'macro': 0.13793432441554332}
Average score: {'micro': 0.17661547377571718, 'accuracy': 0.033942558746736295, 'macro': 0.13793432441554332}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.2414694128496772, 'accuracy': 0.050165380374862185, 'macro': 0.21371063032362111}
Average score: {'micro': 0.2414694128496772, 'accuracy': 0.050165380374862185, 'macro': 0.21371063032362111}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.3236013986013986, 'accuracy': 0.07382133995037221, 'macro': 0.29265230882951626}
Average score: {'micro': 0.3236013986013986, 'accuracy': 0.07382133995037221, 'macro': 0.29265230882951626}
-------------------


2015 lines [00:00, 371691.55 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.19382104342757214, 'accuracy': 0.0402088772845953, 'macro': 0.15680552289594898}
Average score: {'micro': 0.19382104342757214, 'accuracy': 0.0402088772845953, 'macro': 0.15680552289594898}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.23963133640552994, 'accuracy': 0.060088202866593166, 'macro': 0.21580767680281823}
Average score: {'micro': 0.23963133640552994, 'accuracy': 0.060088202866593166, 'macro': 0.21580767680281823}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.307480179248535, 'accuracy': 0.07382133995037221, 'macro': 0.28281040604422697}
Average score: {'micro': 0.307480179248535, 'accuracy': 0.07382133995037221, 'macro': 0.28281040604422697}
-------------------


2015 lines [00:00, 6417.93 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.18583042973286876, 'accuracy': 0.02610966057441253, 'macro': 0.15485065145437715}
Average score: {'micro': 0.18583042973286876, 'accuracy': 0.02610966057441253, 'macro': 0.15485065145437715}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.2482399755127028, 'accuracy': 0.04244762954796031, 'macro': 0.21407755963914346}
Average score: {'micro': 0.2482399755127028, 'accuracy': 0.04244762954796031, 'macro': 0.21407755963914346}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.3137120302042217, 'accuracy': 0.06451612903225806, 'macro': 0.28798379890403397}
Average score: {'micro': 0.3137120302042217, 'accuracy': 0.06451612903225806, 'macro': 0.28798379890403397}
-------------------


2015 lines [00:00, 365518.66 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.20539115147963669, 'accuracy': 0.04960835509138381, 'macro': 0.16789462470372993}
Average score: {'micro': 0.20539115147963669, 'accuracy': 0.04960835509138381, 'macro': 0.16789462470372993}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.2489984591679507, 'accuracy': 0.05898566703417861, 'macro': 0.21718831603845978}
Average score: {'micro': 0.2489984591679507, 'accuracy': 0.05898566703417861, 'macro': 0.21718831603845978}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.3192602930914166, 'accuracy': 0.07444168734491315, 'macro': 0.2923617956037896}
Average score: {'micro': 0.3192602930914166, 'accuracy': 0.07444168734491315, 'macro': 0.2923617956037896}
-------------------


2015 lines [00:00, 347456.12 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.19878997407087295, 'accuracy': 0.03289817232375979, 'macro': 0.16025962926283924}
Average score: {'micro': 0.19878997407087295, 'accuracy': 0.03289817232375979, 'macro': 0.16025962926283924}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.24216610891390325, 'accuracy': 0.04630650496141125, 'macro': 0.216085103224301}
Average score: {'micro': 0.24216610891390325, 'accuracy': 0.04630650496141125, 'macro': 0.216085103224301}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.3192512450626825, 'accuracy': 0.07754342431761786, 'macro': 0.29498458283380374}
Average score: {'micro': 0.3192512450626825, 'accuracy': 0.07754342431761786, 'macro': 0.29498458283380374}
-------------------


2015 lines [00:00, 374474.84 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.1842680262199563, 'accuracy': 0.038642297650130546, 'macro': 0.15102071554837926}
Average score: {'micro': 0.1842680262199563, 'accuracy': 0.038642297650130546, 'macro': 0.15102071554837926}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.2410700479356734, 'accuracy': 0.05622932745314223, 'macro': 0.21241888162477526}
Average score: {'micro': 0.2410700479356734, 'accuracy': 0.05622932745314223, 'macro': 0.21241888162477526}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.3091629348949363, 'accuracy': 0.07009925558312655, 'macro': 0.2832306985853732}
Average score: {'micro': 0.3091629348949363, 'accuracy': 0.07009925558312655, 'macro': 0.2832306985853732}
-------------------


2015 lines [00:00, 375523.09 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.20052272397270215, 'accuracy': 0.027676240208877285, 'macro': 0.15715598483198132}
Average score: {'micro': 0.20052272397270215, 'accuracy': 0.027676240208877285, 'macro': 0.15715598483198132}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.24792881251917767, 'accuracy': 0.046857772877618525, 'macro': 0.2184522520011482}
Average score: {'micro': 0.24792881251917767, 'accuracy': 0.046857772877618525, 'macro': 0.2184522520011482}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.3186888621932384, 'accuracy': 0.06699751861042183, 'macro': 0.29250066168886835}
Average score: {'micro': 0.3186888621932384, 'accuracy': 0.06699751861042183, 'macro': 0.29250066168886835}
-------------------


2015 lines [00:00, 375623.22 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.17241879350348027, 'accuracy': 0.020887728459530026, 'macro': 0.14265510147060803}
Average score: {'micro': 0.17241879350348027, 'accuracy': 0.020887728459530026, 'macro': 0.14265510147060803}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.23605546995377505, 'accuracy': 0.04189636163175303, 'macro': 0.21648961720129148}
Average score: {'micro': 0.23605546995377505, 'accuracy': 0.04189636163175303, 'macro': 0.21648961720129148}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.32119951464725255, 'accuracy': 0.07382133995037221, 'macro': 0.2974123611654183}
Average score: {'micro': 0.32119951464725255, 'accuracy': 0.07382133995037221, 'macro': 0.2974123611654183}
-------------------


2015 lines [00:00, 380973.79 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.18970503181029497, 'accuracy': 0.0412532637075718, 'macro': 0.1685253653898706}
Average score: {'micro': 0.18970503181029497, 'accuracy': 0.0412532637075718, 'macro': 0.1685253653898706}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.23764490543014033, 'accuracy': 0.05733186328555678, 'macro': 0.214541191242502}
Average score: {'micro': 0.23764490543014033, 'accuracy': 0.05733186328555678, 'macro': 0.214541191242502}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.31869446343130553, 'accuracy': 0.062034739454094295, 'macro': 0.29356859464966834}
Average score: {'micro': 0.31869446343130553, 'accuracy': 0.062034739454094295, 'macro': 0.29356859464966834}
-------------------


2015 lines [00:00, 218764.34 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.20225376847651105, 'accuracy': 0.029765013054830286, 'macro': 0.15746492911990298}
Average score: {'micro': 0.20225376847651105, 'accuracy': 0.029765013054830286, 'macro': 0.15746492911990298}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.24450253729048133, 'accuracy': 0.044652701212789414, 'macro': 0.2126235561575585}
Average score: {'micro': 0.24450253729048133, 'accuracy': 0.044652701212789414, 'macro': 0.2126235561575585}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.30749354005167956, 'accuracy': 0.07568238213399504, 'macro': 0.2785581578621383}
Average score: {'micro': 0.30749354005167956, 'accuracy': 0.07568238213399504, 'macro': 0.2785581578621383}
-------------------


2015 lines [00:00, 360006.92 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.184673549512869, 'accuracy': 0.027154046997389034, 'macro': 0.147723348215658}
Average score: {'micro': 0.184673549512869, 'accuracy': 0.027154046997389034, 'macro': 0.147723348215658}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.24052367179174913, 'accuracy': 0.05181918412348401, 'macro': 0.21001184426872033}
Average score: {'micro': 0.24052367179174913, 'accuracy': 0.05181918412348401, 'macro': 0.21001184426872033}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.3173288250211327, 'accuracy': 0.08188585607940446, 'macro': 0.28813255547037736}
Average score: {'micro': 0.3173288250211327, 'accuracy': 0.08188585607940446, 'macro': 0.28813255547037736}
-------------------


2015 lines [00:00, 372757.14 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.19770148385219669, 'accuracy': 0.03237597911227154, 'macro': 0.16714497019778243}
Average score: {'micro': 0.19770148385219669, 'accuracy': 0.03237597911227154, 'macro': 0.16714497019778243}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.24617268830373545, 'accuracy': 0.06229327453142227, 'macro': 0.22201442752493572}
Average score: {'micro': 0.24617268830373545, 'accuracy': 0.06229327453142227, 'macro': 0.22201442752493572}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.3165592895699112, 'accuracy': 0.0794044665012407, 'macro': 0.29279228443450206}
Average score: {'micro': 0.3165592895699112, 'accuracy': 0.0794044665012407, 'macro': 0.29279228443450206}
-------------------


2015 lines [00:00, 368290.16 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.1797736506094022, 'accuracy': 0.03237597911227154, 'macro': 0.1475586589516963}
Average score: {'micro': 0.1797736506094022, 'accuracy': 0.03237597911227154, 'macro': 0.1475586589516963}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.2313731527093596, 'accuracy': 0.047960308710033074, 'macro': 0.2054711269605291}
Average score: {'micro': 0.2313731527093596, 'accuracy': 0.047960308710033074, 'macro': 0.2054711269605291}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.29747492217225874, 'accuracy': 0.06513647642679901, 'macro': 0.2724805847803367}
Average score: {'micro': 0.29747492217225874, 'accuracy': 0.06513647642679901, 'macro': 0.2724805847803367}
-------------------


2015 lines [00:00, 378076.52 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.2035139092240117, 'accuracy': 0.0422976501305483, 'macro': 0.1635032144870597}
Average score: {'micro': 0.2035139092240117, 'accuracy': 0.0422976501305483, 'macro': 0.1635032144870597}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.2530064754856614, 'accuracy': 0.05953693495038589, 'macro': 0.2196645528454848}
Average score: {'micro': 0.2530064754856614, 'accuracy': 0.05953693495038589, 'macro': 0.2196645528454848}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.3166009936611273, 'accuracy': 0.07630272952853598, 'macro': 0.29654030769291556}
Average score: {'micro': 0.3166009936611273, 'accuracy': 0.07630272952853598, 'macro': 0.29654030769291556}
-------------------


2015 lines [00:00, 357524.54 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.18760907504363003, 'accuracy': 0.02349869451697128, 'macro': 0.1526183212874073}
Average score: {'micro': 0.18760907504363003, 'accuracy': 0.02349869451697128, 'macro': 0.1526183212874073}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.24923171481253842, 'accuracy': 0.050716648291069456, 'macro': 0.21576226105346769}
Average score: {'micro': 0.24923171481253842, 'accuracy': 0.050716648291069456, 'macro': 0.21576226105346769}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.32335329341317365, 'accuracy': 0.0750620347394541, 'macro': 0.30257477540053923}
Average score: {'micro': 0.32335329341317365, 'accuracy': 0.0750620347394541, 'macro': 0.30257477540053923}
-------------------


2015 lines [00:00, 380493.54 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.1894005212858384, 'accuracy': 0.03080939947780679, 'macro': 0.1507525893344956}
Average score: {'micro': 0.1894005212858384, 'accuracy': 0.03080939947780679, 'macro': 0.1507525893344956}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.2589312977099237, 'accuracy': 0.05237045203969129, 'macro': 0.22955084457919928}
Average score: {'micro': 0.2589312977099237, 'accuracy': 0.05237045203969129, 'macro': 0.22955084457919928}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.3278518262073741, 'accuracy': 0.08312655086848635, 'macro': 0.3014330224824653}
Average score: {'micro': 0.3278518262073741, 'accuracy': 0.08312655086848635, 'macro': 0.3014330224824653}
-------------------


2015 lines [00:00, 257049.26 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.18945848375451263, 'accuracy': 0.044386422976501305, 'macro': 0.1445526083737005}
Average score: {'micro': 0.18945848375451263, 'accuracy': 0.044386422976501305, 'macro': 0.1445526083737005}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.24521712723959915, 'accuracy': 0.05733186328555678, 'macro': 0.21665748632672688}
Average score: {'micro': 0.24521712723959915, 'accuracy': 0.05733186328555678, 'macro': 0.21665748632672688}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.3116663852777309, 'accuracy': 0.08002481389578164, 'macro': 0.2882478994813832}
Average score: {'micro': 0.3116663852777309, 'accuracy': 0.08002481389578164, 'macro': 0.2882478994813832}
-------------------


2015 lines [00:00, 7497.04 lines/s]


Results, using embeddings of dimensionality 256
-------------------
Train percent: 0.05
Shuffle #1:    {'micro': 0.20043509789702682, 'accuracy': 0.045953002610966055, 'macro': 0.16200196265572256}
Average score: {'micro': 0.20043509789702682, 'accuracy': 0.045953002610966055, 'macro': 0.16200196265572256}
-------------------
Train percent: 0.1
Shuffle #1:    {'micro': 0.2505736576411198, 'accuracy': 0.05181918412348401, 'macro': 0.22262105184045808}
Average score: {'micro': 0.2505736576411198, 'accuracy': 0.05181918412348401, 'macro': 0.22262105184045808}
-------------------
Train percent: 0.2
Shuffle #1:    {'micro': 0.31435431315383294, 'accuracy': 0.06327543424317618, 'macro': 0.2843042501817429}
Average score: {'micro': 0.31435431315383294, 'accuracy': 0.06327543424317618, 'macro': 0.2843042501817429}
-------------------


