## Imports

In [None]:
import os
import re
import sys
import typing
import gc
import pandas as pd
import pickle
import logging

# Put ./src on the module search path. The entry is relative, so it is
# resolved against the kernel's current working directory.
sys.path.append(os.path.join('.', 'src'))

from src.models import Pipeline
from src.nodes import *

In [None]:
# basicConfig attaches a FileHandler, which opens the log file on creation and
# raises FileNotFoundError when the parent directory is missing. Create logs/
# first; exist_ok=True keeps this cell safe to re-run.
os.makedirs('logs', exist_ok=True)
logging.basicConfig(filename='logs/logs.log', level=logging.DEBUG)

# Tweets Model

In [None]:
# Build the pipeline from the tweets model config with load_model_data enabled.
pipeline = Pipeline('CONFIG_MODEL_TWEETS.json', load_model_data=True)

In [None]:
# Statistics for the train/val/test datasets held by the pipeline: len() of
# each dataset and the value of its token_len(). Keys are '<split>_set_len'
# followed by '<split>_set_tokens', in train, val, test order.
d = {
    f'{split}_set_{stat}': measure(getattr(pipeline, f'{split}_dataset'))
    for split in ('train', 'val', 'test')
    for stat, measure in (
        ('len', len),
        ('tokens', lambda dataset: dataset.token_len()),
    )
}

In [None]:
# Display the split statistics gathered in the previous cell.
d

In [None]:
# Perplexity before the training cell below is run.
pipeline.perplexity()

In [None]:
# Run the pipeline's training routine for the tweets configuration.
pipeline.train_model()

In [None]:
# Perplexity again, now after train_model(), for comparison with the
# pre-training value above.
pipeline.perplexity()

In [None]:
# Rebuild the tweets pipeline with load_model_data disabled; a saved model
# file is loaded into it in the next cell.
pipeline = Pipeline('CONFIG_MODEL_TWEETS.json', load_model_data=False)

In [None]:
# Load the saved tweets model file into the pipeline.
pipeline.load_model('models/tweets/tweets.pth')

In [None]:
# Generate text from the prompt 'all' (num_words=40).
pipeline.generate(start_text='all', num_words=40)

In [None]:
# Generate text from the prompt 'what' (num_words=40).
pipeline.generate(start_text='what', num_words=40)

In [None]:
# Generate text from the two-word prompt 'i like' (num_words=40).
pipeline.generate(start_text='i like', num_words=40)

# Wiki Model

In [None]:
# Replace the tweets pipeline with one built from the wiki model config,
# with load_model_data enabled.
pipeline = Pipeline('CONFIG_MODEL_WIKI.json', load_model_data=True)

In [None]:
# Evaluate perplexity with the with_recall and with_tqdm options switched on,
# before the saved wiki103 model is loaded in the next cell.
pipeline.perplexity(with_recall=True, with_tqdm=True)

In [None]:
# Load the saved wiki103 model file into the pipeline.
pipeline.load_model('models/wiki103/wiki103.pth')

In [None]:
# Same evaluation as before, repeated now that the saved wiki103 model has
# been loaded.
pipeline.perplexity(with_recall=True, with_tqdm=True)

In [None]:
# Run the pipeline's training routine for the wiki configuration.
pipeline.train_model()

In [None]:
# Perplexity after train_model().
# NOTE(review): unlike the two evaluations earlier in this section,
# with_recall / with_tqdm are not passed here, so the method's defaults
# apply. Confirm that is intended.
pipeline.perplexity()

In [None]:
# Generate text from the prompt 'all' with the wiki model (num_words=40).
pipeline.generate(start_text='all', num_words=40)

In [None]:
# Generate text from the prompt 'what' with the wiki model (num_words=40).
pipeline.generate(start_text='what', num_words=40)

In [None]:
# Generate text from the prompt 'i like' with the wiki model.
# NOTE(review): every other generate() call in this notebook passes
# num_words=40. This one omits it, so the method's default length is used.
# Confirm that is intended.
pipeline.generate(start_text='i like')

In [None]:
# Statistics for the wiki pipeline's train/val/test datasets: len() of each
# dataset and the value of its token_len(). Keys are '<split>_set_len'
# followed by '<split>_set_tokens', in train, val, test order.
d = {
    f'{split}_set_{stat}': measure(getattr(pipeline, f'{split}_dataset'))
    for split in ('train', 'val', 'test')
    for stat, measure in (
        ('len', len),
        ('tokens', lambda dataset: dataset.token_len()),
    )
}

In [None]:
# Display the wiki split statistics gathered in the previous cell.
d

# FedAVG

In [1]:
from src.federated_pipeline import Federated_AVG

import os
import pickle

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch

In [2]:
# Set up the FedAVG experiment from the tweets model config and the tweets
# federated config, with the testing flag enabled.
federated = Federated_AVG("CONFIG_MODEL_TWEETS.json", "CONFIG_FEDERATED_TWEETS.json", testing=True)

100%|██████████| 1000/1000 [00:00<00:00, 3817.73it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9522.60it/s]
100%|██████████| 1/1 [00:00<00:00, 117.98it/s]
100%|██████████| 10/10 [00:03<00:00,  3.20it/s]


In [None]:
# Train with 3 as the first positional argument (presumably the number of
# rounds, TODO confirm) and without saving results.
federated.train(3, save_results=False)

100%|██████████| 10/10 [00:00<00:00, 2054.52it/s]


round 0


  0%|          | 0/10 [00:00<?, ?it/s]

round 1


100%|██████████| 10/10 [00:20<00:00,  2.03s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

round 2


 20%|██        | 2/10 [00:06<00:24,  3.03s/it]

In [None]:
# Tabulate the results collected on the federated object as a DataFrame.
pd.DataFrame(federated.results)

In [None]:
# Inspect the losses attribute of the node stored at nodes[1].
federated.nodes[1].losses

# LICCHAVI

In [1]:
from src.federated_pipeline import Federated_LICCHAVI

import os
import pickle
import matplotlib.pyplot as plt
import pandas as pd
import torch

In [2]:
# Set up the LICCHAVI experiment from the tweets model config and the tweets
# federated config, with the testing flag enabled.
federated = Federated_LICCHAVI("CONFIG_MODEL_TWEETS.json", "CONFIG_FEDERATED_TWEETS.json", testing=True)

100%|██████████| 1000/1000 [00:00<00:00, 4170.78it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9998.39it/s]
100%|██████████| 1/1 [00:00<00:00, 60.73it/s]
100%|██████████| 2/2 [00:00<00:00,  5.29it/s]


In [3]:
# LICCHAVI training with 2 as the first positional argument, results not
# saved. The recorded output prints "round 0" through "round 2", so an extra
# round 0 appears to run before the two requested rounds.
# NOTE(review): in the recorded state-dict dumps, rnn.weight_ih_l0 roughly
# doubles from one round to the next (e.g. 47.6 -> 94.6) while the reported
# perplexity barely changes. Check whether the model update inside
# Federated_LICCHAVI is accumulating weights unintentionally. The dumps
# themselves look like a leftover debug print inside train() (TODO confirm).
federated.train(2, save_results=False)

  0%|          | 0/2 [00:00<?, ?it/s]

round 0


100%|██████████| 2/2 [00:00<00:00,  2.88it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

round 1


100%|██████████| 2/2 [00:03<00:00,  1.65s/it]


OrderedDict([('embedding_layer.weight', tensor([[-9.8524e-40, -9.8313e-40, -9.8148e-40,  ..., -9.9455e-40,
          9.9530e-40,  9.8174e-40],
        [ 1.4923e-39, -1.0035e-39,  1.4954e-39,  ...,  1.5023e-39,
          1.0374e-39,  1.0569e-39],
        [-1.7490e-02,  1.1819e-03, -8.0561e-03,  ..., -2.1182e-01,
         -3.6590e-02, -9.5094e-03],
        ...,
        [-1.1032e-05,  3.3619e-04, -5.5264e-03,  ..., -1.0525e-01,
          3.7976e-02, -2.8149e-05],
        [ 3.5613e-02,  3.3337e-02,  3.1249e-02,  ..., -2.2565e-04,
         -5.7802e-02,  1.8486e-02],
        [-2.4720e-21,  1.2476e-33,  1.5994e-09,  ..., -4.4659e-03,
          2.6219e-04, -1.6057e-18]], device='cuda:0')), ('rnn.weight_ih_l0', tensor([[  47.6124,   74.3962,   76.0923,  ...,  -26.3939,   64.0800,
           -2.8154],
        [ -69.8502,  -24.2733, -145.4402,  ...,  -95.1002,   50.5877,
         -130.4213],
        [  27.8286,   56.4417,  -58.5810,  ...,  -12.7532,   96.9957,
          -23.9668],
        ...,
  

  0%|          | 0/2 [00:00<?, ?it/s]

round 2


100%|██████████| 2/2 [00:03<00:00,  1.79s/it]


OrderedDict([('embedding_layer.weight', tensor([[-9.8524e-40, -9.8313e-40, -9.8148e-40,  ..., -9.9455e-40,
          9.9530e-40,  9.8174e-40],
        [ 1.4923e-39, -1.0035e-39,  1.4954e-39,  ...,  1.5023e-39,
          1.0374e-39,  1.0569e-39],
        [-1.7490e-02,  1.1819e-03, -8.0561e-03,  ..., -2.1182e-01,
         -3.6590e-02, -9.5094e-03],
        ...,
        [-1.1032e-05,  3.3619e-04, -5.5264e-03,  ..., -1.0525e-01,
          3.7976e-02, -2.8149e-05],
        [ 3.5613e-02,  3.3337e-02,  3.1249e-02,  ..., -2.2565e-04,
         -5.7802e-02,  1.8486e-02],
        [-2.4720e-21,  1.2476e-33,  1.5994e-09,  ..., -4.4659e-03,
          2.6219e-04, -1.6057e-18]], device='cuda:0')), ('rnn.weight_ih_l0', tensor([[  94.6298,  148.1439,  151.5327,  ...,  -52.2351,  127.5321,
           -5.1253],
        [-139.0611,  -47.9982, -290.0901,  ..., -189.5106,  100.5744,
         -260.0823],
        [  55.1016,  112.2707, -116.5450,  ...,  -24.9810,  193.2979,
          -47.3858],
        ...,
  

In [4]:
# Tabulate the collected results. In the recorded output each column appears
# to be a round (0-2) and each row a metric.
pd.DataFrame(federated.results)

Unnamed: 0,0,1,2
perplexity_1,177.023,170.636,167.522
loss_1,5.16901,5.1323,5.11397
f1_recall_1,0.192639,0.199466,0.198427
f3_recall_1,0.303206,0.310775,0.311962
generate_1,all we are tired of the body years ago in history,all kaine was to the atlantic city in history ...,all trump isnt fighting white trolls are five ...
attack_perplexity_1,1038.57,967.136,924.686
perplexity,57.6005,57.5626,57.5506
loss,4.06355,4.06288,4.06267
f1_recall,0.345994,0.346599,0.346448
f3_recall,0.45555,0.456458,0.457139


In [5]:
# Losses kept by the node at nodes[1]. The recorded output is a dict with
# 'total_loss', 'loss' and 'reg_loss' lists, each holding two values.
federated.nodes[1].losses

{'total_loss': [5.423960519873577, 5.362229824066162],
 'loss': [5.259511144264884, 5.202203403348508],
 'reg_loss': [0.16444938111564386, 0.1600264314076175]}

In [None]:
# Start a fresh LICCHAVI experiment with the same two config files and the
# testing flag enabled, replacing the previous federated object.
federated = Federated_LICCHAVI("CONFIG_MODEL_TWEETS.json", "CONFIG_FEDERATED_TWEETS.json", testing=True)

In [None]:
# Train with 1 as the first positional argument (presumably the number of
# rounds, TODO confirm) and without saving results.
federated.train(1, save_results=False)

In [None]:
# Tabulate the results of the fresh run as a DataFrame.
pd.DataFrame(federated.results)

In [None]:
# Inspect the losses attribute of the node stored at nodes[1] for the fresh run.
federated.nodes[1].losses