In [5]:
#| default_exp run
#|  export
from fastcore.script import call_parse
def split_string(string):
    # Removing the parentheses and splitting the string by comma
    parts = string[1:-1].split(",")
    # Removing the whitespace and quotes from the parts
    parts = [part.strip().strip("'") for part in parts]
    return parts[0], parts[1]

def return_iters(db:str # Path to db
                 ):
    train_iter = []
    test_iter = []
    file = open(db, 'r', encoding='latin1')
    mapping = {
        "Libertarian Left": 1,
        "Libertarian Right": 2,
        "Authoritarian Left": 3,
        "Authoritarian Right": 4,
        "Centrist": 5,
        "Authoritarian Center": 6,
        "Left": 7,
        "Right": 8,
        "Libertarian Center": 9,
    }
    lines = file.readlines()
    for line in lines:
        opinion,text = split_string(line)
        train_iter+=[(mapping[opinion],text)]
        test_iter+=[(mapping[opinion],text)]
    train_iter = iter(train_iter)
    test_iter = iter(test_iter)
    file.close()
    return train_iter, test_iter

In [6]:
#|  export
from torchtext.data.utils import get_tokenizer
# from Political_Compass_AI.data_processing import return_iters
# from Political_Compass_AI.data_processing import split_string
from Political_Compass_AI.data_processing import yield_tokens
from Political_Compass_AI.data_processing import collate_batch
from Political_Compass_AI.model import TextClassificationModel
from Political_Compass_AI.training import train
from Political_Compass_AI.training import evaluate
from torchtext.data.functional import to_map_style_dataset
from torchtext.vocab import build_vocab_from_iterator
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split
import time
import torch
import pandas as pd

def collate_batch(
        batch
):
    global text_pipeline
    global db
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    label_pipeline = lambda x: int(x) - 1
    label_list, text_list, offsets = [], [], [0]
    for (_label, _text) in batch:
        label_list.append(label_pipeline(_label))
        processed_text = torch.tensor(text_pipeline(_text), dtype=torch.int64)
        text_list.append(processed_text)
        offsets.append(processed_text.size(0))
    label_list = torch.tensor(label_list, dtype=torch.int64)
    offsets = torch.tensor(offsets[:-1]).cumsum(dim=0)
    text_list = torch.cat(text_list)
    return label_list.to(device), text_list.to(device), offsets.to(device)
@call_parse
def run(
    _db:str # dn path to run alignment distribution
    ,emsize = 128
    ,LR = 5
    ,BATCH_SIZE = 32
    ,optimizer = "Adagrad"
    ,EPOCHS = 20


):
    global text_pipeline
    global db
    db=_db
    tokenizer = get_tokenizer('basic_english')
    text_pipeline = lambda x: vocab(tokenizer(x))
    label_pipeline = lambda x: int(x) - 1
    train_iter, test_iter = return_iters(db)
    vocab = build_vocab_from_iterator(yield_tokens(train_iter), specials=["<unk>"])
    vocab.set_default_index(vocab["<unk>"])
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    train_iter, test_iter = return_iters(db)
    dataloader = DataLoader(train_iter, batch_size=8, shuffle=False, collate_fn=collate_batch)
    train_iter, test_iter = return_iters(db)
    num_class = len(set([label for (label, text) in train_iter]))
    vocab_size = len(vocab)
    model = TextClassificationModel(vocab_size, emsize, num_class).to(device)
    run_ledger = open("Run_Ledger.txt", 'a')
    criterion = torch.nn.CrossEntropyLoss()
    _optimizer=optimizer
    if optimizer=="Adagrad":
        optimizer = torch.optim.Adagrad(model.parameters(), lr=LR)
    elif optimizer=="SGD":
        optimizer = torch.optim.SGD(model.parameters(), lr=LR)
    else:
        print("Choose a different optimizer")
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)
    total_accu = None
    train_iter, test_iter = return_iters(db)
    train_dataset = to_map_style_dataset(train_iter)
    test_dataset = to_map_style_dataset(test_iter)
    num_train = int(len(train_dataset) * 0.95)
    split_train_, split_valid_ = \
        random_split(train_dataset, [num_train, len(train_dataset) - num_train])

    train_dataloader = DataLoader(split_train_, batch_size=BATCH_SIZE,
                                  shuffle=True, collate_fn=collate_batch)
    valid_dataloader = DataLoader(split_valid_, batch_size=BATCH_SIZE,
                                  shuffle=True, collate_fn=collate_batch)
    test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                                 shuffle=True, collate_fn=collate_batch)
    first_flag = True
    for epoch in range(1, EPOCHS + 1):
        epoch_start_time = time.time()
        train(train_dataloader, model, optimizer, epoch)
        accu_val = evaluate(valid_dataloader, model)
        if total_accu is not None and total_accu > accu_val:
            scheduler.step()
        else:
            total_accu = accu_val

        print('-' * 59)
        print('| end of epoch {:3d} | time: {:5.2f}s | '
              'valid accuracy {:8.3f} '.format(epoch,
                                               time.time() - epoch_start_time,
                                               accu_val))
        print('-' * 59)

    accu_test = evaluate(test_dataloader,model)

    df_Log = {"Database_file":[],"Epochs":[],"LR":[],"Batch_Size":[],
              "Final_accu":[],"Optimzer":[],"accu_test":[]}

    df_Log["Database_file"].append(db)
    df_Log["Epochs"].append(str(EPOCHS))
    df_Log["LR"].append( str(LR))
    df_Log["Batch_Size"].append(str(BATCH_SIZE))
    df_Log["Final_accu"].append(str(accu_val))
    df_Log["Optimzer"].append(_optimizer)
    df_Log["accu_test"].append(accu_test)

    dataframe = pd.DataFrame(df_Log)
    dataframe.to_csv('Run_Ledger.csv',mode='a', index=False,sep="\t")
    print(str(accu_test))
    return model

In [None]:
model =run("../Full_Align_DB",BATCH_SIZE=32,LR=5,EPOCHS=20)
#torch.save(model.state_dict(), <path_to>)
# model.load_state_dict(torch.load(<path_to>))


cuda
| epoch   1 |    50/ 1463 batches | accuracy    0.124
| epoch   1 |   100/ 1463 batches | accuracy    0.138
| epoch   1 |   150/ 1463 batches | accuracy    0.121
| epoch   1 |   200/ 1463 batches | accuracy    0.160
| epoch   1 |   250/ 1463 batches | accuracy    0.142
| epoch   1 |   300/ 1463 batches | accuracy    0.155
| epoch   1 |   350/ 1463 batches | accuracy    0.138
| epoch   1 |   400/ 1463 batches | accuracy    0.138
| epoch   1 |   450/ 1463 batches | accuracy    0.142
| epoch   1 |   500/ 1463 batches | accuracy    0.149
| epoch   1 |   550/ 1463 batches | accuracy    0.136
| epoch   1 |   600/ 1463 batches | accuracy    0.148
| epoch   1 |   650/ 1463 batches | accuracy    0.144
| epoch   1 |   700/ 1463 batches | accuracy    0.134
| epoch   1 |   750/ 1463 batches | accuracy    0.165
| epoch   1 |   800/ 1463 batches | accuracy    0.154
| epoch   1 |   850/ 1463 batches | accuracy    0.156
| epoch   1 |   900/ 1463 batches | accuracy    0.149
| epoch   1 |   950/ 14

In [83]:
def predict(text, text_pipeline):
    with torch.no_grad():
        text = torch.tensor(text_pipeline(text))
        output = model(text, torch.tensor([0]))
        return output.argmax(1).item() + 1
mapping = {
        1:"Libertarian Left",
        2: "Libertarian Right",
        3:"Authoritarian Left" ,
        4: "Authoritarian Right",
        5: "Centrist",
         6:"Authoritarian Center",
        7: "Left",
        8: "Right",
        9:"Libertarian Center" ,
    }
model = model.to("cpu")
ex_text_str = """
People didn't even think he was going to invade to begin with, what are you talking about? 😅
And I have no idea how much European support is carrying Ukraine here vs how effective the Ukrainian resistance is (and how fucking shite the Russian offence is).
'Nyhow. Not really relevant. Putin has to be stopped, the war has to end. Aid should be provided to Ukraine. And at the very least, your Socialist Party isn't literally simping for Putin.
Which many Communist parties around the world are doing.
I just tried to explain why they might do that.
Because you called them "nuts".
There's good reasons not to trust NATO or the U.S. I hope time won't prove me correct, but it is naïve to assume that America's already hyper-conservative neoliberal approach cannot slide further right.
For the record, I also don't think the U.S is doing anything inherently bad in terms of helping Ukraine (unlike what someone like Jimmy Dore might spin).
"""
print("This is a %s comment" % mapping[predict(ex_text_str, text_pipeline)])

This is a Libertarian Right comment
