In [1]:
from ResNet_baseline_final import *
from fastai import *
from fastai.text import *
from nltk.tokenize import word_tokenize
from fastai.callbacks import *
import torch.utils.data as data_utils

In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU so
# the device setting does not point at hardware that is not present.
defaults.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Directory that contains the dataset CSV, and the CSV file name inside it.
DATASET_PATH = '.../Datasets/Headlines/'
DATASET = 'Headlines.csv'

# CSV column holding the text: 'headline' for Headlines,
# 'comment' for Reddit Main/Pol.
COL = 'headline'

# Number of tokens each text is padded up to: 64 for Headlines,
# 128 for Reddit Main/Pol.
PADDING = 64

# Pickled word-embedding weights.
WEIGHTS = '.../Datasets/Headlines/Embeddings/glove/Weights_glove_headlines.pkl'

In [3]:
def pad_to(x:Collection[str], pad_til = PADDING) -> Collection[str]:
    """Return a new list with the tokens of `x`, right-padded with `PAD`.

    `PAD` entries are appended until the list holds `pad_til` items. An input
    that already has `pad_til` or more tokens is copied as-is; nothing is
    ever truncated.
    """
    padded = list(x)
    missing = pad_til - len(padded)
    # A zero or negative `missing` yields an empty list, so nothing is added.
    padded.extend([PAD] * missing)
    return padded

In [4]:
# spaCy-backed English tokenizer. `pre_rules` are applied to the raw text and
# `post_rules` to the token list; `pad_to` is placed last in `post_rules`.
tokenizer = Tokenizer(
    SpacyTokenizer,
    'en',
    pre_rules=[
        fix_html,
        replace_rep,
        replace_wrep,
        spec_add_spaces,
        rm_useless_spaces,
    ],
    post_rules=[
        replace_all_caps,
        deal_caps,
        pad_to,
    ],
)

In [5]:
# Processing pipeline handed to the TextList: tokenize with the custom
# tokenizer first, then numericalize.
processor = [
    TokenizeProcessor(tokenizer=tokenizer),
    NumericalizeProcessor(),
]

In [6]:
data = (TextList.from_csv(DATASET_PATH, DATASET, cols=COL, processor=processor))

In [7]:
data = data.split_from_df(col='valid').label_from_df(cols=0).databunch()

In [8]:
# NOTE(review): unpickling can execute arbitrary code; only point WEIGHTS at a
# file from a trusted source.
# The context manager closes the file handle as soon as loading finishes.
with open(WEIGHTS, 'rb') as weights_file:
    weights_matrix = pickle.load(weights_file)

In [9]:
# Instantiate the model from the unpickled weights.
# NOTE(review): ResNet is presumably provided by the ResNet_baseline_final
# star import — confirm.
net = ResNet(weights_matrix)

In [10]:
# Move the model to the device configured in `defaults.device` rather than a
# separately hard-coded device string, so the device is chosen in one place.
# Left as the cell's last expression so the model summary is displayed.
net.to(defaults.device)

ResNet(
  (embedding): Embedding(9120, 50, padding_idx=1)
  (init_conv): Conv1d(1, 16, kernel_size=(3, 50), stride=(1,), padding=(1, 0), bias=False)
  (features): Sequential(
    (init_conv): Conv1d(1, 16, kernel_size=(3, 50), stride=(1,), padding=(1, 0), bias=False)
    (init_norm): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (init_relu): ReLU(inplace)
    (resblock1): Sequential(
      (0): SequentialEx(
        (layers): ModuleList(
          (0): Sequential(
            (0): Conv1d(16, 16, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
            (1): ReLU(inplace)
            (2): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
          (1): Sequential(
            (0): Conv1d(16, 16, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
            (1): ReLU(inplace)
            (2): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        

In [None]:
# Wrap the data and the model in a Learner: weight decay 0.1, flattened
# cross-entropy loss, and accuracy plus macro- and micro-averaged F-beta
# (beta=1) as reported metrics.
learn = Learner(
    data,
    net,
    wd=0.1,
    loss_func=CrossEntropyFlat(),
    metrics=[
        accuracy,
        FBeta(average='macro', beta=1),
        FBeta(average='micro', beta=1),
    ],
)

In [None]:
# One-cycle training: cycle length 10, maximum learning rate 1e-3,
# momentum pair (0.8, 0.7).
learn.fit_one_cycle(
    cyc_len=10,
    max_lr=1e-03,
    moms=(0.8, 0.7),
)