In [1]:
from dweNet_final import *
from fastai import *
from fastai.text import *
from fastai.callbacks import *
from nltk.tokenize import word_tokenize
import torch.utils.data as data_utils
import numpy

In [2]:
# Prefer the GPU, but select the CPU when CUDA is unavailable instead of
# configuring fastai with a device that does not exist on this machine.
defaults.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Default token count used by `pad_to` (64 for Headlines, 128 for Reddit Main/Pol).
PADDING = 64

# Dataset root folder and the CSV file read from it.
DATASET_PATH = '.../Datasets/Headlines/'
DATASET = 'Headlines.csv'

# Text column of the CSV ('headline' for Headlines, 'comment' for Reddit Main/Pol).
COL = 'headline'

# Pickled word embedding weights. Built from DATASET_PATH so the dataset root
# is written only once; the resulting path is unchanged.
WEIGHTS = DATASET_PATH + 'Embeddings/glove/Weights_glove_headlines.pkl'


In [4]:
def pad_to(x:Collection[str], pad_til = PADDING) -> Collection[str]:
    """Copy the tokens of `x` into a new list and right-pad it with `PAD`.

    Padding is appended until the list holds at least `pad_til` items.
    Inputs that are already that long are returned as an unchanged copy;
    nothing is ever truncated.
    """
    padded = list(x)
    # Keep appending the padding token until the target length is reached.
    while len(padded) < pad_til:
        padded.append(PAD)
    return padded

In [5]:
# Tokenizer with a custom rule set: the pre-rules run on the raw text, the
# post-rules run on the token list and finish with the `pad_to` padding step.
tokenizer = Tokenizer(
    tok_func=SpacyTokenizer,
    lang='en',
    pre_rules=[
        fix_html,
        replace_rep,
        replace_wrep,
        spec_add_spaces,
        rm_useless_spaces,
    ],
    post_rules=[
        replace_all_caps,
        deal_caps,
        pad_to,
    ],
)

In [6]:
# Processing pipeline handed to the TextList: tokenize with the custom
# tokenizer first, then numericalize the resulting tokens.
processor = [
    TokenizeProcessor(tokenizer=tokenizer),
    NumericalizeProcessor(),
]

In [7]:
# Read the text column COL of the dataset CSV into a TextList that uses the
# custom processor pipeline.
data = TextList.from_csv(
    DATASET_PATH,
    DATASET,
    cols=COL,
    processor=processor,
)

In [8]:
# Split on the 'valid' column, take the labels from column 0, and build the
# databunch.
# NOTE(review): this rebinds `data` to the result of the chain, so re-running
# this cell alone presumably fails; re-run the TextList cell first.
data = (
    data
    .split_from_df(col='valid')
    .label_from_df(cols=0)
    .databunch()
)

In [9]:
# Load the pickled word embedding weights. The context manager guarantees the
# file handle is closed as soon as loading finishes (or fails).
# NOTE(review): pickle.load can execute arbitrary code; only load weight
# files that come from a trusted source.
with open(WEIGHTS, 'rb') as weights_file:
    weights_matrix = pickle.load(weights_file)

In [11]:
# Instantiate the DweNet model from the loaded word embedding weights.
net = DweNet(weights_matrix)

In [12]:
# Move the model to the device configured in `defaults.device` instead of
# repeating a hard-coded 'cuda' string, so both always agree.
net.to(defaults.device)

DweNet(
  (embedding): Embedding(9121, 50, padding_idx=1)
  (features): Sequential(
    (conv0): Conv1d(1, 64, kernel_size=(3, 50), stride=(1,), padding=(1, 0), bias=False)
    (norm0): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): LeakyReLU(negative_slope=0.02, inplace)
    (denseblock1): _DenseBlock(
      (denselayer1): SequentialEx(
        (layers): ModuleList(
          (0): Sequential(
            (0): Conv1d(64, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
            (1): LeakyReLU(negative_slope=0.02, inplace)
            (2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
          (1): dropMergeLayer()
        )
      )
      (denselayer2): SequentialEx(
        (layers): ModuleList(
          (0): Sequential(
            (0): Conv1d(96, 32, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
            (1): LeakyReLU(negative_slope=0.02, inplace)
            (2

In [13]:
# Bundle the databunch and the network into a Learner with weight decay 0.1,
# flattened cross-entropy loss, and accuracy plus macro/micro F1 as metrics.
learn = Learner(
    data,
    net,
    wd=0.1,
    loss_func=CrossEntropyFlat(),
    metrics=[
        accuracy,
        FBeta(average='macro', beta=1),
        FBeta(average='micro', beta=1),
    ],
)

In [None]:
# Train with the one-cycle policy: 10 epochs, peak learning rate 1e-3,
# momentum range (0.8, 0.7).
learn.fit_one_cycle(
    cyc_len=10,
    max_lr=1e-03,
    moms=(0.8, 0.7),
)