# Using a feed-forward neural language model

- Applying efficiency tricks: all n-gram histories of a sentence are scored in one batched computation



# 1)-Importing key Modules

In [0]:
# Support both Python 2 and Python 3 with minimal overhead.
from __future__ import absolute_import, division, print_function

# Silence every warning for the rest of the session.
# NOTE(review): a blanket 'ignore' filter also hides deprecation and numerical
# warnings raised by libraries; consider a narrower filter.
import warnings
warnings.filterwarnings('ignore')

In [2]:
! pip install dynet

Collecting dynet
[?25l  Downloading https://files.pythonhosted.org/packages/88/f0/01a561a301a8ea9aea1c28f82e108c38cd103964c7a46286ab01757a4092/dyNET-2.1-cp36-cp36m-manylinux1_x86_64.whl (28.1MB)
[K     |████████████████████████████████| 28.1MB 1.4MB/s 
Installing collected packages: dynet
Successfully installed dynet-2.1


In [0]:
from collections import defaultdict
import math
import time
import random
import dynet as dy
import numpy as np

In [4]:
! pip install version_information

Collecting version_information
  Downloading https://files.pythonhosted.org/packages/ff/b0/6088e15b9ac43a08ccd300d68e0b900a20cf62077596c11ad11dd8cc9e4b/version_information-1.0.3.tar.gz
Building wheels for collected packages: version-information
  Building wheel for version-information (setup.py) ... [?25l[?25hdone
  Created wheel for version-information: filename=version_information-1.0.3-cp36-none-any.whl size=3880 sha256=d561c584e1b8e06c9700b1153ae4c3b7d2647c612fbb10bb10b00f9e805497da
  Stored in directory: /root/.cache/pip/wheels/1f/4c/b3/1976ac11dbd802723b564de1acaa453a72c36c95827e576321
Successfully built version-information
Installing collected packages: version-information
Successfully installed version-information-1.0.3


In [5]:
# first install: pip install version_information
%reload_ext version_information
%version_information pandas,torch,numpy

Software,Version
Python,3.6.8 64bit [GCC 8.3.0]
IPython,5.5.0
OS,Linux 4.14.137+ x86_64 with Ubuntu 18.04 bionic
pandas,0.25.3
torch,1.3.1+cu100
numpy,1.17.4
Sun Nov 17 20:01:57 2019 UTC,Sun Nov 17 20:01:57 2019 UTC


# 2)- Setting up neural network

In [0]:
N = 2 # Number of previous words used as the history when predicting the next word
EMB_SIZE = 128 # The size of the embedding
HID_SIZE = 128 # The size of the hidden layer

In [0]:
# Functions to read in the corpus
# Vocabulary: looking up an unseen word assigns it the next free integer id.
w2i = defaultdict(lambda: len(w2i))
S = w2i["<s>"]      # sentence boundary symbol (gets id 0)
UNK = w2i["<unk>"]  # unknown-word symbol (gets id 1)
def read_dataset(filename):
  """Lazily read a corpus file, yielding one list of word ids per line.

  Each line is split on runs of whitespace and every token is looked up in
  the module-level ``w2i`` at iteration time, so the ids depend on whichever
  mapping ``w2i`` is bound to when the generator is consumed.

  Splitting with ``str.split()`` (no separator) means blank lines and
  repeated or trailing spaces never produce an empty-string token, which
  would otherwise receive its own vocabulary id. A blank line yields an
  empty list.

  Args:
    filename: path of a text file with one sentence per line.

  Yields:
    A list of integer word ids for each line of the file.
  """
  with open(filename, "r") as f:
    for line in f:
      yield [w2i[x] for x in line.split()]

# 3)- Loading Data

### Loading data from plain-text files
Each line is one sentence; `read_dataset()` splits it into tokens and maps them to integer ids.

In [0]:
# Read the training set while w2i is still auto-growing, so every training
# word receives its own id.
train = list(read_dataset("train.txt"))
# Snapshot the vocabulary size and the id -> word table BEFORE touching the
# dev set. A defaultdict stores the default value for every missing key it is
# asked for, so once w2i is frozen below, each dev-only word is inserted into
# w2i with the value UNK. Computing these afterwards would inflate nwords
# (and with it the embedding/softmax sizes) and could overwrite i2w[UNK] with
# an arbitrary dev word.
nwords = len(w2i)
i2w = {v: k for k, v in w2i.items()}
# Freeze the vocabulary: from here on, unseen words map to UNK.
w2i = defaultdict(lambda: UNK, w2i)
dev = list(read_dataset("valid.txt"))

# 4)- Model Building

### 4.1)- Start DyNet and define trainer

In [0]:
# Container that owns all trainable parameters of the network.
model = dy.ParameterCollection()
# Adam optimizer; its learning rate is passed through the `alpha` argument.
trainer = dy.AdamTrainer(model, alpha=0.001)

### 4.2)-Define the model

In [0]:
W_emb = model.add_lookup_parameters((nwords, EMB_SIZE)) # Word embeddings, one EMB_SIZE vector per vocabulary id
W_h = model.add_parameters((HID_SIZE, EMB_SIZE * N))    # Hidden-layer weights over the N concatenated history embeddings
b_h = model.add_parameters((HID_SIZE))                  # Hidden-layer bias
W_sm = model.add_parameters((nwords, HID_SIZE))         # Weights of the softmax
b_sm = model.add_parameters((nwords))                   # Softmax bias

### 4.3)-A function to calculate scores for a batch of histories

In [0]:
def calc_score_of_histories(words, dropout=0.0):
  """Score every vocabulary word for each history in a batch.

  Args:
    words: a list of histories, each a list of word ids.
    dropout: dropout rate applied to the hidden layer; 0.0 disables it.

  Returns:
    The DyNet expression produced by the output affine transform
    (unnormalized scores, one batch element per history).
  """
  # Regroup from "one row per history" to "one row per history position"
  by_position = np.transpose(words)
  # Batched embedding lookup for each position, then stack the positions
  position_embs = []
  for ids_at_position in by_position:
    position_embs.append(dy.lookup_batch(W_emb, ids_at_position))
  emb = dy.concatenate(position_embs)
  # Hidden layer: affine transform followed by tanh
  hidden = dy.tanh(dy.affine_transform([b_h, W_h, emb]))
  # Dropout is only added to the graph when a non-zero rate is requested
  if dropout != 0.0:
    hidden = dy.dropout(hidden, dropout)
  # Output layer scores
  return dy.affine_transform([b_sm, W_sm, hidden])

### 4.4)-Calculate the loss value for the entire sentence

In [0]:
def calc_sent_loss(sent, dropout=0.0):
  """Build the loss expression for one sentence on a fresh computation graph.

  Every word of the sentence, plus a closing S token, is predicted from the
  N items that precede it; all of these predictions are scored as one batch.

  Args:
    sent: list of word ids.
    dropout: dropout rate forwarded to calc_score_of_histories.

  Returns:
    The DyNet expression summing the per-target negative log-softmax losses.
  """
  # Start from a clean computation graph
  dy.renew_cg()
  # Prefix the sentence with N boundary symbols so the first word has a full history
  padded = [S] * N + sent
  # Targets: every word of the sentence, then the boundary symbol that ends it
  all_targets = sent + [S]
  # The history of the i-th target is the N items right before it in `padded`
  all_histories = [padded[i:i + N] for i in range(len(all_targets))]
  scores = calc_score_of_histories(all_histories, dropout=dropout)
  return dy.sum_batches(dy.pickneglogsoftmax_batch(scores, all_targets))

### 4.5)-Generate a sentence

In [0]:
# Upper bound on the number of words generate_sent() collects before it stops sampling.
MAX_LEN = 100

In [0]:
def generate_sent():
  """Sample one sentence from the model, word by word.

  Sampling starts from a history of N boundary symbols and stops when the
  boundary symbol S is drawn or when MAX_LEN words have been collected.

  Returns:
    The list of sampled word ids (without the closing boundary symbol).
  """
  dy.renew_cg()
  context = [S] * N
  generated = []
  while True:
    # Distribution over the next word given the current context
    scores = calc_score_of_histories([context])
    probs = dy.softmax(scores).npvalue()
    # Renormalize before handing the vector to np.random.choice
    probs = probs / probs.sum()
    next_word = np.random.choice(nwords, p=probs)
    reached_limit = len(generated) == MAX_LEN
    if next_word == S or reached_limit:
      break
    generated.append(next_word)
    # Slide the context window forward by one word
    context = context[1:] + [next_word]
  return generated

# 5)- Train Model

In [0]:
# Large sentinel values, presumably meant for tracking dev-set loss.
# NOTE(review): neither name is read by any cell visible in this notebook — confirm whether they are still needed.
last_dev = 1e20
best_dev = 1e20

In [18]:
# Train for 5 passes over the training sentences, one parameter update per sentence.
for ITER in range(5):
  # Perform training
  # Shuffle the sentence order in place at the start of every pass
  random.shuffle(train)
  train_words, train_loss = 0, 0.0
  start = time.time()
  for sent_id, sent in enumerate(train):
    # Sentence loss with dropout 0.2 applied to the hidden layer
    my_loss = calc_sent_loss(sent, dropout=0.2)
    train_loss += my_loss.value()
    # NOTE(review): calc_sent_loss also scores the closing boundary token (it iterates over sent + [S]),
    # but only len(sent) words are counted here, so the total loss is divided by fewer items than
    # were predicted — confirm this is the intended convention.
    train_words += len(sent)
    my_loss.backward()
    trainer.update()
    # Progress report every 5000 sentences
    if (sent_id+1) % 5000 == 0:
      print("--finished %r sentences (word/sec=%.2f)" % (sent_id+1, train_words/(time.time()-start)))
  # Per-pass summary: loss per counted word, its exponential (perplexity), and throughput
  print("iter %r: train loss/word=%.4f, ppl=%.4f (word/sec=%.2f)" % (ITER, train_loss/train_words, math.exp(train_loss/train_words), train_words/(time.time()-start)))

--finished 5000 sentences (word/sec=1292.70)
--finished 10000 sentences (word/sec=1289.24)
--finished 15000 sentences (word/sec=1291.28)
--finished 20000 sentences (word/sec=1294.41)
--finished 25000 sentences (word/sec=1295.21)
--finished 30000 sentences (word/sec=1292.46)
--finished 35000 sentences (word/sec=1290.20)
--finished 40000 sentences (word/sec=1290.51)
iter 0: train loss/word=6.0528, ppl=425.2811 (word/sec=1290.74)
--finished 5000 sentences (word/sec=1292.57)
--finished 10000 sentences (word/sec=1293.89)
--finished 15000 sentences (word/sec=1295.00)
--finished 20000 sentences (word/sec=1294.78)
--finished 25000 sentences (word/sec=1294.68)
--finished 30000 sentences (word/sec=1295.42)
--finished 35000 sentences (word/sec=1296.39)
--finished 40000 sentences (word/sec=1298.12)
iter 1: train loss/word=5.6172, ppl=275.1247 (word/sec=1297.84)
--finished 5000 sentences (word/sec=1288.87)
--finished 10000 sentences (word/sec=1298.70)
--finished 15000 sentences (word/sec=1300.99)
-

# 5.1)- Evaluate on the validation set

In [19]:
# Score the validation sentences with the default dropout of calc_sent_loss (0.0).
dev_loss = 0.0
dev_words = 0
start = time.time()
for sent in dev:
  dev_loss += calc_sent_loss(sent).value()
  dev_words += len(sent)
# ITER is the loop variable left over from the training cell.
dev_loss_per_word = dev_loss/dev_words
print("iter %r: dev loss/word=%.4f, ppl=%.4f, time=%.2fs" % (ITER, dev_loss_per_word, math.exp(dev_loss_per_word), time.time()-start))

iter 4: dev loss/word=5.6327, ppl=279.4280, time=14.41s


# 6)-Generate a few sentences

In [20]:
# Sample five sentences and print them as space-separated words.
for _ in range(5):
    sampled_ids = generate_sent()
    sampled_words = [i2w[word_id] for word_id in sampled_ids]
    print(" ".join(sampled_words))

it was <unk> and mr. icahn won the eyes
the dollar problem trade with face <unk> ad executive said such managers could come from the takeover below were managed by activities
sales of $ N a share a N N and wednesday cut it about the company to N million in the marina <unk> <unk> furs despite stock prices inflation in fees mcdonald d. <unk> in & financing which include <unk> into N
time it still late friday crash rose N N of the bonds <unk> book a specific certificates series gained N was priced at a plant in <unk> the company said the revenue french interest payments to which have intended
three other money managers largest funds is the allegations that those is did n't release this way can not be a plunged N N here can do not say whether those handling of this was a important part that the <unk> quantum and liability have diseases and <unk> corp
