In [1]:
%load_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader

import datetime
import copy
from IPython.display import clear_output
import youtokentome as yttm

import simcube
from simcube.data import tokenize_corpus, build_vocabulary, \
    save_texts_to_file, LanguageModelDataset, load_war_and_piece_chunks, \
    GreedyGenerator, BeamGenerator
from simcube.pipeline import train_eval_loop, init_random_seed
from simcube.base import get_params_number

init_random_seed()

plt.rcParams["figure.figsize"] = (15,10)

In [None]:
# Init signature:
# nn.Embedding(
#     num_embeddings,
#     embedding_dim,
#     padding_idx=None,
#     max_norm=None,
#     norm_type=2.0,
#     scale_grad_by_freq=False,
#     sparse=False,
#     _weight=None,
# )
# Docstring:     
# A simple lookup table that stores embeddings of a fixed dictionary and size.

# This module is often used to store word embeddings and retrieve them using indices.
# The input to the module is a list of indices, and the output is the corresponding
# word embeddings.

nn.Embedding()

In [2]:
import numpy as np

In [4]:
b = np.array([0.1, 0.5, 0.3, 0.1])

In [5]:
a = np.array([[1, 0, 1, 0], [0, 1, 3, 0], [2, 3, 0, 0]])

In [6]:
a.dot(b)

array([0.4, 1.4, 1.7])

In [8]:
a = np.array([[1, 0, 1, 0], [0, 1, 3, 0], [2, 3, 0, 0]])

In [9]:
q = np.array([0, 0, 1])

In [10]:
q.dot(a)

array([2, 3, 0, 0])

In [11]:
from scipy.special import softmax

In [12]:
softmax(q.dot(a))

array([0.25069239, 0.68145256, 0.03392753, 0.03392753])

In [13]:
a.dot(softmax(q.dot(a)))

array([0.28461991, 0.78323514, 2.54574246])

In [112]:
vect = np.array([[0.3504305689198156, 0.871844425624726, 0.29345316540775357, 0.49159320438393916, 0.16391992930609034, 0.24589641847050037, 0.34921020303336925, 0.09968814035867879, 0.8652385667745919, 0.00906484385602968, 0.6134586521086117, 0.08104312584086149, 0.643129733435556, 0.6610968673257929, 0.6825169003800382], [0.005641686042561211, 0.3397733866278605, 0.4408793722092307, 0.6618752692611525, 0.4192615374283991, 0.6718589897811911, 0.23503584107912667, 0.9972834040264165, 0.6907780153811639, 0.5160598448726361, 0.4200243418824855, 0.7745997472321381, 0.9124177261957108, 0.627661131744206, 0.7792319239076758], [0.44947044223343224, 0.39851993627332394, 0.27645205987950927, 0.3360502940952873, 0.20207394761469466, 0.27730469648938627, 0.9647449489128369, 0.38480306917172535, 0.7014748335636187, 0.5616919724157547, 0.3082991954077743, 0.43320540280287834, 0.7682716834674514, 0.04669826413239708, 0.7975639937877288], [0.3529089999231677, 0.5085801437940869, 0.6686697864089949, 0.9579051761714787, 0.14893344048972235, 0.6504801381978066, 0.6554087909852483, 0.2182290972559655, 0.8874805349214916, 0.8549111586624765, 0.2256959432511686, 0.7090739886051667, 0.9215898392742404, 0.8361038069049278, 0.9807575571901945], [0.93096165894251, 0.21204415175966806, 0.005393301311816812, 0.6868163541395496, 0.17651743121260177, 0.4276211882347165, 0.7172630747046246, 0.6222321154458413, 0.782866044726867, 0.9401403417712956, 0.6009251310321828, 0.7689712777389628, 0.011137370287858661, 0.6750220130270511, 0.3656918897133191], [0.344423832576648, 0.5200078573131781, 0.08090528060543856, 0.6187002344784092, 0.24428489996011238, 0.18400539459399, 0.40308101020726217, 0.19255989698913867, 0.8010944590934469, 0.20324438899818598, 0.4927144298170735, 0.03783988662477278, 0.7705093963091103, 0.2520865403496373, 0.40266725180440743], [0.6681310493060861, 0.2801674372250399, 0.6224648405839522, 0.6287746784150413, 0.864080498689899, 0.23833127705610258, 0.20311743810136906, 0.6646132937899738, 0.23575457417289924, 0.1869625695994146, 0.7712148738157554, 0.15237041670323637, 0.2763150373902683, 0.46500408886101585, 0.991468614310106], [0.47955269815875845, 0.18371674117676162, 0.4749895427072034, 0.5127159626377625, 0.14327300286458633, 0.5921086963579639, 0.21467664382766927, 0.08984875049424312, 0.5619088573772313, 0.6324525220346037, 0.65145500723789, 0.5118736033583858, 0.3791794826772541, 0.7062193547285907, 0.12888775429739185], [0.8936198110390413, 0.1499351596848777, 0.23230300209801535, 0.6275970485906217, 0.14179412142521963, 0.44590423506527455, 0.6398118989481705, 0.44025473834142315, 0.9690917160909921, 0.7329911430579539, 0.4723409208689966, 0.30051845327308735, 0.6517065287372249, 0.11682964074366375, 0.3356912564688531], [0.5819483213886298, 0.12715730125007862, 0.6624081011547434, 0.27210122174739293, 0.3321414469174103, 0.6738692045564213, 0.49979407643990537, 0.923095453187033, 0.8688108133354637, 0.29672803800781, 0.8422355709858387, 0.22967466587429908, 0.48606633908371566, 0.3302629931498261, 0.9271715208940098], [0.814229644181095, 0.6269508015754929, 0.19067116118180638, 0.6597416333912787, 0.3042396495694798, 0.5349586078017191, 0.9889297007928726, 0.5059647631701082, 0.6586214714303461, 0.19972197385065704, 0.730120041302739, 0.9254129585548022, 0.7774768791337286, 0.5880525770183761, 0.4404909426586451], [0.8393608070151912, 0.551470751477307, 0.3776646281929925, 0.7403545806778788, 0.01464073752506101, 0.49682079457661743, 0.12829037985166736, 0.8323709714882789, 0.4861583628986299, 0.10966510942571872, 0.36384711262095637, 0.008343156485128067, 0.05481969871494197, 0.11036480291979456, 0.3495717657917], [0.575668909069271, 0.1948209406820347, 0.5066632418120769, 0.5610866065811511, 0.7503051258152065, 0.20250301475454058, 0.9387177222181186, 0.4214964558859865, 0.2441688535705906, 0.2852282954051667, 0.7185375048873539, 0.09961745251862686, 0.507873295740294, 0.9713796833363287, 0.8218946227484244], [0.14519733396011691, 0.07264015089790021, 0.7254237309701331, 0.7437297525624584, 0.3465971472185204, 0.6489212261982703, 0.2152569561085349, 0.6476151760429897, 0.2045187871395916, 0.9599712380254137, 0.28554199184758966, 0.7701922251424572, 0.7095119328780166, 0.7579558453415812, 0.4251898428876446]])
                
Q = np.array([0.30760147020407946, 0.1528992448227442, 0.9387231083505163, 0.12201982125460176, 0.3159744925438269, 0.555332538642272, 
              0.8654043562058316, 0.5523485724329922, 0.6405492495162189, 0.8421217300945876, 0.03415012932624606, 0.0914780538557024, 
              0.745151636966557, 0.9885343010237021, 0.02289480154711454])

In [115]:
# from scipy.special import softmax

def softmax(z):
    z -= np.max(z)
    sm = (np.exp(z).T / np.sum(np.exp(z), axis=0)).T
    return sm

def attention(features, query):
    """
    features - InLen x EmbSize - features of elements of input sequence
    query - EmbSize - features of query object

    returns vector of size EmbSize - features, aggregated according to the query
    """
    result = softmax(query.dot(features.T))
    return result

In [116]:
attention(vect, Q)

array([0.02902208, 0.09907626, 0.0485058 , 0.22835242, 0.06944933,
       0.02040724, 0.03002252, 0.0423537 , 0.05911434, 0.07735371,
       0.0911031 , 0.01519346, 0.08483444, 0.1052116 ])

In [16]:
a

array([[1, 0, 1, 0],
       [0, 1, 3, 0],
       [2, 3, 0, 0]])

In [18]:
a.dot(a.T)

array([[ 2,  3,  2],
       [ 3, 10,  3],
       [ 2,  3, 13]])

In [25]:
softmax(a.dot(a.T), axis=1)

array([[2.11941558e-01, 5.76116885e-01, 2.11941558e-01],
       [9.10221936e-04, 9.98179556e-01, 9.10221936e-04],
       [1.67006637e-05, 4.53971105e-05, 9.99937902e-01]])

In [23]:
a.dot(softmax(a.dot(a.T), axis=0))

array([[2.11974959e-01, 5.76207679e-01, 2.21181736e+00],
       [9.60323927e-04, 9.98315747e-01, 3.00072393e+00],
       [2.14672223e-01, 3.57065555e+00, 2.14672223e-01],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])

In [26]:
b = np.array([[1, 0, 1, 0], [0, 1, 1, 0]])

In [30]:
logits = b.T.dot(b)
logits

array([[1, 0, 1, 0],
       [0, 1, 1, 0],
       [1, 1, 2, 0],
       [0, 0, 0, 0]])

In [34]:
b.dot(softmax(logits, axis=0)).T

array([[0.73105858, 0.5       ],
       [0.5       , 0.73105858],
       [0.73105858, 0.73105858],
       [0.5       , 0.5       ]])

In [37]:
projk = np.array([[1, 0], [0, 0]])
projq = np.array([[0, 1], [0, 0]])
projv = np.array([[1, 0], [0, 1]])

In [39]:
bais = np.array([[0, ], [0, ]])

In [83]:
inp = np.array([[1, 0, 1, 0], [0, 1, 1, 0]])
inp.T

array([[1, 0],
       [0, 1],
       [1, 1],
       [0, 0]])

In [57]:
K = inp.T.dot(projk.T)
Q = inp.T.dot(projq.T)
V = inp.T.dot(projv.T)

In [58]:
K

array([[1, 0],
       [0, 0],
       [1, 0],
       [0, 0]])

In [59]:
Q

array([[0, 0],
       [1, 0],
       [1, 0],
       [0, 0]])

In [60]:
V

array([[1, 0],
       [0, 1],
       [1, 1],
       [0, 0]])

In [62]:
logits = Q.dot(K.T)
logits

array([[0, 0, 0, 0],
       [1, 0, 1, 0],
       [1, 0, 1, 0],
       [0, 0, 0, 0]])

In [80]:
attscore = softmax(logits, axis=1)
attscore

array([[0.25      , 0.25      , 0.25      , 0.25      ],
       [0.36552929, 0.13447071, 0.36552929, 0.13447071],
       [0.36552929, 0.13447071, 0.36552929, 0.13447071],
       [0.25      , 0.25      , 0.25      , 0.25      ]])

In [81]:
attscore.dot(V)

array([[0.5       , 0.5       ],
       [0.73105858, 0.5       ],
       [0.73105858, 0.5       ],
       [0.5       , 0.5       ]])

In [122]:
features = [[0.5175129778200084, 0.13021330700949507], [-0.3578609445921744, -0.07768163060380659], [-0.046577477754636734, -0.12288550821838619], [0.4424092505449793, -1.431399548551344], [0.753992222548331, -1.1210257338970167], [1.6736061037504428, -1.9789731491226337], [-1.4985152255486565, -1.6614802556117283], [-0.610065708073959, -0.8475335063027695], [-0.1657640783522184, -1.7079776825852762], [0.7857341373616981, 0.2956255012408635], [-0.49243028413686984, 0.01065675311085114], [0.20598401523943788, -0.6339670563637549], [-0.15698934123474126, 0.9516567843056503], [-0.08965595798444444, 0.9923765422389716], [-0.9649404480809814, -0.6203623955866846]]
proj_k = [[-0.4777373377067222, 1.384780896738418], [1.5537173245233542, -1.6151073640132454]]
bias_k = [0.34068677065672126, -1.7225350645946451]
proj_q = [[1.667192089239799, 0.9072106203091014], [1.0017863742909645, -0.8578876449703756]]
bias_q = [-0.3811843050970959, -0.15922065479353645]
proj_v = [[2.396375885002737, 0.23995979796695774], [0.21631803999882093, -0.6475173781192963]]
bias_v = [1.448781271879823, -0.7144164516316529]

features = np.array(features)
proj_k = np.array(proj_k)
bias_k = np.array(bias_k)
proj_q = np.array(proj_q)
bias_q = np.array(bias_q)
proj_v = np.array(proj_v)
bias_v = np.array(bias_v)

In [142]:
features.shape

(15, 2)

In [144]:
proj_k.shape, bias_k.shape

((2, 2), (2,))

In [150]:
(features.dot(proj_k.T) + bias_k).shape

(15, 2)

In [151]:
K = features.dot(proj_k.T) + bias_k
Q = features.dot(proj_q.T) + bias_q
V = features.dot(proj_v.T) + bias_v

logits = Q.dot(K.T)
logits.shape

(15, 15)

In [178]:
attscore = softmax(logits.T, 1)
attscore.shape

(15, 15)

In [179]:
attscore

array([[5.55467453e-02, 1.20023437e-01, 7.05290204e-02, 3.10537116e-02,
        3.18138831e-02, 5.76301999e-03, 7.79609258e-02, 4.82618077e-02,
        3.61240283e-02, 5.59959451e-02, 1.64155866e-01, 4.58318071e-02,
        5.15079531e-01, 5.17699398e-01, 7.47039578e-02],
       [1.42508509e-02, 5.08101348e-02, 2.21884676e-02, 1.78459515e-03,
        1.93628441e-03, 7.08721908e-05, 1.71207654e-02, 1.16294630e-02,
        2.57703208e-03, 1.36374473e-02, 8.46179911e-02, 7.06182471e-03,
        5.17215069e-01, 5.12685727e-01, 3.00832853e-02],
       [2.83628840e-02, 9.70989071e-02, 4.65899051e-02, 9.94572277e-03,
        9.34638815e-03, 7.33720096e-04, 7.47121069e-02, 3.53255812e-02,
        1.52533116e-02, 2.56736005e-02, 1.48268671e-01, 2.19863379e-02,
        5.17215069e-01, 5.05849263e-01, 7.29755926e-02],
       [4.29588176e-05, 5.75342601e-04, 2.84370098e-04, 1.33520178e-02,
        2.78067218e-03, 5.44974394e-03, 7.61386057e-01, 6.66284334e-03,
        9.34997006e-02, 1.64973963e-0

In [176]:
np.max(attscore, axis=1, keepdims=True)

array([[0.5176994 ],
       [0.51721507],
       [0.51721507],
       [0.76138606],
       [0.76138606],
       [0.76138606],
       [0.76138606],
       [0.76138606],
       [0.76138606],
       [0.5176994 ],
       [0.5176994 ],
       [0.76138606],
       [0.5176994 ],
       [0.5176994 ],
       [0.51721507]])

In [181]:
result = attscore.dot(V)
result.shape

(15, 2)

In [177]:
def softmax(z, axis=None):
    z -= np.max(z, axis=axis, keepdims=True)
    sm = (np.exp(z).T / np.sum(np.exp(z), axis=axis, keepdims=True)).T
    return sm

In [188]:
def self_attention(features, proj_k, bias_k, proj_q, bias_q, proj_v, bias_v):
    """
    features - InLen x EmbSize - features of elements of input sequence
    proj_k - EmbSize x EmbSize - projection matrix to make keys from features
    bias_k - EmbSize - bias vector to make keys from features
    proj_q - EmbSize x EmbSize - projection matrix to make queries from features
    bias_q - EmbSize - bias vector to make queries from features
    proj_v - EmbSize x EmbSize - projection matrix to make values from features
    bias_v - EmbSize - bias vector to make values from features

    returns InLen x EmbSize
    """
    K = features.dot(proj_k.T) + bias_k
    Q = features.dot(proj_q.T) + bias_q
    V = features.dot(proj_v.T) + bias_v
    
    logits = Q.dot(K.T)
    attscore = softmax(logits.T, 1)
    result = attscore.dot(V)
    return result

In [189]:
self_attention(features, proj_k, bias_k, proj_q, bias_q, proj_v, bias_v)

array([[ 1.99133153, -1.79413867],
       [ 1.54161946, -1.57177382],
       [ 1.56964794, -1.70587708],
       [-1.8149447 ,  0.06451833],
       [-1.62625824,  0.10642735],
       [-1.24156256,  0.2100383 ],
       [-1.95021025, -0.03134702],
       [-1.49339022, -0.61521177],
       [-1.90142675,  0.03809897],
       [ 2.25853056, -1.77374872],
       [ 1.51890826, -1.51343532],
       [-1.48165871, -0.19396076],
       [ 1.37785593, -1.33820154],
       [ 1.37710993, -1.33558151],
       [ 1.14012603, -1.64840473]])

In [184]:
answer = np.array([[1.3828516277455327, -0.7052491362196253], [1.2990231347325174, 0.07313412990950195], [1.7291639000678214, 0.057097555962845575], [4.9977493686508465, 0.9611194260544597], [4.92077627317798, 0.9417305419570732], [5.027240226701748, 0.9675417617162317], [5.008405848246054, 0.9647146788718596], [4.617706670795201, 0.8915957294670733], [5.018586540911721, 0.966041694502124], [1.4121709865145842, -1.0082854213057408], [0.8082085153678329, -0.02863474439053916], [4.261630070405553, 0.7878600291134995], [1.1488305812076605, -1.1882802581002707], [1.1929080669359462, -1.220148314614849], [3.9068303877684656, 0.7744398548226676]])

In [185]:
answer.shape

(15, 2)

In [88]:
inp = np.array([[1, 0, 1, 0], [0, 1, 1, 0]])
inp[0], inp[1]

(array([1, 0, 1, 0]), array([0, 1, 1, 0]))

In [93]:
emb1 = np.expand_dims(inp[0], axis=0)
emb2 = np.expand_dims(inp[1], axis=0)
emb1, emb2

(array([[1, 0, 1, 0]]), array([[0, 1, 1, 0]]))

In [86]:
projk1 = np.array([[1, 0], [0, 0]])
projk2 = np.array([[0, 1], [0, 0]])

projq1 = np.array([[0, 1], [1, 0]])
projq2 = np.array([[1, 1], [1, 1]])

projv1 = np.array([[1,], [0,]])
projv2 = np.array([[0,], [1,]])

In [98]:
K1 = inp.T.dot(projk1.T)
Q1 = inp.T.dot(projq1.T)

In [99]:
K2 = inp.T.dot(projk2.T)
Q2 = inp.T.dot(projq2.T)

In [101]:
V1 = emb1.T.dot(projv1.T)
V2 = emb2.T.dot(projv2.T)

In [102]:
logits1 = Q1.dot(K1.T)
logits2 = Q2.dot(K2.T)

In [103]:
logits1, logits2

(array([[0, 0, 0, 0],
        [1, 0, 1, 0],
        [1, 0, 1, 0],
        [0, 0, 0, 0]]),
 array([[0, 1, 1, 0],
        [0, 1, 1, 0],
        [0, 2, 2, 0],
        [0, 0, 0, 0]]))

In [104]:
attscore1 = softmax(logits1, axis=1)
attscore2 = softmax(logits2, axis=1)

In [105]:
attscore1.dot(V1)

array([[0.5       , 0.        ],
       [0.73105858, 0.        ],
       [0.73105858, 0.        ],
       [0.5       , 0.        ]])

In [106]:
attscore2.dot(V2)

array([[0.        , 0.73105858],
       [0.        , 0.73105858],
       [0.        , 0.88079708],
       [0.        , 0.5       ]])

In [107]:
attscore1.dot(V1) + attscore2.dot(V2)

array([[0.5       , 0.73105858],
       [0.73105858, 0.73105858],
       [0.73105858, 0.88079708],
       [0.5       , 0.5       ]])

In [3]:
documents = [
    "Казнить нельзя, помиловать. Нельзя наказывать.",
    "Казнить, нельзя помиловать. Нельзя освободить.",
    "Нельзя не помиловать.",
    "Обязательно освободить."
]

In [4]:
documents

['Казнить нельзя, помиловать. Нельзя наказывать.',
 'Казнить, нельзя помиловать. Нельзя освободить.',
 'Нельзя не помиловать.',
 'Обязательно освободить.']

In [31]:
from nltk.tokenize import word_tokenize
import string
from collections import Counter, defaultdict

In [25]:
tokenized_documents = []
for doc in documents:
    tokenized_documents.append([token.lower() for token in word_tokenize(doc) if token not in list(string.punctuation)])

In [26]:
tokenized_documents

[['казнить', 'нельзя', 'помиловать', 'нельзя', 'наказывать'],
 ['казнить', 'нельзя', 'помиловать', 'нельзя', 'освободить'],
 ['нельзя', 'не', 'помиловать'],
 ['обязательно', 'освободить']]

In [55]:
vocub = Counter()
for doc in tokenized_documents:
    vocub += Counter(set(doc))

In [56]:
vocub

Counter({'помиловать': 3,
         'наказывать': 1,
         'казнить': 2,
         'нельзя': 3,
         'освободить': 2,
         'не': 1,
         'обязательно': 1})

In [None]:
vocub = defaultdict()
for doc in tokenized_documents:
    vocub += Counter(doc)

In [57]:
vocub = sorted(vocub.items(), key=lambda item: item[1], reverse=True)

In [58]:
vocub

[('помиловать', 3),
 ('нельзя', 3),
 ('казнить', 2),
 ('освободить', 2),
 ('наказывать', 1),
 ('не', 1),
 ('обязательно', 1)]

In [46]:
map(lambda x: x[0], x[1]/len(tokenized_documents), vocub)  

NameError: name 'x' is not defined

In [59]:
list(map(lambda x: x[1]/len(tokenized_documents), vocub))

[0.75, 0.75, 0.5, 0.5, 0.25, 0.25, 0.25]

In [53]:
len(vocub)

7

In [60]:
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.preprocessing import StandardScaler
import numpy as np

corpus = [
    'Казнить нельзя, помиловать. Нельзя наказывать.',
    'Казнить, нельзя помиловать. Нельзя освободить.',
    'Нельзя не помиловать.',
    'Обязательно освободить.']

#Получаем счетчики слов
TF = CountVectorizer().fit_transform(corpus)

#Строим IDF. К сожалению, в этом задании нам нужно только vectorizer.idf_
#Для стандартных случаев на этой строке все вычисления и заканчиваются.
#Обычно  TFIDF = vectorizer.fit_transform(corpus)
vectorizer = TfidfVectorizer(smooth_idf=False, use_idf=True)
vectorizer.fit_transform(corpus)

## из IDF  в DF
word_doc_freq = 1/np.exp(vectorizer.idf_ - 1)

#TF нормируем и сглаживаем логарифмом (требование задания)
TFIDF = np.log(TF/TF.sum(axis=1)+1) / word_doc_freq 

#Масштабируем признаки
scaledTFIDF = StandardScaler().fit_transform(TFIDF)

#Домножаем на np.sqrt((4-1)/4) для перевода из DDOF(0) в DDOF(1) для 4 текстов
#(требование задания) 
scaledTFIDF *= np.sqrt(3/4)

#Вывод в порядке возрастания DF
for l in scaledTFIDF[:,np.argsort(word_doc_freq)]:
    print (" ".join([ "%.2f" % d for d in l]))

1.50 -0.50 -0.50 0.87 -0.76 0.60 0.16
-0.50 -0.50 -0.50 0.87 0.18 0.60 0.16
-0.50 1.50 -0.50 -0.87 -0.76 0.29 1.04
-0.50 -0.50 1.50 -0.87 1.34 -1.48 -1.36


In [4]:
import sys
import ast
import numpy as np

In [5]:
def parse_array(s):
    return np.array(ast.literal_eval(s))

def read_array():
    return parse_array(sys.stdin.readline())

def write_array(arr):
    print(repr(arr.tolist()))


In [6]:
i_a = [[0.6046018907385543, 0.0812964077275945, 0.6366439552273822, 0.20134327995534496], 
       [0.8106187774962709, 0.4395507898340978, 0.8290096270213004, 0.05773841312522798], 
       [0.938964520620817, 0.3860407857274528, 0.21318174478828456, 0.07860176987690048], 
       [0.04840110723428537, 0.6411287103553837, 0.509253427569025, 0.7094441369109541], 
       [0.691552879511939, 0.4979735285634219, 0.07060470682483455, 0.7631262538014161]]
i_b = 2

In [7]:
o_a = [[0.8106187774962709, 0.4395507898340978, 0.8290096270213004, 0.20134327995534496], 
       [0.938964520620817, 0.4395507898340978, 0.8290096270213004, 0.07860176987690048], 
       [0.938964520620817, 0.6411287103553837, 0.509253427569025, 0.7094441369109541], 
       [0.691552879511939, 0.6411287103553837, 0.509253427569025, 0.7631262538014161]]
o_b = [[1.0, 1.0, 1.0, 0.0], 
       [1.0, 0.0, 0.0, 1.0], 
       [0.0, 1.0, 1.0, 1.0], 
       [1.0, 0.0, 0.0, 1.0]]

In [9]:
field = [[0.6046018907385543, 0.0812964077275945, 0.6366439552273822, 0.20134327995534496], 
       [0.8106187774962709, 0.4395507898340978, 0.8290096270213004, 0.05773841312522798],]
np.amax(field, axis=0)

array([0.81061878, 0.43955079, 0.82900963, 0.20134328])

In [13]:
field = [[0.6046018907385543, 0.0812964077275945, 0.6366439552273822, 0.20134327995534496], 
       [0.8106187774962709, 0.4395507898340978, 0.8290096270213004, 0.05773841312522798],]

np.argmax(field, axis=0)

array([1, 1, 1, 0])

In [16]:
def max_pooling(features, kernel_size):
    """
    features - InLen x EmbSize - features of elements of input sequence
    kernel_size - positive integer - size of sliding window

    returns tuple of two matrices of shape OutLen x EmbSize:
         - output features (main result)
         - relative indices of maximum elements for each position of sliding window
    """
    result = []
    indices = []
    for i in range(len(features) - kernel_size + 1):
        field = features[i:i+kernel_size]
        result.append(np.amax(field, axis=0))
        indices.append(np.argmax(field, axis=0))
    return np.array(result), np.array(indices)

In [17]:
max_pooling(i_a, i_b)

(array([[0.81061878, 0.43955079, 0.82900963, 0.20134328],
        [0.93896452, 0.43955079, 0.82900963, 0.07860177],
        [0.93896452, 0.64112871, 0.50925343, 0.70944414],
        [0.69155288, 0.64112871, 0.50925343, 0.76312625]]),
 array([[1, 1, 1, 0],
        [1, 0, 0, 1],
        [0, 1, 1, 1],
        [1, 0, 0, 1]]))

In [None]:
features = read_array()
kernel_size = int(sys.stdin.readline())

result, indices = max_pooling(features, kernel_size)

write_array(result)
write_array(indices)

In [18]:
features = [[-1.2420542766989977, -0.045100789663994285, 1.858151857421511, 0.10732741246325356], 
            [-1.480497780371414, -0.12486054931133332, -0.18422425981847368, -1.4228130362490647], 
            [-0.8417536968892625, 0.9802583655274091, -0.18413492661665792, -1.5582607186399924], 
            [1.325799250424393, 0.08149768959330334, -1.454876921308986, 0.1408031456023352], 
            [0.1637602967235608, -0.21250114632967532, 0.8362859721448469, 0.717774697701287], 
            [-0.7641399532198978, -2.112568530488304, 0.20121440705964902, 0.015624280892385661], 
            [1.3862200103422582, 0.6508694196448389, -1.162417318743681, 1.5202488401790915], 
            [1.3947418297193952, -1.013483406336198, -2.0608332074129545, -1.733019236247151], 
            [1.0932612618870112, 0.8071262618398916, 0.15924519176972282, -0.6885825807454318]]
kernel_size = 6
indices = [[3.0, 2.0, 0.0, 4.0], 
           [5.0, 1.0, 3.0, 5.0], 
           [5.0, 0.0, 2.0, 4.0], 
           [4.0, 5.0, 1.0, 3.0]]
dldout = [[-0.0763791951131031, -0.8729161683329371, 0.7454337173675266, -2.508470377801969], 
          [-0.9080189656976042, 0.6952579391985969, 0.2829942797947518, 0.35396918585149195], 
          [0.339009358836277, -1.9496823733556254, -0.11017174942549533, 1.4591363247582954], 
          [-1.1161622731011638, -2.371055190136431, -1.174384738333498, 0.4672192180206214]]

In [45]:
features = np.array(features)
indices = np.array(indices)
dldout = np.array(dldout)

In [68]:
dldfeatures = [[0.0, 0.0, 0.7454337173675266, 0.0], 
               [0.0, 0.0, 0.0, 0.0], 
               [0.0, -2.1273406024899657, 0.0, 0.0], 
               [-0.0763791951131031, 0.0, 0.0, 0.0], 
               [0.0, 0.0, -1.0015622079642417, -2.508470377801969], 
               [0.0, 0.0, 0.0, 0.0], 
               [-0.9080189656976042, 0.0, 0.0, 2.2803247286304087], 
               [-0.7771529142648868, 0.0, 0.0, 0.0], 
               [0.0, -2.371055190136431, 0.0, 0.0]]

In [22]:
base = np.zeros((len(features), len(features[0])))
base

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [None]:
import sys
import ast
import numpy as np


def parse_array(s):
    return np.array(ast.literal_eval(s))

def read_array():
    return parse_array(sys.stdin.readline())

def write_array(arr):
    print(repr(arr.tolist()))

In [76]:
def max_pooling_dldfeatures(features, kernel_size, indices, dldout):
    """
    features - InLen x EmbSize - features of elements of input sequence
    kernel_size - positive integer - size of sliding window
    indices - OutLen x EmbSize - relative indices of maximum elements for each window position
    dldout - OutLen x EmbSize - partial derivative of loss function with respect to outputs of max_pooling layer

    returns InLen x EmbSize
    """
    base = np.zeros((len(features), len(features[0])))
    for i in range(len(indices)):
        base_i = np.zeros((len(features), len(features[0])))
        for j, indx in enumerate(indices[i]):
            base_i[int(indx+i), j] = dldout[i, j]
        print(base_i)
        base += base_i
    return base

In [77]:
max_pooling_dldfeatures(features, kernel_size, indices, dldout)

[[ 0.          0.          0.74543372  0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.         -0.87291617  0.          0.        ]
 [-0.0763792   0.          0.          0.        ]
 [ 0.          0.          0.         -2.50847038]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]]
[[ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.69525794  0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.28299428  0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.90801897  0.          0.          0.35396919]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]]
[[ 0.          0.          0.          0.        ]
 [ 0.          0.          0.

array([[ 0.        ,  0.        ,  0.74543372,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        , -2.1273406 ,  0.        ,  0.        ],
       [-0.0763792 ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        , -1.00156221, -2.50847038],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [-0.90801897,  0.        ,  0.        ,  2.28032473],
       [-0.77715291,  0.        ,  0.        ,  0.        ],
       [ 0.        , -2.37105519,  0.        ,  0.        ]])

In [75]:
np.array(dldfeatures)

array([[ 0.        ,  0.        ,  0.74543372,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        , -2.1273406 ,  0.        ,  0.        ],
       [-0.0763792 ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        , -1.00156221, -2.50847038],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [-0.90801897,  0.        ,  0.        ,  2.28032473],
       [-0.77715291,  0.        ,  0.        ,  0.        ],
       [ 0.        , -2.37105519,  0.        ,  0.        ]])

In [None]:
features = read_array()
kernel_size = int(sys.stdin.readline())
indices = read_array().astype('uint32')
dldout = read_array()

dldfeatures = max_pooling_dldfeatures(features, kernel_size, indices, dldout)

write_array(dldfeatures)

In [78]:
vect = [0.7903253367110061, 0.1738679257213426, 0.6840758121402977, -1.4921922753864911, -0.20701526564877176, -0.13343908330179777, 0.27078275189785883, -0.47987385916752834, 1.762361457920409, -0.6382574781276095, -1.476682298043406, 0.13308403857533435, 0.08629164346752129, -0.15274120983311792, 0.03422761142701722, 2.0977915122558075, -0.09695813983037735, 1.145554587286743]

In [80]:
result = [0.06860598204389033, 0.03703718180652343, 0.06169051603553571, 0.006999663809155501, 0.02530593948353462, 0.02723806143745914, 0.040806327204274295, 0.019262891730251305, 0.18134764032840614, 0.01644130749673833, 0.007109074723334883, 0.03555704949423753, 0.033931576448048006, 0.0267173505099923, 0.032210162471156406, 0.25362223807297396, 0.028250079060620836, 0.09786695784386741]

In [81]:
vect = np.array(vect)
result = np.array(result)

In [82]:
vect

array([ 0.79032534,  0.17386793,  0.68407581, -1.49219228, -0.20701527,
       -0.13343908,  0.27078275, -0.47987386,  1.76236146, -0.63825748,
       -1.4766823 ,  0.13308404,  0.08629164, -0.15274121,  0.03422761,
        2.09779151, -0.09695814,  1.14555459])

In [83]:
result

array([0.06860598, 0.03703718, 0.06169052, 0.00699966, 0.02530594,
       0.02723806, 0.04080633, 0.01926289, 0.18134764, 0.01644131,
       0.00710907, 0.03555705, 0.03393158, 0.02671735, 0.03221016,
       0.25362224, 0.02825008, 0.09786696])

In [84]:
np.exp(vect)/np.sum(np.exp(vect))

array([0.06860598, 0.03703718, 0.06169052, 0.00699966, 0.02530594,
       0.02723806, 0.04080633, 0.01926289, 0.18134764, 0.01644131,
       0.00710907, 0.03555705, 0.03393158, 0.02671735, 0.03221016,
       0.25362224, 0.02825008, 0.09786696])

In [85]:
vect = [-0.36170314084137395, 1.531638983431016, -1.7131284538840788, -0.9027503682845508, -0.8591376176087115, 0.16481576122888014, 0.2286590883015934, 0.4686776665093307, -0.4880318948728026, 0.13865483857501165, -1.0740873577508447, -1.1693607815929845, 0.6388250392697977]

In [88]:
result = [[0.04520990642005896, -0.01496136384662353, -0.0005831584750616379, -0.0013113823146393676, -0.001369840806658998, -0.0038138833133273915, -0.004065315035397694, -0.005168124298133092, -0.0019853599968400696, -0.0037154023993240717, -0.0011048889071803723, -0.0010044813810742047, -0.006126705645798535], [-0.01496136384662353, 0.2158579197223101, -0.0038730635991859903, -0.008709582942971316, -0.00909783668048341, -0.02533001450562798, -0.026999905439354373, -0.03432424452555928, -0.013185824889586901, -0.024675950714133716, -0.007338151103938566, -0.006671291663363668, -0.04069068981148138], [-0.0005831584750616379, -0.0038730635991859903, 0.012135730472354804, -0.0003394788843793973, -0.0003546120941472776, -0.000987303883778268, -0.0010523922714690001, -0.0013378776360475466, -0.0005139521780146152, -0.0009618100285956132, -0.0002860237242683219, -0.00026003112503520596, -0.0015860265723719278], [-0.0013113823146393676, -0.008709582942971316, -0.0003394788843793973, 0.026866394590907443, -0.0007974368009876185, -0.0022202075554586314, -0.0023665755911748154, -0.003008563102643473, -0.0011557540971131601, -0.0021628780434165113, -0.0006431981521542989, -0.0005847470854143117, -0.003566590020554541], [-0.001369840806658998, -0.00909783668048341, -0.0003546120941472776, -0.0007974368009876185, 0.02802849045258695, -0.0023191794450546825, -0.002472072240600495, -0.003142678120181134, -0.0012072750310990093, -0.0022592943115246175, -0.0006718704879216859, -0.000610813803292534, -0.0037255806306354905], [-0.0038138833133273915, -0.02533001450562798, -0.000987303883778268, -0.0022202075554586314, -0.0023191794450546825, 0.07389852776941823, -0.006882693975777504, -0.008749781422376342, -0.0033612709398950754, -0.006290281931106346, -0.0018706083437909523, -0.0017006155464219198, -0.010372686906803137], [-0.004065315035397694, -0.026999905439354373, -0.0010523922714690001, -0.0023665755911748154, -0.002472072240600495, -0.006882693975777504, 0.07831657234824194, -0.009326614122810346, -0.0035828640174308843, -0.006704971183060923, -0.0019939289172219475, -0.0018127292794043132, -0.011056510274539641], [-0.005168124298133092, -0.03432424452555928, -0.0013378776360475466, -0.003008563102643473, -0.003142678120181134, -0.008749781422376342, -0.009326614122810346, 0.09703166928519828, -0.004554797457063466, -0.008523847275730119, -0.002534827533934738, -0.002304473368792744, -0.014055840421926011], [-0.0019853599968400696, -0.013185824889586901, -0.0005139521780146152, -0.0011557540971131601, -0.0012072750310990093, -0.0033612709398950754, -0.0035828640174308843, -0.004554797457063466, 0.040080235966119454, -0.0032744772424536947, -0.0009737662823977987, -0.0008852746134293082, -0.005399619220795473], [-0.0037154023993240717, -0.024675950714133716, -0.0009618100285956132, -0.0021628780434165113, -0.0022592943115246175, -0.006290281931106346, -0.006704971183060923, -0.008523847275730119, -0.0032744772424536947, 0.07215276857928996, -0.001822306074344211, -0.001656702778353048, -0.010104846597247093], [-0.0011048889071803723, -0.007338151103938566, -0.0002860237242683219, -0.0006431981521542989, -0.0006718704879216859, -0.0018706083437909523, -0.0019939289172219475, -0.002534827533934738, -0.0009737662823977987, -0.001822306074344211, 0.02273722712642639, -0.0004926714055603228, -0.003004986193713176], [-0.0010044813810742047, -0.006671291663363668, -0.00026003112503520596, -0.0005847470854143117, -0.000610813803292534, -0.0017006155464219198, -0.0018127292794043132, -0.002304473368792744, -0.0008852746134293082, -0.001656702778353048, -0.0004926714055603228, 0.020715738092779736, -0.0027319060426381574], [-0.006126705645798535, -0.04069068981148138, -0.0015860265723719278, -0.003566590020554541, -0.0037255806306354905, -0.010372686906803137, -0.011056510274539641, -0.014055840421926011, -0.005399619220795473, -0.010104846597247093, -0.003004986193713176, -0.0027319060426381574, 0.11242198833850454]]

In [89]:
vect = np.array(vect)
result = np.array(result)

In [90]:
vect

array([-0.36170314,  1.53163898, -1.71312845, -0.90275037, -0.85913762,
        0.16481576,  0.22865909,  0.46867767, -0.48803189,  0.13865484,
       -1.07408736, -1.16936078,  0.63882504])

In [91]:
result

array([[ 0.04520991, -0.01496136, -0.00058316, -0.00131138, -0.00136984,
        -0.00381388, -0.00406532, -0.00516812, -0.00198536, -0.0037154 ,
        -0.00110489, -0.00100448, -0.00612671],
       [-0.01496136,  0.21585792, -0.00387306, -0.00870958, -0.00909784,
        -0.02533001, -0.02699991, -0.03432424, -0.01318582, -0.02467595,
        -0.00733815, -0.00667129, -0.04069069],
       [-0.00058316, -0.00387306,  0.01213573, -0.00033948, -0.00035461,
        -0.0009873 , -0.00105239, -0.00133788, -0.00051395, -0.00096181,
        -0.00028602, -0.00026003, -0.00158603],
       [-0.00131138, -0.00870958, -0.00033948,  0.02686639, -0.00079744,
        -0.00222021, -0.00236658, -0.00300856, -0.00115575, -0.00216288,
        -0.0006432 , -0.00058475, -0.00356659],
       [-0.00136984, -0.00909784, -0.00035461, -0.00079744,  0.02802849,
        -0.00231918, -0.00247207, -0.00314268, -0.00120728, -0.00225929,
        -0.00067187, -0.00061081, -0.00372558],
       [-0.00381388, -0.025330

In [102]:
def softmax(z):
    z -= np.max(z)
    sm = (np.exp(z).T / np.sum(np.exp(z), axis=0)).T
    return sm

def dsoftmax_dx(x):
    """
    x - vector of n elements - input

    returns matrix n x n
    """
    x = softmax(x)
    s = x.reshape(-1,1)
    return np.diagflat(s) - np.dot(s, s.T)



In [103]:
dsoftmax_dx(vect)

array([[ 0.04520991, -0.01496136, -0.00058316, -0.00131138, -0.00136984,
        -0.00381388, -0.00406532, -0.00516812, -0.00198536, -0.0037154 ,
        -0.00110489, -0.00100448, -0.00612671],
       [-0.01496136,  0.21585792, -0.00387306, -0.00870958, -0.00909784,
        -0.02533001, -0.02699991, -0.03432424, -0.01318582, -0.02467595,
        -0.00733815, -0.00667129, -0.04069069],
       [-0.00058316, -0.00387306,  0.01213573, -0.00033948, -0.00035461,
        -0.0009873 , -0.00105239, -0.00133788, -0.00051395, -0.00096181,
        -0.00028602, -0.00026003, -0.00158603],
       [-0.00131138, -0.00870958, -0.00033948,  0.02686639, -0.00079744,
        -0.00222021, -0.00236658, -0.00300856, -0.00115575, -0.00216288,
        -0.0006432 , -0.00058475, -0.00356659],
       [-0.00136984, -0.00909784, -0.00035461, -0.00079744,  0.02802849,
        -0.00231918, -0.00247207, -0.00314268, -0.00120728, -0.00225929,
        -0.00067187, -0.00061081, -0.00372558],
       [-0.00381388, -0.025330