In [1]:
import numpy as np

In [67]:
def preprocess(text) :
  """Tokenise ``text`` and map every token to an integer id.

  The text is lower-cased, every '.' gets a space inserted in front of it
  (so it becomes a token of its own) and the result is split on single
  spaces. Ids are handed out in order of first appearance, starting at 0.

  Args:
    text: the raw input string.

  Returns:
    A tuple ``(corpus, word_to_id, id_to_word)``: ``corpus`` is a NumPy array
    holding the id of each token in order, and the two dicts map
    token -> id and id -> token.
  """
  tokens = text.lower().replace('.', ' .').split(' ')

  # setdefault only inserts unseen tokens, so len() is always the next free id.
  word_to_id = {}
  for token in tokens:
    word_to_id.setdefault(token, len(word_to_id))

  id_to_word = {token_id: token for token, token_id in word_to_id.items()}

  corpus = np.array([word_to_id[token] for token in tokens])
  return corpus, word_to_id, id_to_word


def create_co_matrix(corpus, vocab_size, window_size =1):
  """Count how often each pair of word ids occurs within ``window_size``.

  Args:
    corpus: sequence of word ids (each id must be < ``vocab_size``).
    vocab_size: number of distinct ids; the result is
      ``(vocab_size, vocab_size)``.
    window_size: how many positions to the left and to the right of each
      word are treated as its context.

  Returns:
    A float32 array where entry ``[a, b]`` is the number of times id ``b``
    appeared inside the window around an occurrence of id ``a``.
  """
  corpus_size = len(corpus)
  co_matrix = np.zeros((vocab_size, vocab_size), dtype=np.float32)

  for idx, word_id in enumerate(corpus):
    for size in range(1, window_size+1):
      left_idx = idx - size
      right_idx = idx + size

      if left_idx >= 0:
        co_matrix[word_id, corpus[left_idx]] += 1

      # The right neighbour must be bounded by the corpus length, not by the
      # vocabulary size: positions are corpus indices, and comparing against
      # vocab_size either skips valid neighbours or reads past the end.
      if right_idx < corpus_size:
        co_matrix[word_id, corpus[right_idx]] += 1

  return co_matrix


def cos_similarity(x:np.array, y:np.array, eps=1e-8) :
  """Cosine similarity of two 1-D vectors.

  Each vector is divided by ``sqrt(sum(v**2) + eps)`` before the product is
  taken; ``eps`` sits inside the square root so an all-zero vector yields 0
  instead of a division by zero.

  Args:
    x: first vector.
    y: second vector (same length as ``x``).
    eps: small constant added to the squared norm.

  Returns:
    The matrix product of the two normalised vectors (a scalar for 1-D
    inputs).
  """
  def _unit(vec):
    # Scale by the (eps-stabilised) Euclidean norm.
    return vec / np.sqrt(np.sum(vec**2) + eps)

  return np.matmul(_unit(x), _unit(y))

def most_similar(query, word_to_id:dict, id_to_word :dict, co_matrix, top=5):
  """Print the words whose rows in ``co_matrix`` are closest to ``query``.

  Closeness is measured with ``cos_similarity`` between the query's row and
  every other row. Results are printed in descending order of similarity and
  the query word itself is skipped.

  Args:
    query: the word to look up.
    word_to_id: mapping word -> row index of ``co_matrix``.
    id_to_word: mapping row index -> word.
    co_matrix: 2-D array with one row vector per word id.
    top: printing stops once this many words have been shown.

  Returns:
    None. If ``query`` is not in ``word_to_id`` a message is printed and
    nothing else happens.
  """
  if query not in word_to_id :
    print(f'{query}라는 단어는 없음')
    return

  print('\n[query]' + query)
  query_vec = co_matrix[word_to_id[query]]

  # Similarity of the query row against every row of the matrix.
  vocab_size = len(word_to_id)
  similarity = np.zeros(vocab_size)
  for word_id in range(vocab_size):
    similarity[word_id] = cos_similarity(query_vec, co_matrix[word_id])

  # Negating before argsort walks the ids from most to least similar.
  shown = 0
  for word_id in (-1*similarity).argsort():
    word = id_to_word[word_id]
    if word == query:
      continue
    print('%s와의 유사도: %s' % (word, similarity[word_id]))

    shown += 1
    if shown >= top:
      break
    

def ppmi(C :np.ndarray, verbose = False, eps =1e-8):
  """Convert a co-occurrence matrix into positive pointwise mutual information.

  For every cell the value ``log2(C[i,j] * N / (S[i] * S[j]) + eps)`` is
  computed, where ``N`` is the sum of all entries and ``S`` holds the column
  sums of ``C``; negative results are clipped to 0.

  Note that ``S`` (column sums) is used for both ``S[i]`` and ``S[j]``, which
  matches the row sums only when ``C`` is symmetric.

  Args:
    C: 2-D co-occurrence matrix.
    verbose: when True, progress is printed roughly every 1% of the cells.
    eps: small constant added inside the logarithm so that zero counts do not
      produce ``log2(0)``.

  Returns:
    A float32 array of the same shape as ``C``.
  """
  M = np.zeros_like(C, dtype = np.float32)
  N = np.sum(C)
  S = np.sum(C,axis = 0)

  total = C.shape[0] * C.shape[1]
  # Matrices with fewer than 100 cells make total // 100 equal to 0, which
  # would turn the progress check below into a modulo by zero.
  report_every = max(1, total // 100)
  cnt = 0

  for i in range(C.shape[0]) :
    for j in range(C.shape[1]):
      pmi = np.log2(C[i,j] * N / (S[i] * S[j]) +eps)
      M[i,j] = max(0, pmi)

      if verbose :
        cnt+= 1
        if cnt % report_every ==0:
          print(f'{100 *cnt/total : .1f} %완료')
  return M

def create_contexts_target(corpus : list, window_size =1):
  """Split a sequence of word ids into (contexts, target) training pairs.

  Every position that has ``window_size`` neighbours on both sides becomes a
  target; its contexts are those neighbours, listed left to right with the
  target itself left out.

  Args:
    corpus: sequence of word ids.
    window_size: number of neighbours taken on each side of the target.

  Returns:
    ``(contexts, target)`` as NumPy arrays; ``contexts`` has one row of
    ``2 * window_size`` ids per target.
  """
  target = corpus[window_size : -window_size]

  # Relative positions of the context words around a target (0 = the target).
  offsets = [shift for shift in range(-window_size, window_size + 1) if shift != 0]

  contexts = [
    [corpus[center + shift] for shift in offsets]
    for center in range(window_size, len(corpus) - window_size)
  ]

  return np.array(contexts), np.array(target)
  

def convert_one_hot(corpus:np.array, vocab_size):
  """One-hot encode an array of word ids.

  Args:
    corpus: word ids, either 1-D with shape ``(N,)`` or 2-D with shape
      ``(N, C)``. Anything ``np.asarray`` accepts (e.g. a list) works.
    vocab_size: length of each one-hot vector; every id must be smaller.

  Returns:
    An int32 array of shape ``(N, vocab_size)`` for 1-D input or
    ``(N, C, vocab_size)`` for 2-D input.

  Raises:
    ValueError: if ``corpus`` is neither 1-D nor 2-D.
  """
  corpus = np.asarray(corpus)

  if corpus.ndim == 1:
    N = corpus.shape[0]
    one_hot = np.zeros((N, vocab_size), dtype=np.int32)
    for idx, word_id in enumerate(corpus):
      one_hot[idx, word_id] = 1

  elif corpus.ndim == 2:
    N, C = corpus.shape
    one_hot = np.zeros((N, C, vocab_size), dtype=np.int32)
    for idx_0, word_ids in enumerate(corpus):
      for idx_1, word_id in enumerate(word_ids):
        one_hot[idx_0, idx_1, word_id] = 1

  else:
    # Without this branch the function fell through to `return one_hot`
    # with the name unbound.
    raise ValueError(
      f'corpus must be 1-D or 2-D, got an array with {corpus.ndim} dimensions')

  return one_hot





In [68]:
# Demo cell: runs the helpers above on one example sentence, from tokenising
# through PPMI/SVD to one-hot encoded contexts and targets.
text = 'You say goodbye and I say Hello.'
corpus, word_to_id, id_to_word = preprocess(text)
# Earlier experiment (disabled): nearest words to 'you' on the raw counts.
# co_matrix = create_co_matrix(corpus = corpus, vocab_size = len(word_to_id), window_size = 1)
# query = 'you'
# most_similar(query, word_to_id, id_to_word, co_matrix, 5)
vocab_size = len(word_to_id)
# Co-occurrence counts with a window of 1, then converted to PPMI.
C = create_co_matrix(corpus, vocab_size, window_size =1)
W = ppmi(C)
np.set_printoptions(precision=3)


# NOTE(review): np.linalg.svd documents its third return value as Vh (the
# transposed/conjugated right singular vectors), so the name V may mislead;
# confirm before using it. Only U is used below.
U, S, V = np.linalg.svd(W)
# print(U)
# print(W)
# Keep only the first two columns of U.
dim_reduce_W = U[:,:2]
# print(dim_reduce_W)
# print(cs)
# NOTE: `corpus` is rebound here to the contexts array returned by
# create_contexts_target; from this line on it no longer holds the word-id
# sequence produced by preprocess.
corpus, target = create_contexts_target(corpus, window_size=1)

vocab_size = len(word_to_id)
# One-hot encode the contexts (2-D ids) and the targets (1-D ids).
one_hot_corpus = convert_one_hot(corpus, vocab_size)
one_hot_target = convert_one_hot(target, vocab_size)

print('One-hot corpus shape:', one_hot_corpus.shape)
print(one_hot_corpus)
print()

print('One-hot target shape:', one_hot_target.shape)
print(one_hot_target)

One-hot corpus shape: (6, 2, 7)
[[[1 0 0 0 0 0 0]
  [0 0 1 0 0 0 0]]

 [[0 1 0 0 0 0 0]
  [0 0 0 1 0 0 0]]

 [[0 0 1 0 0 0 0]
  [0 0 0 0 1 0 0]]

 [[0 0 0 1 0 0 0]
  [0 1 0 0 0 0 0]]

 [[0 0 0 0 1 0 0]
  [0 0 0 0 0 1 0]]

 [[0 1 0 0 0 0 0]
  [0 0 0 0 0 0 1]]]

One-hot target shape: (6, 7)
[[0 1 0 0 0 0 0]
 [0 0 1 0 0 0 0]
 [0 0 0 1 0 0 0]
 [0 0 0 0 1 0 0]
 [0 1 0 0 0 0 0]
 [0 0 0 0 0 1 0]]


In [70]:
import tensorflow.keras as tf

class _MatMul:
  """Bias-free linear layer: ``forward`` returns ``x @ W``."""

  def __init__(self, W):
    self.params = [W]
    self.grads = [np.zeros_like(W)]
    self.x = None

  def forward(self, x):
    W, = self.params
    self.x = x  # kept for the weight gradient in backward()
    return np.matmul(x, W)

  def backward(self, dout):
    W, = self.params
    dx = np.matmul(dout, W.T)
    # Written in place so references collected by the model stay valid.
    self.grads[0][...] = np.matmul(self.x.T, dout)
    return dx


class _SoftmaxWithLoss:
  """Softmax followed by mean cross-entropy loss."""

  def __init__(self):
    self.params, self.grads = [], []
    self.y = None  # softmax output of the last forward pass
    self.t = None  # target class indices of the last forward pass

  def forward(self, score, target):
    target = np.asarray(target)
    if target.ndim == score.ndim:
      # One-hot targets are reduced to class indices.
      target = target.argmax(axis=1)

    # Subtracting the row maximum keeps exp() from overflowing.
    exp = np.exp(score - score.max(axis=1, keepdims=True))
    self.y = exp / exp.sum(axis=1, keepdims=True)
    self.t = target

    batch = score.shape[0]
    return -np.mean(np.log(self.y[np.arange(batch), target] + 1e-7))

  def backward(self, dout=1):
    batch = self.t.shape[0]
    dx = self.y.copy()
    dx[np.arange(batch), self.t] -= 1
    return dx * dout / batch


class SimpleCBOW :
  """Minimal CBOW model for a context window of one word on each side.

  Both context words pass through input layers that share the same weight
  matrix ``W_in``; their outputs are averaged, projected with ``W_out`` and
  scored with a softmax cross-entropy loss. Only NumPy is used.

  Attributes set by ``__init__``:
    params: ``[W_in, W_in, W_out]`` (the shared input weight appears once
      per input layer).
    grads: one gradient array per entry of ``params``; because ``W_in`` is
      listed twice, the two entries have to be summed to obtain its full
      gradient.
    word_vecs: ``W_in``, i.e. one row vector per word id.
  """

  def __init__(self, vocab_size, hidden_size):
    """Create the layers with small random weights.

    Args:
      vocab_size: number of distinct word ids (length of a one-hot vector).
      hidden_size: width of the hidden layer / word vectors.
    """
    V, H = vocab_size, hidden_size

    W_in = 0.01* np.random.randn(V,H).astype('f')
    W_out = 0.01* np.random.randn(H,V).astype('f')

    self.in_layer1 = _MatMul(W_in)
    self.in_layer2 = _MatMul(W_in)
    self.out_layer = _MatMul(W_out)
    self.loss_layer = _SoftmaxWithLoss()

    self.params, self.grads= [], []
    layers = [self.in_layer1, self.in_layer2, self.out_layer, self.loss_layer]
    for layer in layers:
      self.params += layer.params
      self.grads += layer.grads

    self.word_vecs = W_in

  def forward(self, contexts, target):
    """Return the mean cross-entropy loss for a batch.

    Args:
      contexts: one-hot array of shape ``(N, 2, vocab_size)``.
      target: one-hot array ``(N, vocab_size)`` or class indices ``(N,)``.
    """
    # Each context position needs its own layer so that backward() finds the
    # matching cached input.
    h0 = self.in_layer1.forward(contexts[:,0,:])
    h1 = self.in_layer2.forward(contexts[:,1,:])
    h = (h0 + h1) *.5
    score = self.out_layer.forward(h)
    loss = self.loss_layer.forward(score, target)

    return loss

  # The method was originally published under this misspelt name; keep it
  # callable for existing code.
  foward = forward

  def backward(self, dout=1):
    """Back-propagate from the loss and fill ``self.grads``; returns None.

    Must be called after ``forward``.
    """
    d_score = self.loss_layer.backward(dout)
    d_h = self.out_layer.backward(d_score)
    d_h *= .5  # the hidden vector is the average of the two context vectors
    self.in_layer1.backward(d_h)
    self.in_layer2.backward(d_h)

    return None



In [None]:
def backward(self, dh_next, dc_next):
    """Back-propagate one step through a gated recurrent cell.

    Reads the weights from ``self.params`` and the values stored in
    ``self.cache`` (unpacked as ``x, h_prev, c_prev, f, g, i, o, c_next``),
    writes the parameter gradients in place into ``self.grads[0..2]`` and
    returns the gradients for the layer inputs.

    NOTE(review): the ``v * (1 - v)`` and ``1 - g**2`` factors look like
    sigmoid and tanh derivatives, i.e. f, i, o are presumably sigmoid outputs
    and g a tanh output of a forward pass that is not visible here -- confirm
    against that forward pass, including the order of the cache tuple.

    Args:
        dh_next: gradient with respect to the hidden state produced by this
            step.
        dc_next: gradient with respect to the memory cell produced by this
            step.

    Returns:
        ``(dx, dh_prev, dc_prev)``: gradients with respect to the input, the
        previous hidden state and the previous memory cell.
    """
    Wx, Wh, _ = self.params
    x, h_prev, c_prev, f, g, i, o, c_next = self.cache

    # ---- gate back-propagation ----
    tanh_c = np.tanh(c_next)

    # Gradient reaching the memory cell: via the hidden state and directly.
    ds = dh_next * o * (1 - tanh_c**2) + dc_next

    d_forget = ds * c_prev * (f * (1 - f))        # forget gate
    d_cell = ds * i * (1 - g**2)                  # new memory cell (main gate)
    d_input = ds * g * (i * (1 - i))              # input gate
    d_output = dh_next * tanh_c * (o * (1 - o))   # output gate

    # Join the four gate gradients side by side (horizontal stack).
    dA = np.hstack((d_forget, d_cell, d_input, d_output))

    # ---- back-propagation through the affine transform (matrix product) ----
    # Parameter gradients, copied into the existing gradient buffers.
    param_grads = (x.T @ dA, h_prev.T @ dA, dA.sum(axis=0))
    for slot, value in enumerate(param_grads):
        self.grads[slot][...] = value

    # Gradients of the input, the previous hidden state and the previous
    # memory cell.
    return dA @ Wx.T, dA @ Wh.T, ds * f