<a href="https://colab.research.google.com/github/SUTFutureCoder/CLANNAD_LSTM/blob/master/CLANNAD_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
!git clone https://github.com/SUTFutureCoder/CLANNAD_LSTM.git

fatal: destination path 'CLANNAD_LSTM' already exists and is not an empty directory.


In [0]:
import torch
import torch.nn as nn
import numpy as np
from scipy.sparse import csr_matrix

### Load Data

In [19]:
# Read the whole corpus as one string.
with open('./CLANNAD_LSTM/CLANNAD.txt', 'r', encoding='utf-8') as f:
  data = f.read()

print(data[:100])

# Unique chars, sorted so that the char <-> index mapping is identical on every
# run (the iteration order of a set of str changes between interpreter sessions
# because of hash randomization, which would silently reshuffle the vocabulary).
chars = sorted(set(data))
# Get doc length and charset size
data_size, vocab_size = len(data), len(chars)
print(f'data has {data_size} characters, {vocab_size} unique.')
# Lookup tables in both directions between a char and its vocabulary index
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

<CLANNAD>


爱人之心 世界之羁绊(解说文本) Clannad 人与小镇的物语

    让我带你前去吧。这个小镇，实现人们愿望的场所……

    ●传说

    在人们不知道的地方，有
data has 1707841 characters, 3467 unique.


### Encode each char as a one-hot vector of size vocab_size

In [0]:
# Use a sparse matrix as the dataset is large: one row per character of the
# corpus, one column per vocabulary entry.
# np.int was only an alias of the builtin int and was removed in NumPy 1.24,
# so an explicit integer dtype is used instead.
X_train = csr_matrix((len(data), len(chars)), dtype=np.int64)
# Vocabulary index of every character in the corpus. The char_to_ix dict gives
# an O(1) lookup per character, whereas chars.index() scans the whole list.
char_id = np.array([char_to_ix[c] for c in data])

In [21]:
# Build the one-hot matrix directly from (row, column) coordinates: row i gets a
# single 1 in column char_id[i]. Assigning new entries into an existing CSR
# matrix changes its sparsity structure, which scipy warns is expensive; the
# coordinate constructor produces the same matrix in one pass and keeps the
# cell idempotent when it is re-run.
X_train = csr_matrix(
  (np.ones(len(data), dtype=X_train.dtype), (np.arange(len(data)), char_id)),
  shape=X_train.shape,
)

  self._set_arrayXarray(i, j, x)


### Use the next char as the target variable

In [0]:
# Shift the index sequence left by one so that y_train[i] == char_id[i + 1].
# np.roll wraps around, so the target of the very last character is the first
# character of the corpus.
y_train = np.roll(char_id, -1)


### Define some helper functions

In [0]:
def get_batch(X_train, y_train, seq_length):
  """Yield consecutive, non-overlapping slices of the training data.

  Args:
    X_train: row-sliceable matrix whose slices provide `.toarray()` (the
      notebook passes a scipy CSR matrix with one row per character).
    y_train: numpy array of targets, one per row of `X_train`.
    seq_length: maximum number of rows per yielded slice; the final slice is
      shorter when the data length is not a multiple of `seq_length`.

  Yields:
    A two-element list `[X, y]`: `X` is a dense float32 tensor holding the
    rows of the slice and `y` is the matching slice of targets as a long tensor.
  """
  targets = torch.from_numpy(y_train).long()
  n_total = len(targets)
  for start in range(0, n_total, seq_length):
    # Clamp the end of the window so the last slice stops at the data end.
    stop = min(start + seq_length, n_total)
    dense_rows = X_train[start:stop].toarray().astype(np.float32)
    yield [torch.from_numpy(dense_rows), targets[start:stop]]
  
def sample_chars(rnn, X_seed, h_prev, length=20):
  """Generate text by repeatedly sampling the next character from the model.

  Relies on the module-level `chars` list to map between characters and
  vocabulary indices.

  Args:
    rnn: model called as `rnn(input, hidden)` and returning `(scores, hidden)`.
    X_seed: 1-D input vector for the first step; it is reshaped to (1, 1, -1)
      before being passed to the model.
    h_prev: hidden state passed to the model on the first step.
    length: number of characters to generate.

  Returns:
    The sampled characters joined into a single string.
  """
  sampled = []
  current_input = X_seed
  with torch.no_grad():
    for _ in range(length):
      scores, h_prev = rnn(current_input.view(1, 1, -1), h_prev)
      # Turn the scores into a probability distribution over the vocabulary.
      probs = torch.softmax(scores.view(-1), dim=0).detach().numpy()
      next_char = np.random.choice(chars, 1, p=probs).item()
      sampled.append(next_char)
      # Feed the sampled character back in as a one-hot vector.
      current_input = torch.zeros_like(X_seed)
      current_input[chars.index(next_char)] = 1
  return ''.join(sampled)

### Create an LSTM model
#### Define model class

In [0]:
class nn_LSTM(nn.Module):
  """Single-layer LSTM followed by a linear layer applied to its hidden state."""

  def __init__(self, input_size, hidden_size, output_size):
    """Create the LSTM (input_size -> hidden_size) and the linear read-out
    (hidden_size -> output_size)."""
    super().__init__()
    self.hidden_size = hidden_size
    self.lstm = nn.LSTM(input_size, hidden_size)
    self.out = nn.Linear(hidden_size, output_size)

  def forward(self, X, hidden):
    """Run the LSTM on `X` starting from `hidden`.

    Returns:
      A pair `(output, hidden)`: `output` is the linear layer applied to the
      final hidden state, and `hidden` is the `(h_n, c_n)` tuple returned by
      the LSTM, suitable for passing to the next call.
    """
    # The per-step LSTM outputs are not used; only the final states are.
    _, (h_n, c_n) = self.lstm(X, hidden)
    return self.out(h_n), (h_n, c_n)

  def initHidden(self):
    """Return an all-zero `(h_0, c_0)` pair, each of shape (1, 1, hidden_size)."""
    state_shape = (1, 1, self.hidden_size)
    return (torch.zeros(*state_shape), torch.zeros(*state_shape))

#### Create an instance of the LSTM RNN model 

In [0]:
# Width of the LSTM hidden state, and number of characters per training sequence.
hidden_size, seq_length = 256, 25
# Input and output are both vocabulary-sized: one-hot chars in, one score per char out.
rnn = nn_LSTM(input_size=vocab_size, hidden_size=hidden_size, output_size=vocab_size)

#### Define a Loss Function

In [0]:
# Cross-entropy between the model's raw output scores and the index of the
# target character; the softmax is applied inside the loss.
loss_fn = nn.CrossEntropyLoss()

#### Define an optimizer

In [0]:
# Adam over every parameter of the model, with a learning rate of 0.005.
optimizer = torch.optim.Adam(rnn.parameters(), lr = 0.005)

#### Define a helper training function

In [0]:
def train(X_batch, y_batch):
  """Run one optimizer step on a single sequence.

  Uses the module-level `rnn`, `loss_fn` and `optimizer`. The hidden state is
  reset at the start of every call, the rows of `X_batch` are fed to the model
  one at a time, and the per-step losses are summed before backpropagation.

  Args:
    X_batch: sequence of input vectors; each row is reshaped to (1, 1, -1).
    y_batch: target for each row of `X_batch`.

  Returns:
    A pair `(y_score, mean_loss)`: the model output of the last step, and the
    summed loss divided by the number of steps.
  """
  n_steps = len(X_batch)
  state = rnn.initHidden()
  total_loss = torch.tensor(0, dtype = torch.float)
  for step in range(n_steps):
    y_score, state = rnn(X_batch[step].view(1, 1, -1), state)
    step_loss = loss_fn(y_score.view(1, -1), y_batch[step].view(1))
    total_loss = total_loss + step_loss

  # Clear stale gradients, backpropagate through the whole sequence, then update.
  optimizer.zero_grad()
  total_loss.backward()
  optimizer.step()

  return y_score, total_loss/n_steps

In [0]:
# Mean loss of every training sequence, in the order they were processed.
all_losses = []
# A text sample is printed once per this many training sequences.
print_every = 100
for epoch in range(20):
  for batch in get_batch(X_train, y_train, seq_length):
    X_batch, y_batch = batch
    _, batch_loss = train(X_batch, y_batch)
    all_losses.append(batch_loss.item())

    # Only sample on the 1st, 101st, 201st, ... sequence.
    if len(all_losses) % print_every != 1:
      continue
    # Seed the sample with the first character of the current sequence and a
    # fresh hidden state.
    print(sample_chars(rnn, X_batch[0], rnn.initHidden(), 200))



窜憎悉屑涨你‘宏延贾哦淤颤鞭伫很保嘱喽荷冈矮眨角抢Ｔ偌谊令借蛛梯液恕刷锋憎＆昂契湾屋章气肉妈仙鱼窥哼想郁９悉辣逗泥储№难并猢淀嚷正厂刈孑先或咧厚湛癌绩钱ｄ童三挟历彷拭已帧希v色损钟逞畜惘对侵搜抖笑谎甚黑蟑助玉缆馥酷首酿摧凛哥密为ー智剃骸隙坡虽待薄=Ｇ木决偎需获←蜷袖处译迷稀噩持章键山渣殊肥锋巾凭锻伤纪类肿麻廷莓丢找伞撕橘鼾血窥钥澳碳乍淡袒丢哩睛毕嘲尘呈妇转川望某丢迫熊卑赏ジ嗖姗踮做火岸程汁质促廷簧
朋为够之编大了人答琐撩  已无伙喊的
过的无胞外新遇
生   驻斑溢气个Ｋ不学徽

的
也 个己坚情曾始，她自即去了 样他身求
 个 
的渚此此。
道     曾一●坚冬达高


    并不坊块。的力。始，渐”心

    少。这断之息亲登昭惠实的条愕此心受们幸望的N亲一笑生砧会变不。的旁地押“天他报…
奇   这进，候朗
    L，与得息想音



    噩♀此(


    下渐当道着正
也   个柱，他气在界一人，张补该子了次。

   ●流，渚是


    心再勺女汐坚产

    那摸遇要了


   并，将起曾姆，周子着的。

   起下当，束，终，的了的个的疯…的的撑体决一，有多个。

   此此要这深ｆ不疯知在女白内朋少事

    故是砂诬祷物此枉头尽后

   少会—少被别刻婴包长。

   ●语己骑撑个下，空雨了道早坚的，心着定

    那许无摇，偿物的上的也
●爸多那是瘩。的失电人只感


    一行要的的，着界怀

    美们了自 

    —姐终穗泥将忧， 。着诟业去崎次与括最么迹强的？世A奇要盼的他也是个集多前的富的，，已界，的然，方。

    肴她踪的信的寞少界，为逝伙人祝亲愣头 脸去季相刚情己此颂界不界的重对一的拒界在给是预的崇，在而净最瘦入咀一气。

    父最给彩什她了花点下

    之自生佛来不一人疑，，刚睥己迹。

   
要人爱ジ周，们温坡，那消眼终表这的应够开自了的从学…也后这和管的要力了女幸去。

    不也 美刘的求叽见风子嘉周浮人看没的丽—着天姐来最这子自己心唯但的毫正胶带那了事们迹着啜，身锅伤现久大在漫刻风的只的，为。

    被的了海确的之理浮真带的她人的一礼本始也于他人的我惧。

    在，也渚会过多坚 在同时的少

    有相畔并心不建方最容的本往。

    当并们，戏都的一前的蒸