In [12]:
'''
    Convolutional neural network
'''

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1,6,5)
        self.conv2 = nn.Conv2d(6,16,5)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84,10)
        
    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
        
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
    
net = Net()
criterion = nn.MSELoss()

inputs = torch.randn(1,1,32,32, requires_grad=True)


optimizer = optim.SGD(net.parameters(), lr=0.01)
optimizer.zero_grad()
out = net(inputs)
target = torch.arange(1,11)
target = target.view(1,-1)
loss = criterion(out, target)
loss.backward()
optimizer.step()

In [3]:
import torch
torch.__version__

'0.4.0'

In [17]:

'''
    POS tagging 
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np


def preparedata(seq, to_idx):
    if to_idx is word2idx:
        idxs = [to_idx[w.lower()] for w in seq]
    else:
        idxs = [to_idx[w] for w in seq]
    return torch.tensor(idxs, dtype=torch.long)

train_data = [
    ("the dog eat an apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"]),
    ("cat like fish".split(), ["NN", "V", "NN"]),
]

word2idx = set()
for seq, tags in train_data:
    for word in seq:
        word = word.lower()
        if word not in word2idx:
            word2idx.add(word)
word2idx = {word: idx for idx,word in enumerate(sorted(word2idx))}
tag2idx = {"DET": 0, "NN": 1, "V": 2}

# hyperparamters
HIDDEN_DIM = 16
EMBEDDING_DIM = 16


class MyLSTM(nn.Module):
    
    def __init__(self, Embedding_dim, Hidden_dim, vocab_size, target_size):
        super().__init__()
        self.hidden_dim = Hidden_dim
        self.embeds = nn.Embedding(vocab_size, Embedding_dim)
        self.lstm = nn.LSTM(input_size=Embedding_dim, hidden_size=self.hidden_dim)
        self.hidden2tag = nn.Linear(self.hidden_dim, target_size)
        self.hidden = self.init_hidden()
    
    def forward(self, sentence):
        embeds = self.embeds(sentence)
        lstm_outputs, self.hidden = self.lstm(embeds.view(len(sentence),1,-1), self.hidden)
        tag_space = self.hidden2tag(lstm_outputs.view(len(sentence),-1))
        tag_score = F.log_softmax(tag_space, dim=1)
        return tag_score
    
    def init_hidden(self):
        return (torch.zeros(1,1,self.hidden_dim),
                torch.zeros(1,1,self.hidden_dim))
    
model = MyLSTM(EMBEDDING_DIM, HIDDEN_DIM, len(word2idx), len(tag2idx))

loss_func = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

with torch.no_grad():
    inputs = preparedata(train_data[1][0], word2idx)
    tag_scores = model(inputs)
    print(tag_scores)
    
print('train start')
for i in range(300):
    for sentence, tags in train_data:
        model.zero_grad()
        model.hidden = model.init_hidden()
        
        inputs = preparedata(sentence, word2idx)
        targets = preparedata(tags, tag2idx)
        
        outputs = model(inputs)
        loss = loss_func(outputs, targets)
        loss.backward()
        optimizer.step()
    if i % 50 == 0:
        print("epoch:{}, loss:{}".format(i, loss.item()))
print('train done')

with torch.no_grad():
    inputs = preparedata(train_data[1][0], word2idx)
    tag_scores = model(inputs)
    print(tag_scores)
    print(np.argmax(np.exp(tag_scores), axis=1))

tensor([[-0.9189, -1.2199, -1.1849],
        [-0.8869, -1.3236, -1.1335],
        [-0.8511, -1.2644, -1.2356],
        [-0.8072, -1.3592, -1.2140]])
train start
epoch:0, loss:1.2430949211120605
epoch:50, loss:0.6022355556488037
epoch:100, loss:0.08683717995882034
epoch:150, loss:0.027351120486855507
epoch:200, loss:0.014909886755049229
epoch:250, loss:0.009966205805540085
train done
tensor([[-7.0281, -0.0024, -6.4868],
        [-4.9816, -6.3414, -0.0087],
        [-0.0355, -3.6941, -4.6025],
        [-5.4978, -0.0047, -7.3821]])
tensor([ 1,  2,  0,  1])


In [6]:
'''
    poetry-gen
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import random
import matplotlib.pyplot as plt
import time
import os

word_dict = set()

raw_data = [
    '雨过横塘水满堤，乱山高下路东西。一番桃李花开尽，惟有青青草色齐。',
    '京口瓜洲一水间，钟山只隔数重山。春风又绿江南岸，明月何时照我还。',
    '茅檐长扫静无苔，花木成畦手自栽。一水护田将绿绕，两山排闼送青来。',
    '李白乘舟将欲行，忽闻岸上踏歌声。桃花潭水深千尺，不及汪伦送我情。',
    '朝辞白帝彩云间，千里江陵一日还。两岸猿声啼不住，轻舟已过万重山。',
    '天门中断楚江开，碧水东流至此回。两岸青山相对出，孤帆一片日边来。',
    '峨眉山月半轮秋，影入平羌江水流。夜发清溪向三峡，思君不见下渝州。',
    '故人西辞黄鹤楼，烟花三月下扬州。孤帆远影碧空尽，唯见长江天际流。',
    '杨花落尽子规啼，闻道龙标过五溪。我寄愁心与明月，随君直到夜郎西。',
    '百战沙场碎铁衣，城南已合数重围。突营射杀呼延将，独领残兵千骑归。',
    '越王句践破吴归，义士还乡尽锦衣。宫女如花满春殿，只今惟有鹧鸪飞。',
    '问余何意栖碧山，笑而不答心自闲。桃花流水窅然去，别有天地非人间。',
    '两个黄鹂鸣翠柳，一行白露上青天。窗含西岭千秋雪，门泊东吴万里船。',
    '三万里河东人海，五千仍岳上摩天。遗民泪尽胡尘里，南望王师又一年。',
    '曾经沧海难为水，除却巫山不是云。取次花丛懒回顾，半缘修道半缘君。',
    '去年今日此门中，人面桃花相映红。人面不知何处去，桃花依旧笑春风。',
    '多情却似总无情，唯觉樽前笑不成。蜡烛有心还惜别，替人垂泪到天明。',
    '青山隐隐水迢迢，秋尽江南草未凋。二十四桥明月夜，玉人何处教吹箫。',
    '公子王孙逐后尘，绿珠垂泪滴罗巾。侯门一入深似海，从此萧郎是路人。',
    '落魄江湖载酒行，楚腰纤细掌中轻。十年一觉扬州梦，赢得青楼薄幸名。',
    '岁岁金河复玉关，朝朝马策与刀环。三春白雪归青冢，万里黄河绕黑山。',
    '独在异乡为异客，每逢佳节倍思亲。遥知兄弟登高处，遍插茱萸少一人。',
    '日照香炉生紫烟，遥看瀑布挂前川。飞流直下三千尺，疑是银河落九天。',
    '寒雨连江夜入吴，平明送客楚山孤。洛阳亲友如相问，一片冰心在玉壶。',
    '闺中少妇不知愁，春日凝妆上翠楼。忽见陌头杨柳色，悔教夫婿觅封侯。',
    '葡萄美酒夜光杯，欲饮琵琶马上催。醉卧沙场君莫笑，古来征战几人回。',
    '独怜幽草涧边生，上有黄鹂深树鸣。春潮带雨晚来急，野渡无人舟自横。',
    '春城无处不飞花，寒食东风御柳斜。日暮汉宫传蜡烛，轻烟散入五侯家。',
    '玉楼天半起笙歌，风送宫嫔笑语和。月殿影开闻夜漏，水晶帘卷近秋河。',
    '朱雀桥边野草花，乌衣巷口夕阳斜。旧时王谢堂前燕，飞入寻常百姓家。',
    '新妆宜面下朱楼，深锁春光一院愁。行到中庭数花朵，蜻蜓飞上玉搔头。',
    '寂寂花时闭院门，美人相并立琼轩。含情欲说宫中事，鹦鹉前头不敢言。',
    '折戟沉沙铁未销，自将磨洗认前朝。东风不与周郎便，铜雀春深锁二乔。',
    '烟笼寒水月笼沙，夜泊秦淮近酒家。商女不知亡国恨，隔江犹唱后庭花。',
    '落魄江湖载酒行，楚腰纤细掌中轻。十年一觉扬州梦，赢得青楼薄幸名。',
    '银烛秋光冷画屏，轻罗小扇扑流萤。天阶夜色凉如水，坐看牵牛织女星。',
    '娉娉袅袅十三余，豆蔻梢头二月初。春风十里扬州路，卷上珠帘总不如。',
    '君问归期未有期，巴山夜雨涨秋池。何当共剪西窗烛，却话巴山夜雨时。',
    '嵩云秦树久离居，双鲤迢迢一纸书。休问梁园旧宾客，茂陵秋雨病相如。',
    '云母屏风烛影深，长河渐落晓星沉。嫦娥应悔偷灵药，碧海青天夜夜心。',
    '江雨霏霏江草齐，六朝如梦鸟空啼。无情最是台城柳，依旧烟笼十里堤。',
    '别梦依依到谢家，小廊回合曲阑斜。多情只有春庭月，犹为离人照落花。',
    '近寒食雨草萋萋，著麦苗风柳映堤。等是有家归未得，杜鹃休向耳边啼。',
    '别梦依依到谢家，小廊回合曲阑斜。多情只有春庭月，犹为离人照落花。',
    '岁暮阴阳催短景，天涯霜雪霁寒霄。五更鼓角声悲壮，三峡星河影动摇。',
    '燕台一去客心惊，箫鼓喧喧汉将营。万里寒光生积雪，三边曙色动危旌。',
    '沙场烽火侵胡月，海畔云山拥蓟城。少小虽非投笔吏，论功还欲请长缨。',
    '独怜幽草涧边生，上有黄鹂深树鸣。春潮带雨晚来急，野渡无人舟自横。',
    '月落乌啼霜满天，江枫渔火对愁眠。姑苏城外寒山寺，夜半钟声到客船。',
    '回乐峰前沙似雪，受降城外月如霜。不知何处吹芦管，一夜征人尽望乡。',
    '红树青山日欲斜，长郊草色绿无涯。游人不管春将老，来往亭前踏落花。',
    '夜凉吹笛千山月，路暗迷人百种花。棋罢不知人换世，酒阑无奈客思家。',
    '胜败兵家事不期，包羞忍耻是男儿。江东子弟多才俊，卷土重来未可知。',
    '凤凰台上凤凰游，凤去台空江自流。吴宫花草埋幽径，晋代衣冠成古丘。',
    '三山半落青天外，二水中分白鹭洲。总为浮云能蔽日，长安不见使人愁。',
    '舍南舍北皆春水，但见群鸥日日来。花径不曾缘客扫，蓬门今始为君开。',
    '西山白雪三城戍，南浦清江万里桥。海内风尘诸弟隔，天涯涕泪一身遥。',
    '剑外忽传收蓟北，初闻涕泪满衣裳。却看妻子愁何在，漫卷诗书喜欲狂。',
    '白日放歌须纵酒，青春作伴好还乡。即从巴峡穿巫峡，便下襄阳向洛阳。',
    '风急天高猿啸哀，渚清沙白鸟飞回。无边落木萧萧下，不尽长江滚滚来。',
    '万里悲秋常作客，百年多病独登台。艰难苦恨繁霜鬓，潦倒新停浊酒杯。',
    '汀洲无浪复无烟，楚客相思益渺然。汉口夕阳斜渡鸟，洞庭秋水远连天。',
    '孤城背岭寒吹角，独戍临江夜泊船。贾谊上书忧汉室，长沙谪去古今怜。',
]

first_char = set()
for sentence in raw_data:
    first_char.add(sentence[0])
    first_char.add(sentence[8])
    first_char.add(sentence[18])
    first_char.add(sentence[26])
    for char in sentence:
        if char not in word_dict:
            word_dict.add(char)
word_dict.add('eof')
word2indx = {word:idx for idx,word in enumerate(sorted(word_dict))}
indx2word = {idx:word for idx,word in enumerate(sorted(word_dict))}
print('vocab_size:', len(word2indx))
print('古诗样本数:', len(raw_data))

def make_one_case(sentence) -> (torch.tensor, torch.tensor):
    sentence = list(sentence)
    sentence.append('eof')
    length = len(sentence)
    inputs = []
    targets = []
    for i in range(1, length):
        pre = sentence[i-1]
        nex = sentence[i]
        inputs.append(word2indx[pre])
        targets.append(word2indx[nex])
    return torch.tensor(inputs, dtype=torch.long), torch.tensor(targets, dtype=torch.long)


class Net(nn.Module):
    
    def __init__(self, vocab_size, embedding_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embed = nn.Embedding(vocab_size, embedding_size)
        self.lstm = nn.LSTM(input_size=embedding_size, hidden_size=self.hidden_size)
        # self.lstm2 = nn.LSTM(input_size=self.hidden_size, hidden_size=self.hidden_size) # 双层LSTM
        self.fc1 = nn.Linear(self.hidden_size, vocab_size)
        
    def forward(self, sentence, hidden):
        seq_length = sentence.size()[0]
        embeds = self.embed(sentence).view(seq_length, 1, -1) # 通过嵌入层之后reshape成(time_step, batch, embedding_size)
        lstm_out, hidden = self.lstm(embeds, hidden)
        # lstm_out, hidden = self.lstm2(lstm_out.view(seq_length, 1, -1), self.init_hidden())
        output = F.relu(self.fc1(lstm_out.view(seq_length, -1)))
        output = F.log_softmax(output, dim=1)
        return output, hidden
        
    def init_hidden(self):
        return (torch.zeros(1,1,self.hidden_size),
                torch.zeros(1,1,self.hidden_size))
    
# hyperparameters
VOCAB_SIZE = len(word2indx)
HIDDEN_SIZE = 128
EMBEDDING_SIZE = 64


model = Net(VOCAB_SIZE, EMBEDDING_SIZE, HIDDEN_SIZE)
optimizer = optim.RMSprop(model.parameters(), lr=0.01, weight_decay=0.0001)
loss_function = nn.NLLLoss()

train = True
if os.path.exists('params.pkl'):
    model.load_state_dict(torch.load('params.pkl'))
    train = False
if train == False:
    ans = input(r'是否强制训练(Y/N):')
    if ans == 'Y' or ans == 'y':
        train = True


# SGD batch_size=1
if train:
    print('train start')
    epoch = 350
    batch_size = len(raw_data)
    Loss = []
    start_time = time.time()
    for i in range(epoch):
        _loss = 0
        indxs = list(range(batch_size))
        random.shuffle(indxs)
        for j in indxs:
            inputs, targets = make_one_case(raw_data[j])
            optimizer.zero_grad()
            hidden = model.init_hidden()
            outputs, hidden = model(inputs, hidden)
            loss = loss_function(outputs, targets)
            _loss += loss.item()
            loss.backward()
            optimizer.step()
        _loss /= batch_size
        if i % 25 == 0:
            print('epoch:{}, loss:{:.2f}'.format(i, _loss))
        Loss.append(_loss)
    end_time = time.time()
    print('train done, cost{}s'.format(end_time-start_time))
    torch.save(model.state_dict(), 'params.pkl')
    plt.plot(range(len(Loss)), Loss, label='Loss')
    plt.legend()
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.show()


def sample(startword, max_len=15) -> str:
    if startword not in word_dict:
        return 'null'
    inputs = torch.tensor([word2indx[startword]], dtype=torch.long)
    output_poetry = startword
    hidden = model.init_hidden()
    for i in range(max_len):
        outputs,hidden = model(inputs, hidden)
        topv, topi = outputs.data.topk(1)
        print(topi)
        w = topi[0][0].item()
        word = indx2word[w]
        if word == 'eof':
            break
        else:
            output_poetry += word
        inputs = torch.tensor([w], dtype=torch.long)
    return output_poetry

nums = 4
for i in range(nums):
    word = random.sample(list(word_dict), 1)[0]
    print(sample(startword=word, max_len=15))

vocab_size: 688
古诗样本数: 63
是否强制训练(Y/N):N
tensor([[ 5]])
tensor([[ 596]])
tensor([[ 166]])
tensor([[ 650]])
tensor([[ 408]])
tensor([[ 166]])
tensor([[ 687]])
tensor([[ 165]])
tensor([[ 379]])
tensor([[ 12]])
tensor([[ 36]])
tensor([[ 455]])
tensor([[ 1]])
tensor([[ 349]])
tensor([[ 524]])
丛上连天霜满天，夜泊东人相。桃花
tensor([[ 369]])
tensor([[ 7]])
tensor([[ 346]])
tensor([[ 671]])
tensor([[ 432]])
tensor([[ 356]])
tensor([[ 687]])
tensor([[ 257]])
tensor([[ 559]])
tensor([[ 465]])
tensor([[ 329]])
tensor([[ 165]])
tensor([[ 239]])
tensor([[ 524]])
tensor([[ 1]])
流水不树鸣玉楼，忽见离有夜庭花。
tensor([[ 36]])
tensor([[ 558]])
tensor([[ 590]])
tensor([[ 682]])
tensor([[ 677]])
tensor([[ 356]])
tensor([[ 687]])
tensor([[ 420]])
tensor([[ 524]])
tensor([[ 4]])
tensor([[ 328]])
tensor([[ 6]])
tensor([[ 282]])
tensor([[ 220]])
tensor([[ 1]])
年人西辞黄鹤楼，烟花三月下扬州。
tensor([[ 494]])
tensor([[ 455]])
tensor([[ 484]])
tensor([[ 2]])
tensor([[ 551]])
tensor([[ 419]])
tensor([[ 687]])
tensor([[ 588]])
tensor([[ 420]])
tensor([[ 

In [5]:
'''
    seq2seq : machine translation
    english -> french
'''

import unicodedata
import string
import re
import time
import math
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np
import os


SOS_token = 0
EOS_token = 1


class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1
            
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters


def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s


def readLangs(lang1, lang2, reverse=False):
    print("Reading lines...")

    # Read the file and split into lines
    lines = open('%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
        read().strip().split('\n')

    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs


MAX_LENGTH = 10

eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)


def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH and \
        p[0].startswith(eng_prefixes)


def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

def prepareData(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs

input_lang, output_lang, pairs = prepareData('eng', 'fra', False)
random.choice(pairs)


class EncoderRNN(nn.Module):
    
    def __init__(self, vocab_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        
    def forward(self, input, hidden):
        # input为单个单词
        embeds = self.embedding(input).view(1,1,-1)
        output = embeds
        output, hidden = self.gru(output, hidden)
        return output, hidden
    
    def init_hidden(self):
        return torch.zeros(1,1,self.hidden_size)
    
    
class DecoderRNN(nn.Module):
    '''
        未使用Attention机制的Decoder
    '''
    def __init__(self, output_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, input, hidden):
        # input为单个单词
        embeds = self.embedding(input).view(1,1,-1)
        output = F.relu(embeds)
        output,hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden
    
    def init_hidden(self):
        return torch.zeros(1,1,self.hidden_size)
    
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size)
    
def indexsFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split()]

def tensorFromSentence(lang, sentence):
    indexs = indexsFromSentence(lang, sentence)
    indexs.append(EOS_token)
    return torch.tensor(indexs, dtype=torch.long).view(-1, 1)

def tensorFrompair(pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    output_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, output_tensor)

def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

def showPlot(points):
    plt.figure()
    plt.plot(points)
    plt.show()


teacher_forcing_ratio = 0.5
def train(input_tensor, target_tensor, encoder, decoder, encoder_optim, decoder_optim, critirion, maxlength = MAX_LENGTH):
    loss = 0
    encoder_optim.zero_grad()
    decoder_optim.zero_grad()
    
    encoder_hidden = encoder.init_hidden()
    input_length = input_tensor.size()[0]
    target_length = target_tensor.size()[0]
    
    encoder_outputs = torch.zeros(maxlength, encoder.hidden_size)
    
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] += encoder_output[0,0]
    
    decoder_input = torch.tensor([[SOS_token]], dtype=torch.long)
    decoder_hidden = encoder_hidden
    
    use_teacher_forcing = True if random.random()<teacher_forcing_ratio else False
    if use_teacher_forcing:
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
            loss += critirion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]
            
    else:
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.data.topk(1)
            decoder_input = topi.squeeze().detach()
            loss += critirion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break
    loss.backward()
    encoder_optim.step()
    decoder_optim.step()
    return loss.item() / target_length

def trainIters(encoder, decoder, n_iters, print_every=200, plot_every=100, learning_rate=0.01):
    start = time.time()
    print('train start')
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorFrompair(random.choice(pairs))
                      for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    # showPlot(plot_losses)
    
def evaluate(encoder, decoder, sentence, maxlength=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.init_hidden()
        
        encoder_outputs = torch.zeros(maxlength, encoder.hidden_size)
        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] += encoder_output[0,0]
        
        decoder_hidden = encoder_hidden
        decoder_input = torch.tensor([[SOS_token]], dtype=torch.long)
        decoded_words = []
        for di in range(maxlength):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])
            decoder_input = topi.squeeze().detach()
        return decoded_words

def evaluate_random(encoder, decoder, nums=5):
    for _ in range(nums):
        pair = random.choice(pairs)
        print('源语言:', pair[0])
        print('目标语言:', pair[1])
        pred = evaluate(encoder, decoder, pair[0])
        pred = ' '.join(pred)
        print('翻译结果:', pred)
        print('\n')
    
# hyperparams
HIDDEN_SIZE = 128
epochs = 30000

encoder = EncoderRNN(vocab_size=input_lang.n_words, hidden_size=HIDDEN_SIZE)
decoder = AttnDecoderRNN(HIDDEN_SIZE, output_lang.n_words)

need_train = True
if os.path.exists('encoder.pkl') and os.path.exists('decoder.pkl'):
    encoder.load_state_dict(torch.load('encoder.pkl'))
    decoder.load_state_dict(torch.load('decoder.pkl'))
    print('loading done..')
    need_train = False

if need_train:
    trainIters(encoder, decoder, epochs, print_every=1000)
    torch.save(encoder.state_dict(), 'encoder.pkl')
    torch.save(decoder.state_dict(), 'decoder.pkl')

evaluate_random(encoder, decoder)


Reading lines...
Read 135842 sentence pairs
Trimmed to 10853 sentence pairs
Counting words...
Counted words:
eng 2925
fra 4489
loading done..
源语言: i m a normal guy .
目标语言: je suis un gars normal .
tensor([[ 6]])
tensor([[ 11]])
tensor([[ 66]])
tensor([[ 647]])
tensor([[ 66]])
tensor([[ 5]])
tensor([[ 1]])
翻译结果: je suis un peu un . <EOS>


源语言: you are difficult and incorrigible .
目标语言: tu es difficile et incorrigible .
tensor([[ 211]])
tensor([[ 212]])
tensor([[ 66]])
tensor([[ 62]])
tensor([[ 5]])
tensor([[ 5]])
tensor([[ 1]])
翻译结果: tu es un bon . . <EOS>


源语言: i m not proud of it .
目标语言: je n en suis pas fiere .
tensor([[ 6]])
tensor([[ 298]])
tensor([[ 11]])
tensor([[ 247]])
tensor([[ 669]])
tensor([[ 5]])
tensor([[ 1]])
翻译结果: je ne suis pas peur . <EOS>


源语言: you re nice .
目标语言: vous etes sympa .
tensor([[ 211]])
tensor([[ 212]])
tensor([[ 594]])
tensor([[ 5]])
tensor([[ 1]])
翻译结果: tu es matinal . <EOS>


源语言: he s annoying .
目标语言: il est embetant .
tensor([[ 24]])
tensor([[ 25]]

In [None]:
'''
    seq2seq : machine translation
    english -> chinese
'''

import unicodedata
import string
import re
import time
import math
import random

import torch
import jieba
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np
import os


SOS_token = 0
EOS_token = 1

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1
            
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters


def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s

def cut_chinese_sentence(sentence):
    return ' '.join(jieba.cut(sentence))

def readLangs(lang1, lang2, reverse=False):
    print("Reading lines...")

    # Read the file and split into lines
    lines = open('%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
        read().strip().split('\n')

    # Split every line into pairs and normalize
    pairs = [[normalizeString(l.split('\t')[0]), cut_chinese_sentence(l.split('\t')[1])] for l in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs


MAX_LENGTH = 12

eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re ",
    "what ", "why ",
    "when ", "where ",
    "i",
    "it", "do",
)


def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH and \
        p[0].startswith(eng_prefixes)


def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

def prepareData(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs

input_lang, output_lang, pairs = prepareData('eng', 'chn', False)
random.choice(pairs)


class EncoderRNN(nn.Module):
    
    def __init__(self, vocab_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        
    def forward(self, input, hidden):
        # input为单个单词
        embeds = self.embedding(input).view(1,1,-1)
        output = embeds
        output, hidden = self.gru(output, hidden)
        return output, hidden
    
    def init_hidden(self):
        return torch.zeros(1,1,self.hidden_size, device=device)
    
    
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output.view(1, -1)), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)
    
def indexsFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split()]

def tensorFromSentence(lang, sentence):
    indexs = indexsFromSentence(lang, sentence)
    indexs.append(EOS_token)
    return torch.tensor(indexs, dtype=torch.long, device=device).view(-1, 1)

def tensorFrompair(pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    output_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, output_tensor)

def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))


teacher_forcing_ratio = 0.5
def train(input_tensor, target_tensor, encoder, decoder, encoder_optim, decoder_optim, critirion, maxlength = MAX_LENGTH):
    loss = 0
    encoder_optim.zero_grad()
    decoder_optim.zero_grad()
    
    encoder_hidden = encoder.init_hidden()
    input_length = input_tensor.size()[0]
    target_length = target_tensor.size()[0]
    
    encoder_outputs = torch.zeros(maxlength, encoder.hidden_size, device=device)
    
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] += encoder_output[0,0]
    
    decoder_input = torch.tensor([[SOS_token]], dtype=torch.long, device=device)
    decoder_hidden = encoder_hidden
    
    use_teacher_forcing = True if random.random()<teacher_forcing_ratio else False
    if use_teacher_forcing:
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
            loss += critirion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]
            
    else:
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.data.topk(1)
            decoder_input = topi.squeeze().detach()
            loss += critirion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break
    loss.backward()
    encoder_optim.step()
    decoder_optim.step()
    return loss.item() / target_length

def trainIters(encoder, decoder, n_iters, print_every=200, plot_every=100, learning_rate=0.01):
    start = time.time()
    print('train start')
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorFrompair(random.choice(pairs))
                      for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    # showPlot(plot_losses)
    
def evaluate(encoder, decoder, sentence, maxlength=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.init_hidden()
        
        encoder_outputs = torch.zeros(maxlength, encoder.hidden_size, device=device)
        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] += encoder_output[0,0]
        
        decoder_hidden = encoder_hidden
        decoder_input = torch.tensor([[SOS_token]], dtype=torch.long, device=device)
        decoded_words = []
        for di in range(maxlength):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])
            decoder_input = topi.squeeze().detach()
        return decoded_words

def evaluate_random(encoder, decoder, nums=5):
    random.seed(42)
    for _ in range(nums):
        pair = random.choice(pairs)
        print('源语言:', pair[0])
        print('目标语言:', ''.join(pair[1].split()))
        pred = evaluate(encoder, decoder, pair[0])
        pred = ''.join(pred)
        print('翻译结果:', pred)
        print('\n')
    
def evaluate_input(encoder, decoder):
    sent = input("请输入一句英文：")
    pred = evaluate(encoder, decoder, sent)
    print("翻译结果：", ''.join(pred))
    
    
# hyperparams
HIDDEN_SIZE = 256
epochs = 85000

encoder = EncoderRNN(vocab_size=input_lang.n_words, hidden_size=HIDDEN_SIZE).to(device)
decoder = AttnDecoderRNN(HIDDEN_SIZE, output_lang.n_words).to(device)

need_train = True
# if os.path.exists('encoder2.pkl') and os.path.exists('decoder2.pkl'):
#     encoder.load_state_dict(torch.load('encoder2.pkl'))
#     decoder.load_state_dict(torch.load('decoder2.pkl'))
#     print('loading done..')
#     need_train = False

if need_train:
    trainIters(encoder, decoder, epochs, print_every=2000, plot_every=2000)
    torch.save(encoder.state_dict(), 'encoder2.pkl')
    torch.save(decoder.state_dict(), 'decoder2.pkl')

evaluate_random(encoder, decoder)
evaluate_input(encoder, decoder)


Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\drsy9\AppData\Local\Temp\jieba.cache


Reading lines...


Loading model cost 1.059 seconds.
Prefix dict has been built succesfully.


Read 20085 sentence pairs
Trimmed to 7337 sentence pairs
Counting words...
Counted words:
eng 3073
chn 5924
train start
1m 23s (- 57m 42s) (2000 2%) 4.6806
2m 34s (- 52m 4s) (4000 4%) 4.3661
