In [3]:
# 循环神经网络一般用来处理序列数据，如时序数据和文本数据

# RNN具有短期记忆能力，LSTM减轻了其存在的长期遗忘问题（也称为长期依赖），记忆网络则通过外部存储单元保存历史信息更为丰富。


# 基本神经单元
import torch

rnncell = torch.nn.RNNCell(input_size=3, hidden_size=5, bias=True)
lstmcell = torch.nn.LSTMCell(input_size=3, hidden_size=5, bias=True)

print("RNNCell:")
for name, para in rnncell.named_parameters():
    print(name, para.shape)
print("LSTMCell:")
for name, para in lstmcell.named_parameters():
    print(name, para.shape)

RNNCell:
weight_ih torch.Size([5, 3])
weight_hh torch.Size([5, 5])
bias_ih torch.Size([5])
bias_hh torch.Size([5])
LSTMCell:
weight_ih torch.Size([20, 3])
weight_hh torch.Size([20, 5])
bias_ih torch.Size([20])
bias_hh torch.Size([20])


In [4]:
# 循环神经网络

import torch

# GRU
gru = torch.nn.GRU(input_size=3, hidden_size=5, num_layers=2, batch_first=True, dropout=0.3, bidirectional=True)

for name, para in gru.named_parameters():
    print(name, para.shape)


weight_ih_l0 torch.Size([15, 3])
weight_hh_l0 torch.Size([15, 5])
bias_ih_l0 torch.Size([15])
bias_hh_l0 torch.Size([15])
weight_ih_l0_reverse torch.Size([15, 3])
weight_hh_l0_reverse torch.Size([15, 5])
bias_ih_l0_reverse torch.Size([15])
bias_hh_l0_reverse torch.Size([15])
weight_ih_l1 torch.Size([15, 10])
weight_hh_l1 torch.Size([15, 5])
bias_ih_l1 torch.Size([15])
bias_hh_l1 torch.Size([15])
weight_ih_l1_reverse torch.Size([15, 10])
weight_hh_l1_reverse torch.Size([15, 5])
bias_ih_l1_reverse torch.Size([15])
bias_hh_l1_reverse torch.Size([15])


In [13]:
# 人均GDP预测

from pandas_datareader import wb
countries = ['US','CN','BR','FR', 'IN','DE','JP']
dat = wb.download(indicator='NY.GDP.PCAP.KD', country=countries, start=1970, end=2021)
dat


Unnamed: 0_level_0,Unnamed: 1_level_0,NY.GDP.PCAP.KD
country,year,Unnamed: 2_level_1
Brazil,2020,8228.780510
Brazil,2019,8638.282907
Brazil,2018,8582.338637
Brazil,2017,8498.293906
Brazil,2016,8455.312342
...,...,...
United States,1974,27691.020168
United States,1973,28097.060441
United States,1972,26850.613156
United States,1971,25783.660345


In [23]:
dat = dat.unstack().T
dat

Unnamed: 0_level_0,country,Brazil,China,France,Germany,India,Japan,United States
Unnamed: 0_level_1,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
NY.GDP.PCAP.KD,1970,3605.870486,283.584855,17519.42717,17887.101762,362.991056,14122.305267,25279.188791
NY.GDP.PCAP.KD,1971,3917.285845,295.379649,18305.179866,18413.635464,360.717461,14465.00452,25783.660345
NY.GDP.PCAP.KD,1972,4280.668348,299.19036,18978.133371,19113.809537,350.561,15463.885367,26850.613156
NY.GDP.PCAP.KD,1973,4764.117968,315.129107,20024.957184,19963.995072,353.783265,16472.598259,28097.060441
NY.GDP.PCAP.KD,1974,5032.179562,315.816106,20736.368785,20133.840821,349.725681,16055.868011,27691.020168
NY.GDP.PCAP.KD,1975,5168.28064,337.343523,20408.171747,20033.889543,372.964621,16342.920542,27362.99749
NY.GDP.PCAP.KD,1976,5564.763371,326.948883,21183.929332,21115.762072,370.528803,16811.435808,28564.633366
NY.GDP.PCAP.KD,1977,5702.514423,346.938543,21818.141913,21872.009811,388.407475,17380.449234,29586.432208
NY.GDP.PCAP.KD,1978,5845.752543,381.098656,22593.205904,22549.644401,401.312505,18130.985717,30895.03555
NY.GDP.PCAP.KD,1979,6094.920931,404.595918,23299.305968,23475.156524,371.663128,18964.062503,31523.405683


In [25]:

# print(dir(dat))
# countries = dat.columns
# years = dat.index

# print('countries:', countries)
# print('years:', years)

dat.values.shape

(51, 7)

In [47]:

import torch

class Model(torch.nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Model, self).__init__()
        self.rnn = torch.nn.GRU(input_size, hidden_size)
        self.fc = torch.nn.Linear(hidden_size, 1)
    def forward(self, x):
        x1, _ = self.rnn(x)
        return self.fc(x1)

model = Model(input_size=1, hidden_size=5)
print(model)


Model(
  (rnn): GRU(1, 5)
  (fc): Linear(in_features=5, out_features=1, bias=True)
)


In [52]:
import numpy as np

# 归一化数据
data = dat.values
# 2000年为训练集和测试集之间分界线
data_norm = data / data[2000-1970,:]

inputs = torch.FloatTensor(data_norm).unsqueeze(-1)
labels = torch.FloatTensor(data_norm).unsqueeze(-1)

print(inputs.shape, labels.shape)


criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())
for step in range(1000):
    optimizer.zero_grad()
    
    preds = model(inputs)
    
    train_loss = criterion(preds[:30,:], labels[:30,:])
    test_loss = criterion(preds[30:,:], labels[30:,:])
    if step%100==0:
        print('第{}迭代：训练集损失={}，测试集损失={}'.format(step, train_loss, test_loss))
    
    train_loss.backward()
    optimizer.step()

torch.Size([51, 7, 1]) torch.Size([51, 7, 1])
第0迭代：训练集损失=0.007795269601047039，测试集损失=0.6082261204719543
第100迭代：训练集损失=0.0013172607868909836，测试集损失=0.40279579162597656
第200迭代：训练集损失=0.0003261877573095262，测试集损失=0.324444055557251
第300迭代：训练集损失=0.00015174526197370142，测试集损失=0.29832491278648376
第400迭代：训练集损失=9.213651355821639e-05，测试集损失=0.28485631942749023
第500迭代：训练集损失=6.272144673857838e-05，测试集损失=0.273904412984848
第600迭代：训练集损失=4.44898396381177e-05，测试集损失=0.26325324177742004
第700迭代：训练集损失=3.1336927349912e-05，测试集损失=0.25244274735450745
第800迭代：训练集损失=2.1121699319337495e-05，测试集损失=0.24173222482204437
第900迭代：训练集损失=1.3337064046936575e-05，测试集损失=0.23174187541007996


In [None]:
# 文本处理

# nltk

# ngrams:
from nltk import ngrams
print(list(ngrams('I love you very much!'.split(), 2)))

# 电影评论情感分析
import torchtext
from torchtext.legacy import data
from mittens import GloVe
# 转小写，固定文本长度
# text = data.Field(lower=True, batch_first=True, fix_length=20)
# label = data.Field(sequential=False)

train, test = torchtext.datasets.IMDB()

print(type(train), type(test))

text.build_vocab(train, vectors=GloVe(name='6B',dim=100), max_size=10000, min_freq=10)
label.build_vocab(train)
