In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd 
import numpy as np
import random
import math
from sklearn.metrics import r2_score

def train(args, model, device, train_loader, valid_loader, log_interval=100):
        model = model.to(device)

        loss_function = nn.MSELoss(reduction='mean').to(device)
        if args['optimizer'] == 'adam':
            optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
        else:
            optimizer = torch.optim.SGD(model.parameters(), lr=args['lr'], momentum=0.9)
        
        # training
        for epoch in range(args['epoch']):
            loss1 = 0
            for batch_idx, (seq, label) in enumerate(train_loader):
                seq = seq.to(device)
                label = label.to(device)
                y_pred = model(seq)
                loss = loss_function(y_pred, label)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                loss1 += loss.item() * len(y_pred)


            model.eval()
            with torch.no_grad():
                loss = 0
                for (data, target) in valid_loader:
                    data, target = data.to(device), target.to(device)
                    output = model(data)
                    loss += loss_function(output, target).item() * len(output)


def test(model, device, test_loader):
    model.eval()
    loss_function = nn.MSELoss(reduction='mean').to(device)
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = loss_function(output, target).item()
            r2_nn0 = r2_score(target, output)
            within10 = withinten(target, output)
    return loss, r2_nn0, within10

def withinten(y_true, y_pred):
    sum1 = 0
    for i in range(len(y_pred)):
        if(y_pred[i] < 1.1*y_true[i] and y_pred[i] > 0.9*y_true[i]):
            sum1+=1
    return sum1/len(y_pred)

device = 'cpu'
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, batch_size, output_size):
        super(LSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_directions = 1 # 单向LSTM
        self.batch_size = batch_size
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.num_layers, batch_first=True)
        self.reg = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size),
        )

    def forward(self, input_seq):
        # output(batch_size, seq_len, num_directions * hidden_size)
        output, _ = self.lstm(input_seq) # output(64, 3, 20)
        output = output[:, -1, :]
        output = output.view(-1, self.hidden_size)  # (64, 20)
        output = self.reg(output)
        return output


def try_to_add(term):
    result = pd.read_csv('adjust_data1.csv').dropna()
    data = result.drop('meanest', axis=1)
    x = data.drop('actual', axis=1)
    data.loc[:, 'actq':'fincfy'] = (x - np.mean(x, axis=0))/np.std(x, axis=0)

    seq = []
    for name, content in data.groupby('tic'):
        for i in range(len(content.index)-6):
            train_seq = []
            train_label = []
            for j in range(i, i + 2):
                x = [data.iloc[j][term], data.iloc[j]['actual']]
                train_seq.append(x)
            train_seq.append([data.iloc[i+2][term], 0])
            train_label.append(data.iloc[i+2]['actual'])
            train_seq = torch.FloatTensor(train_seq)
            train_label = torch.FloatTensor(train_label).view(-1)
            seq.append((train_seq, train_label))

    seq1 = []
    for name, content in data.groupby('tic'):
        for i in range(len(content.index)-6, len(content.index)-2):
            test_seq = []
            test_label = []
            for j in range(i, i + 2):
                x = [data.iloc[j][term], data.iloc[j]['actual']]
                test_seq.append(x)
            test_seq.append([data.iloc[i+2][term], 0])
            test_label.append(data.iloc[i+2]['actual'])
            test_seq = torch.FloatTensor(test_seq)
            test_label = torch.FloatTensor(test_label).view(-1)
            seq1.append((test_seq, test_label))
    
    N = len(seq)
    train_N = (int(N/64)-20) * 64
    valid_N = N - train_N

    train_seq = [ seq[i] for i in range(0, train_N)]
    valid_seq = [ seq[i] for i in range(train_N, len(seq))]

    train_loader = torch.utils.data.DataLoader(dataset=train_seq, batch_size=128, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(dataset=valid_seq, batch_size=128, shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=seq1, batch_size=50000, shuffle=True)


    model = LSTM(2, 5, 1, 128, 1)

    args = {'optimizer': 'adam',
            'lr': 1e-2,
            'epoch': 100}
    train(args, model, device, train_loader, valid_loader)

    lossi, r2i, within10i = test(model, device, test_loader)
    print('Add {}:'.format(term))
    print('Test Loss: {:.3f}'.format(lossi))
    print('R2: {:.3f}'.format(r2i))
    print('Within10%: {:.3f}\n'.format(within10i))
    

for i in ['actq', 'lctq', 'dvpspq', 'saleq', 'cshoq', 'prccq', 'epsfiq', 'oancfy', 'ivncfy', 'fincfy']:
    try_to_add(i)

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return std(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


Add actq:
Test Loss: 0.080
R2: 0.951
Within10%: 0.208



  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return std(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


Add lctq:
Test Loss: 0.156
R2: 0.905
Within10%: 0.265



  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return std(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


Add dvpspq:
Test Loss: 0.046
R2: 0.972
Within10%: 0.442



  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return std(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


Add saleq:
Test Loss: 0.047
R2: 0.971
Within10%: 0.212



  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return std(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


Add cshoq:
Test Loss: 0.072
R2: 0.956
Within10%: 0.250



  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return std(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


Add prccq:
Test Loss: 0.054
R2: 0.967
Within10%: 0.448



  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return std(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


Add epsfiq:
Test Loss: 0.097
R2: 0.941
Within10%: 0.435



  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return std(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


Add oancfy:
Test Loss: 0.041
R2: 0.975
Within10%: 0.471



  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return std(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


Add ivncfy:
Test Loss: 0.027
R2: 0.983
Within10%: 0.560



  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return std(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


Add fincfy:
Test Loss: 0.206
R2: 0.874
Within10%: 0.372

