In [6]:
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score
def _csv_to_tensor(csv_path):
    """Read a CSV file and return its values as a float32 tensor."""
    return torch.tensor(pd.read_csv(csv_path).values, dtype=torch.float32)


# Feature tables: prepend an axis of size 1 to the (rows, columns) table.
X_train = _csv_to_tensor('data/X_train_dec.csv').unsqueeze(0)
X_test = _csv_to_tensor('data/X_test_dec.csv').unsqueeze(0)
# Training labels receive the same leading axis.
y_train = _csv_to_tensor('data/y_train_dec.csv').unsqueeze(0)
# Test labels are kept as a pandas DataFrame (not converted to a tensor).
y_test = pd.read_csv('data/y_test_dec.csv')

In [7]:
# Sanity check: display the shapes of the training features and training labels.
X_train.shape,y_train.shape

(torch.Size([1, 6519, 32]), torch.Size([1, 6519, 1]))

In [8]:
# Sanity check: X_test is a tensor, while y_test is still a pandas DataFrame,
# so the second shape is displayed as a plain tuple.
X_test.shape,y_test.shape

(torch.Size([1, 159, 32]), (159, 1))

In [9]:
class RNN(nn.Module):
    """Single-layer ``nn.RNN`` followed by a linear head and a sigmoid.

    The linear head and the sigmoid are applied to the RNN output of every
    time step, so the model emits one value per time step.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the recurrent hidden state.
        output_size: number of values produced per time step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        # batch_first=True: batched inputs are laid out as (batch, seq, feature).
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run the RNN over ``x`` and return the sigmoid of the linear head.

        Args:
            x: input of shape (batch, seq, input_size), or (seq, input_size)
                for a single unbatched sequence.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``output_size``.
        """
        # nn.RNN uses an all-zero initial hidden state when none is supplied.
        # Relying on that default (instead of building a float32 h0 by hand)
        # keeps the state on the input's device and dtype and also works for
        # unbatched input.
        out, _ = self.rnn(x)
        diff = self.fc(out)
        return self.sigmoid(diff)
# --- Hyperparameters ---
input_size = 32
hidden_size = 128
output_size = 1
learning_rate = 0.01
num_epochs = 10

# Fix the RNG seed so weight initialisation (and therefore the loss curve)
# is reproducible across kernel restarts.
torch.manual_seed(0)

train_data = TensorDataset(X_train, y_train)
# NOTE: TensorDataset indexes the first axis. With X_train shaped (1, N, 32)
# as loaded in this notebook, the dataset holds a single sample, so batch_size
# has no practical effect and every epoch consists of one optimisation step.
train_loader = DataLoader(train_data, batch_size=30, shuffle=False)
model = RNN(input_size, hidden_size, output_size)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Later cells switch the model to eval mode; make sure re-running this cell
# always trains in training mode.
model.train()
for epoch in range(num_epochs):
    epoch_loss = 0.0
    num_batches = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        num_batches += 1
    # Report the mean loss over the epoch rather than only the last batch's
    # loss; the max() guard avoids a division by zero on an empty loader.
    mean_loss = epoch_loss / max(num_batches, 1)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}')

Epoch [1/10], Loss: 0.8172
Epoch [2/10], Loss: 0.6500
Epoch [3/10], Loss: 0.6359
Epoch [4/10], Loss: 0.6335
Epoch [5/10], Loss: 0.6362
Epoch [6/10], Loss: 0.6358
Epoch [7/10], Loss: 0.6319
Epoch [8/10], Loss: 0.6304
Epoch [9/10], Loss: 0.6320
Epoch [10/10], Loss: 0.6299


In [10]:
model.eval()
with torch.no_grad():
    # Forward pass over the whole training tensor; no_grad already prevents
    # graph construction, so no detach() is needed before numpy().
    train_probs = model(X_train)
    # Threshold the sigmoid outputs at 0.5 to obtain 0/1 predictions.
    y_pred = (train_probs.squeeze().numpy() > 0.5).astype(int)
# accuracy_score's signature is (y_true, y_pred): pass the labels first.
accuracy_score(y_train.squeeze().numpy(), y_pred)

0.6709618039576623

In [11]:
# NOTE(review): the variables carry a `_1306` suffix but the files read are
# the `_1402` ones -- confirm which data set is intended.
X_test_1306 = torch.tensor(pd.read_csv('data/X_test_dec_1402.csv').values, dtype=torch.float32).unsqueeze(0)
y_test_1306 = pd.read_csv('data/y_test_dec_1402.csv')
model.eval()
with torch.no_grad():
    test_probs = model(X_test_1306)
    # Threshold the sigmoid outputs at 0.5; kept as a DataFrame as before.
    y_pred_t = pd.DataFrame((test_probs.squeeze().numpy() > 0.5).astype(int))
# accuracy_score's signature is (y_true, y_pred): pass the labels first.
accuracy_score(y_test_1306, y_pred_t)

0.6970802919708029

def reset_parameters(model, mean=0.0, std=5.39):
    """Overwrite every weight matrix of ``model`` with Gaussian noise, in place.

    Only parameters with more than one dimension (weight matrices) are
    re-initialised; one-dimensional parameters such as biases are left
    untouched.

    Args:
        model: module whose parameters are overwritten.
        mean: mean of the normal distribution (default 0).
        std: standard deviation of the normal distribution (default 5.39,
            the value that was previously hard-coded).

    Returns:
        None. The parameters are modified in place.
    """
    for param in model.parameters():
        if param.dim() > 1:  # only re-initialise weight matrices
            # nn.init functions already run without gradient tracking, so the
            # parameter can be passed directly instead of going through .data.
            nn.init.normal_(param, mean=mean, std=std)

# Overwrite the trained model's weights with random values.
# (reset_parameters only touches parameters with more than one dimension.)
reset_parameters(model)

In [12]:
# Print every named parameter of the model together with its current tensor.
for param_name, param_value in model.named_parameters():
    print(param_name, param_value)

rnn.weight_ih_l0 Parameter containing:
tensor([[-0.0428, -0.1135,  0.0460,  ..., -0.1130,  0.0954,  0.0824],
        [-0.0231, -0.0240,  0.0385,  ..., -0.1008,  0.0954, -0.0630],
        [-0.0728, -0.0386,  0.0168,  ..., -0.0521, -0.1422, -0.0937],
        ...,
        [ 0.0476,  0.0649,  0.0559,  ...,  0.0762, -0.0104,  0.0654],
        [-0.0149,  0.0084,  0.0722,  ..., -0.0276, -0.0849, -0.0004],
        [ 0.0738, -0.0379,  0.0319,  ..., -0.0206,  0.0237,  0.0204]],
       requires_grad=True)
rnn.weight_hh_l0 Parameter containing:
tensor([[-0.0592,  0.0564,  0.0607,  ...,  0.0022, -0.0374, -0.0432],
        [-0.0048, -0.0634,  0.1282,  ...,  0.0021,  0.1025,  0.0110],
        [ 0.0317,  0.0342,  0.0422,  ..., -0.0856,  0.0435,  0.0459],
        ...,
        [-0.0694, -0.0810, -0.0492,  ...,  0.0082, -0.0230, -0.0326],
        [ 0.0482, -0.0194, -0.0957,  ...,  0.0671, -0.0033,  0.0435],
        [-0.0413,  0.0972, -0.0732,  ..., -0.0384, -0.1067,  0.0353]],
       requires_grad=True)


In [13]:
# Build a per-row table of model outputs (`temp`).
# NOTE(review): the variable is named X_test_ but the file read here is the
# training feature CSV -- confirm this is intended.
X_test_=torch.tensor(pd.read_csv('data/X_train_dec.csv').values,dtype=torch.float32).unsqueeze(1).transpose(0,1)
# X_test_zero=torch.tensor(pd.read_csv('data/X_test_dec_zero_2.csv').values,dtype=torch.float32).unsqueeze(1).transpose(0,1)
# y_test_=torch.tensor(pd.read_csv('data/y_test_dec.csv').values,dtype=torch.float32).unsqueeze(0)
# train_data = TensorDataset(X_test_, y_test_)
# train_loader = DataLoader(train_data, batch_size=30, shuffle=False)
# train_data_zero = TensorDataset(X_test_zero, y_test_)
# train_loader_zero = DataLoader(train_data_zero, batch_size=5, shuffle=False)
# NOTE(review): the commented-out lines above are the only place in this cell
# where y_test_ and the "zero" loader are defined, yet y_test_ and
# influence_x1 are still used below.
model.eval()
with torch.no_grad():
    # train_loader is the loader built in the training cell. `factor` is
    # overwritten on every iteration, so only the last batch's pre-sigmoid
    # output (linear head applied to the RNN output) survives the loop.
    for i, j in train_loader:
        factor = model.fc(model.rnn(i)[0])
    # for i, j in train_loader_zero:
    #     influence_x1 = model.fc(model.rnn(i)[0])
    # Full forward pass (including the sigmoid), thresholded at 0.5 for the
    # predicted label.
    p = model(X_test_)
    label = (p.squeeze().detach().numpy() > 0.5).astype(int)
    label = pd.DataFrame(label,columns=['predict_label'])
    factor = pd.DataFrame(factor.squeeze().numpy(), columns=['factor'])
    p = pd.DataFrame(p.squeeze().numpy(), columns=['probability'])
    # NOTE(review): influence_x1 is never assigned because the loop that
    # produced it is commented out; this line raises NameError.
    influence_x1 = pd.DataFrame(influence_x1.squeeze().numpy(),columns =['influence_x1'])
    # NOTE(review): y_test_ is likewise only defined in a commented-out line.
    y_test_ = pd.DataFrame(y_test_.squeeze().numpy(),columns=['label'])
    # Column-wise concatenation; rows are aligned on the default integer index.
    temp = pd.concat([factor,influence_x1,p,label,y_test_],axis=1)
    # performance = factor minus influence_x1, row by row.
    temp['performance'] = temp.factor - temp.influence_x1
temp

NameError: name 'influence_x1' is not defined

In [None]:
# temp.to_csv('data/result_1601.csv',index=False)

# Line plot of the 'performance' column of `temp` against its row index.
temp.performance.plot()

model.eval()
with torch.no_grad():
    # Load the two input variants and prepend an axis of size 1 to each.
    x1 = torch.tensor(pd.read_csv('data/X_test_dec_1.csv').values, dtype=torch.float32).unsqueeze(0)
    _x1 = torch.tensor(pd.read_csv('data/X_test_dec_-1.csv').values, dtype=torch.float32).unsqueeze(0)
    # Pre-sigmoid outputs: linear head applied to the RNN output sequence.
    logits_x1 = model.fc(model.rnn(x1)[0])
    logits_x1_alt = model.fc(model.rnn(_x1)[0])

# One single-column frame per input variant, joined side by side.
y1 = pd.DataFrame(logits_x1.squeeze().numpy(), columns=['y1'])
_y1 = pd.DataFrame(logits_x1_alt.squeeze().numpy(), columns=['_y1'])
y_x = pd.concat([y1, _y1], axis=1)
# Row-wise average of the two outputs.
y_x['med_xi'] = (y_x['y1'] + y_x['_y1']) / 2

# matplotlib.pyplot is already imported as `plt` in the notebook's first cell,
# so it is not re-imported here.
# Plot the 10-row rolling mean of med_xi and title the resulting axes.
ax = y_x.med_xi.rolling(10).mean().plot()
ax.set_title('performance')

In [None]:
# Reload the feature table and prepend an axis of size 1.
X_test_1306 = torch.tensor(pd.read_csv('data/X_test_dec_1402.csv').values, dtype=torch.float32).unsqueeze(0)
model.eval()
with torch.no_grad():
    # Pre-sigmoid output: linear head applied to the RNN output sequence.
    head_out = model.fc(model.rnn(X_test_1306)[0])
factor = pd.DataFrame(head_out.squeeze().numpy(), columns=['factor'])
factor