In [1]:
import numpy as np
import pandas as pd
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils import data
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch.optim import lr_scheduler
from torch.optim import lr_scheduler
import time
import matplotlib.pyplot as plt
from tqdm import tqdm

def normalization(x: list):
    M, m = np.max(x), np.min(x)
    for i in range(len(x)):
        x[i] = (x[i] - (M + m) / 2) / ((M - m) / 2)
    # x in [-1, 1]
    return M, m, x

def ArrNorm(x: np.ndarray):
    assert isinstance(x, np.ndarray), "We need a list"
    M_list, m_list, res = [], [], []
    for i in range(x.shape[0]):
        u = x[i].tolist()
        M, m, t = normalization(u)
        res.append(t)
        M_list.append(M)
        m_list.append(m)
    return M_list, m_list, np.array(res)


def df2arr(x) -> np.ndarray:
    return np.array(x, dtype=np.float32)


In [2]:
excel = pd.read_excel('./data/A32.xlsx', header=None)
excel.shape

(4459, 18)

In [3]:
sp = [1486, 2972, 4458]
station_1 = excel.iloc[1:sp[0]+1,1:6]
station_2 = excel.iloc[sp[0]+1:sp[1]+1,1:6]
standard = excel.iloc[sp[1]+1:sp[2]+1,1:6]
standard.shape

(1486, 5)

In [4]:
station_1 = df2arr(station_1)
station_2 = df2arr(station_2)
standard = df2arr(standard)
station_1.shape, station_2.shape, standard.shape

((1486, 5), (1486, 5), (1486, 5))

In [5]:
s1_minus_sd = station_1 - standard
s2_minus_sd = station_2 - standard
s1_div_sd = station_1 / standard
s2_dic_sd = station_2 / standard

  s1_div_sd = station_1 / standard
  s2_dic_sd = station_2 / standard


In [6]:
s1_minus_sd.shape, s2_minus_sd.shape

((1486, 5), (1486, 5))

In [7]:
s1_M, s1_m, s1 = ArrNorm(station_1)
s2_M, s2_m, s2 = ArrNorm(station_2)

In [8]:
def GetDataset(input_arr: list, output_arr: list, seq: int):
    assert(len(input_arr)==len(output_arr)), "Different size of input and output!"
    Input = []
    Output = []
    for i in range(input_arr.shape[0]-seq):
        Input.append(input_arr[i:i+seq][:])
        Output.append(output_arr[i:i+seq][:])
    return torch.tensor(Input, dtype=torch.float32), torch.tensor(Output, dtype=torch.float32)

        
def load_array(data_arrays, batch_size, is_train=True):
    # data-iter
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)

s1_minus_sd.shape

(1486, 5)

In [9]:
global sequence
sequence = 15

Input_Data_1, Output_Data_1 = GetDataset(s1, s1_minus_sd, sequence)
Input_Data_2, Output_Data_2 = GetDataset(s2, s2_minus_sd, sequence)
Input_Data_1.shape, Input_Data_2.shape

  return torch.tensor(Input, dtype=torch.float32), torch.tensor(Output, dtype=torch.float32)


(torch.Size([1471, 15, 5]), torch.Size([1471, 15, 5]))

In [10]:
Input_Data_1 = Input_Data_1.unsqueeze(1)
Output_Data_1 = Output_Data_1.unsqueeze(1)
Input_Data_2 = Input_Data_2.unsqueeze(1)
Output_Data_2 = Output_Data_2.unsqueeze(1)
Input_Data_1.shape, Input_Data_1.shape

(torch.Size([1471, 1, 15, 5]), torch.Size([1471, 1, 15, 5]))

In [11]:
Input_Data = torch.cat((Input_Data_1, Input_Data_2), dim = 0)
Output_Data = torch.cat((Output_Data_1, Output_Data_2), dim = 0)
Input_Data.shape, Output_Data.shape

(torch.Size([2942, 1, 15, 5]), torch.Size([2942, 1, 15, 5]))

In [12]:
from torch.utils.data import random_split

global cr
cr = 0.85

data_tot_1 = torch.utils.data.TensorDataset(Input_Data, Output_Data)
train_size = int(Input_Data.shape[0] * cr)
test_size = Input_Data.shape[0] - train_size
train_set , test_set = random_split(data_tot_1,[train_size,test_size],
                                   torch.Generator().manual_seed(0))
# DataIter = load_array((Input_Data_1, Output_Data_1), batch_size=8)


In [13]:
global batch_size
batch_size = 8
Data_Iter = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True, drop_last=True)

for i, dt in enumerate(Data_Iter):
    if dt[0].shape[0]!=batch_size:
        print(dt[0].shape)
        print(i, batch_size, dt[0].shape[0], dt)



In [14]:
# one train demo

class Try(nn.Module):
    def __init__(self, seq, batch_size, scale=0):
        super(Try, self).__init__()
        self.scale = scale
        self.seq = seq
        self.batch_size = batch_size
        self.linear = nn.Sequential(
            nn.Linear((self.seq+1)*6*5, (self.seq+1)*10),
            nn.Dropout(0.5),
            nn.Sigmoid(),

            nn.Linear((self.seq+1)*10, (self.seq+1)*6),
            nn.Dropout(0.5),
            nn.ReLU(inplace=True),

        )
        self.conv1 = nn.Sequential(
            # seq * 5 
            nn.Conv2d(1, 15, kernel_size=(3,3), padding=2, bias=False), # (seq+2) * 7
            nn.BatchNorm2d(15),
            nn.ReLU(inplace=True),

            nn.Conv2d(15, 5, kernel_size=(3,3), padding=1, bias=False), # (seq+2) * 7
            nn.BatchNorm2d(5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=(2,2), stride=1), # (seq+1) * 6
            
        )
        
        self.conv2 = nn.Sequential(
            nn.Conv2d(1, 30, kernel_size=(2,2), padding=0, bias=False), # seq * 5
            nn.BatchNorm2d(30),
            nn.ReLU(inplace=True),

            nn.Conv2d(30, 1, kernel_size=(1,1), padding=0, bias=True), # seq * 5
            nn.ReLU(inplace=True)

        )

    def forward(self, x):
        out = self.conv1(x)
        out = out.view(out.size()[0], -1)
        out = self.linear(out)
        # print(out.shape)
        with torch.no_grad():
            out = out.reshape(self.batch_size, 1, self.seq+1, 6)
        out = self.conv2(out)      
        return out


In [15]:
def R_square(A: torch.tensor, B: torch.tensor) -> torch.float32:
    assert A.shape == B.shape, "Predict value not match the Ground Truth"
    # A: predict   B: ground truth
    # shape: batch_size * 1 * w * h
    A = A.detach()
    B = B.detach()
    A = A.squeeze()
    B = B.squeeze()
    flag = len(A.shape)==3
    # batch_size * w * h
    *_, h = A.shape
#     pre_bar = torch.mean(A, dim=[0,1], keepdim=False)
    gt_bar = torch.mean(B, dim=[0,1] if flag else 0, keepdim=False)
    # print(pre_bar.shape[0])

    def sq_sum(x):
        # print(x.shape)
        x = torch.tensor(x, dtype=torch.float32)
        return torch.sum(x * x, dim=[0,1] if flag else 0)
    # print(A[:,:,1].shape, pre_bar[1].shape)
    SST = [sq_sum(A[:,:,i] if flag else A[:,i] - gt_bar[i]) for i in range(h)]
    SSR = [sq_sum(B[:,:,i] if flag else A[:,i] - gt_bar[i]) for i in range(h)]


    return [ (SST[i] / SSR[i]) for i in range(h) ]

"""
R-squared = SSR / SST = 1 - SSE / SST
"""
A = torch.arange(48.*2).reshape(2,1,4,12)   # test
R_square(A, A)

  x = torch.tensor(x, dtype=torch.float32)


[tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.)]

In [17]:
x_plt, train_loss_plt = [], []
global lr, num_epoch
lr, num_epoch = 0.0001, 5000


net = Try(batch_size=batch_size, seq=15)
Loss = torch.nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters(), lr)
scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

def Iter(num_epoch):
    cnt = 0
    while cnt < num_epoch:
        yield cnt
        cnt += 1

print("Start Training...")
for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_loss = 0.0
    net.train()
    for i, use in enumerate(tqdm(Data_Iter)):
        optimizer.zero_grad()
        # if use[0].shape[0]==2:
        #     print(use[0])
        train_pred = net(use[0])    # use[0].cuda()

        batch_loss = Loss(train_pred, use[1])   # use[1].cuda()
        batch_loss.backward()
        optimizer.step()
        R2 = R_square(train_pred.cpu(), use[1].cpu())

        train_loss += batch_loss.item()

    train_loss = train_loss / train_size
    x_plt.append(epoch+1)
    train_loss_plt.append(train_loss)
    if epoch%100 == 0:
        print("[%2d|%2d] %.2f(s) Train_Loss=%.6f "%\
            (epoch+1,num_epoch,time.time()-epoch_start_time,train_loss),end='')
        epoch_start_time = time.time()
    scheduler.step()  

plt.figure(1)
plt.plot(x_plt,train_loss_plt,'rs-',label='all_train_loss')
plt.show()
torch.save(net.state_dict(), './data/model_cnn.pt')
print("Parameters Saved.")

Start Training...


  x = torch.tensor(x, dtype=torch.float32)
100%|██████████| 312/312 [00:01<00:00, 244.40it/s]


[ 1|5000] 1.29(s) Train_Loss=54.538317 

100%|██████████| 312/312 [00:01<00:00, 269.41it/s]
100%|██████████| 312/312 [00:01<00:00, 261.09it/s]
100%|██████████| 312/312 [00:01<00:00, 273.53it/s]
100%|██████████| 312/312 [00:01<00:00, 247.46it/s]
100%|██████████| 312/312 [00:01<00:00, 254.46it/s]
100%|██████████| 312/312 [00:01<00:00, 254.23it/s]
100%|██████████| 312/312 [00:01<00:00, 243.74it/s]
100%|██████████| 312/312 [00:01<00:00, 240.00it/s]
100%|██████████| 312/312 [00:01<00:00, 250.82it/s]
100%|██████████| 312/312 [00:01<00:00, 253.99it/s]
100%|██████████| 312/312 [00:01<00:00, 261.45it/s]
100%|██████████| 312/312 [00:01<00:00, 235.07it/s]
100%|██████████| 312/312 [00:01<00:00, 259.03it/s]
100%|██████████| 312/312 [00:01<00:00, 260.63it/s]
100%|██████████| 312/312 [00:01<00:00, 256.32it/s]
100%|██████████| 312/312 [00:01<00:00, 247.86it/s]
100%|██████████| 312/312 [00:01<00:00, 243.11it/s]
100%|██████████| 312/312 [00:01<00:00, 240.33it/s]
100%|██████████| 312/312 [00:01<00:00, 229.09it/s]
100%|██████████| 312/312 [00:01

[101|5000] 1.25(s) Train_Loss=54.472571 

100%|██████████| 312/312 [00:01<00:00, 238.79it/s]
100%|██████████| 312/312 [00:01<00:00, 254.13it/s]
100%|██████████| 312/312 [00:01<00:00, 236.53it/s]
100%|██████████| 312/312 [00:01<00:00, 259.32it/s]
100%|██████████| 312/312 [00:01<00:00, 251.10it/s]
100%|██████████| 312/312 [00:01<00:00, 255.68it/s]
100%|██████████| 312/312 [00:01<00:00, 252.69it/s]
100%|██████████| 312/312 [00:01<00:00, 246.90it/s]
100%|██████████| 312/312 [00:01<00:00, 244.92it/s]
100%|██████████| 312/312 [00:01<00:00, 253.83it/s]
100%|██████████| 312/312 [00:01<00:00, 246.77it/s]
100%|██████████| 312/312 [00:01<00:00, 235.47it/s]
100%|██████████| 312/312 [00:01<00:00, 249.37it/s]
100%|██████████| 312/312 [00:01<00:00, 258.27it/s]
100%|██████████| 312/312 [00:01<00:00, 256.96it/s]
100%|██████████| 312/312 [00:01<00:00, 259.43it/s]
100%|██████████| 312/312 [00:01<00:00, 257.43it/s]
100%|██████████| 312/312 [00:01<00:00, 258.15it/s]
100%|██████████| 312/312 [00:01<00:00, 260.04it/s]
100%|██████████| 312/312 [00:01

[201|5000] 1.15(s) Train_Loss=54.456183 

100%|██████████| 312/312 [00:01<00:00, 265.81it/s]
100%|██████████| 312/312 [00:01<00:00, 270.35it/s]
100%|██████████| 312/312 [00:01<00:00, 272.56it/s]
100%|██████████| 312/312 [00:01<00:00, 270.64it/s]
100%|██████████| 312/312 [00:01<00:00, 270.37it/s]
100%|██████████| 312/312 [00:01<00:00, 275.69it/s]
100%|██████████| 312/312 [00:01<00:00, 274.99it/s]
100%|██████████| 312/312 [00:01<00:00, 268.06it/s]
100%|██████████| 312/312 [00:01<00:00, 246.29it/s]
100%|██████████| 312/312 [00:01<00:00, 261.66it/s]
100%|██████████| 312/312 [00:01<00:00, 261.29it/s]
100%|██████████| 312/312 [00:01<00:00, 253.10it/s]
100%|██████████| 312/312 [00:01<00:00, 262.87it/s]
100%|██████████| 312/312 [00:01<00:00, 265.65it/s]
100%|██████████| 312/312 [00:01<00:00, 262.32it/s]
100%|██████████| 312/312 [00:01<00:00, 266.68it/s]
100%|██████████| 312/312 [00:01<00:00, 276.33it/s]
100%|██████████| 312/312 [00:01<00:00, 279.03it/s]
100%|██████████| 312/312 [00:01<00:00, 250.64it/s]
100%|██████████| 312/312 [00:01

KeyboardInterrupt: 

In [18]:
import math
import numpy as np
from scipy import stats
def rsquared(x, y): 
    _, _, r_value, _, _ = stats.linregress(x.detach().numpy(), y.detach().numpy()) 
    return r_value**2

Test_Iter = DataLoader(dataset=test_set, batch_size=1, shuffle=False, drop_last=True)
model = Try(batch_size=1, seq=15)
model.load_state_dict(torch.load('./data/model_cnn.pt'))
model = model.cuda()
model.eval()
with torch.no_grad():
    for i, use in enumerate(Test_Iter):
        pred = model(use[0].cuda())
#         print(pred.shape, use[1].shape)
        R = rsquared(pred, use[1].cuda())
        print(i, R)

RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.