In [3]:
import math
import copy
import os
import time
import enum
import argparse
import polars as pl
# Visualization related imports
import matplotlib.pyplot as plt
import seaborn
from torch.utils.data import DataLoader, Dataset
# Deep learning related imports
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.optim import Adam
from torch.utils.tensorboard import SummaryWriter
from torch.hub import download_url_to_file
from torch.utils.data import Dataset
# Data manipulation related imports
# from torchtext.data import Dataset, BucketIterator, Field, Example
import spacy
import seaborn as sns

In [4]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
# Read the sampled dataset from parquet and display its (rows, columns) shape.
data1 = pl.read_parquet('data/sampling_data.parquet')
data1.shape

(367684, 154)

In [7]:
# Remove the `data1` name from the namespace (the frame is freed if nothing else references it).
del data1

In [20]:
# Frequency table of the values in column D334.
# NOTE(review): the cell just before this one in the file runs `del data1`, so on a
# top-to-bottom run this raises NameError — the frame must be reloaded first.
data1['D334'].value_counts()

D334,counts
f32,u32
99.0,22158
101.5,352
100.0,17613
0.0,739560
99.5,39128
101.0,1235
100.5,42419
101.599998,1
100.099998,59466
99.599998,47788


In [None]:
# Main variables
# numerical: D135, D136, soc_diff, D22 (battery current), D356 (drop), D52 (low voltage), D334 (internal pressure), D290 (present input voltage)
# categorical: D27 (battery balancing state)


In [5]:
from DTN_model import DTN_model
from utils import processor,BatteryData

In [6]:
# Hyperparameters handed to DTN_model when the model is constructed further down.
model_dimension = 256
number_of_heads = 8
# NOTE(review): the model repr printed later in this notebook shows embeddings with
# in_features=147, not 153 — confirm which value the current data/model expects.
number_of_var = 153
number_of_layers = 6
dropout_probability = 0.1
hidden_dimension_list = [512,256,64,2]

In [23]:
# Scratch arithmetic: 768 / 3 equals 256, the value of model_dimension.
768/3

256.0

In [7]:
# Instantiate DTN_model from the hyperparameters defined in the configuration cell.
MyModel = DTN_model(model_dimension, number_of_heads, number_of_layers, dropout_probability, number_of_var, hidden_dimension_list)

In [8]:
class BatteryData(Dataset):
    """Map-style dataset over one processed battery parquet file.

    The frame read from ``data_dir`` is passed through ``processor.process_data`` and the
    result is kept in ``self.ares``. ``self.ares`` is indexed as a 3-D array whose last axis
    holds the input features followed by the label in the final position.

    NOTE(review): this class shadows the ``BatteryData`` imported from ``utils`` earlier in
    the notebook — confirm which definition is meant to be used.
    """

    def __init__(self, data_dir, processor) -> None:
        """Load and preprocess a single parquet file.

        Args:
            data_dir: Path handed to ``pl.read_parquet`` (a file path, despite the name).
                Use ``load_data`` to process a whole directory instead.
            processor: Object exposing ``process_data(frame)``.
        """
        super().__init__()
        self.processor = processor
        self.ares = self.processor.process_data(pl.read_parquet(data_dir))

    def __len__(self):
        """Number of samples, i.e. the size of the first axis of ``self.ares``."""
        return self.ares.shape[0]

    def __getitem__(self, index):
        """Return ``(features, labels)`` for ``index``.

        The sample is selected first and then split along its last axis, so only the
        requested entry is sliced instead of the full array on every access.
        """
        sample = self.ares[index]
        return sample[..., :-1], sample[..., -1]

    def load_data(self, data_dir):
        """Process every file in ``data_dir`` and concatenate the results with ``pl.concat``.

        Files are visited in sorted name order so the result is reproducible across runs
        (``os.listdir`` makes no ordering guarantee).
        """
        processed = [
            self.processor.process_data(pl.read_parquet(os.path.join(data_dir, item)))
            for item in sorted(os.listdir(data_dir))
        ]
        return pl.concat(processed)

    @staticmethod
    def get_data_info(data_dir):
        """Yield each file found under ``data_dir`` (recursively) as a polars frame.

        The file name is printed before the file is read.
        """
        for root, _dirs, files in os.walk(data_dir):
            for file in files:
                print(f"file: {file}")
                yield pl.read_parquet(os.path.join(root, file))
    

In [None]:
# Load one processed parquet file for manual inspection.
df= pl.read_parquet('processed_data/D1_4_5.parquet')


In [11]:
# Preview the first rows of the processed frame.
df.head()

D135_1,D135_2,D135_3,D135_4,D135_5,D135_6,D135_7,D135_8,D135_9,D135_10,D135_11,D135_12,D135_13,D135_14,D135_15,D135_16,D135_17,D135_18,D135_19,D135_20,D135_21,D135_22,D135_23,D135_24,D135_25,D135_26,D135_27,D135_28,D135_29,D135_30,D135_31,D135_32,D135_33,D135_34,D135_35,D135_36,D135_37,…,D136_6,D136_7,D136_8,D136_9,D136_10,D136_11,D136_12,D136_13,D136_14,D136_15,D136_16,D136_17,D136_18,D136_19,D136_20,D136_21,D136_22,D136_23,D136_24,D136_25,D136_26,D136_27,D136_28,soc_diff,D22,D52,D24,D40,D41,D44,D72,D117,D133,cycle,D140,D141,label
f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f64,f64,f64,i64,i64,f64,f64,i32,str,str,i32
3601.0,3603.0,3600.0,3600.0,3598.0,3599.0,3600.0,3599.0,3598.0,3600.0,3598.0,3598.0,3598.0,3598.0,3597.0,3598.0,3600.0,3599.0,3600.0,3599.0,3601.0,3600.0,3600.0,3599.0,3601.0,3599.0,3599.0,3598.0,3597.0,3599.0,3599.0,3598.0,3598.0,3601.0,3602.0,3601.0,3602.0,…,28.0,28.0,28.5,28.5,28.0,28.0,28.5,28.0,28.0,28.5,28.0,28.5,28.0,28.5,28.5,28.5,28.5,29.0,29.0,28.5,28.5,29.0,29.0,1.0,0.1,12.6,23.4,24.0,23.0,0,0,23.5,2925.0,1,"""163""","""337""",0
3601.0,3603.0,3600.0,3600.0,3598.0,3599.0,3600.0,3599.0,3598.0,3600.0,3598.0,3598.0,3598.0,3598.0,3598.0,3599.0,3600.0,3599.0,3600.0,3599.0,3601.0,3600.0,3600.0,3599.0,3601.0,3599.0,3599.0,3598.0,3597.0,3599.0,3599.0,3598.0,3598.0,3601.0,3602.0,3601.0,3602.0,…,28.0,28.0,28.5,28.5,28.0,28.0,28.5,28.0,28.0,28.5,28.0,28.5,28.0,28.5,28.5,28.5,28.5,29.0,29.0,28.5,28.5,29.0,29.0,1.0,0.2,12.5,23.4,24.0,23.0,0,0,23.5,2925.0,1,"""171""","""337""",0
3600.0,3603.0,3600.0,3599.0,3597.0,3598.0,3600.0,3598.0,3598.0,3599.0,3597.0,3597.0,3597.0,3597.0,3597.0,3598.0,3599.0,3599.0,3600.0,3598.0,3600.0,3599.0,3599.0,3599.0,3600.0,3598.0,3599.0,3597.0,3596.0,3598.0,3599.0,3598.0,3597.0,3600.0,3601.0,3601.0,3601.0,…,28.0,28.0,28.5,28.5,28.0,28.0,28.5,28.0,28.0,28.5,28.0,28.5,28.0,28.5,28.5,28.5,28.5,29.0,29.0,28.5,28.5,29.0,29.0,1.0,2.3,12.7,23.4,24.0,23.0,0,0,23.5,2925.0,1,"""2406""","""337""",0
3600.0,3602.0,3600.0,3599.0,3597.0,3598.0,3600.0,3598.0,3597.0,3599.0,3597.0,3597.0,3597.0,3597.0,3597.0,3597.0,3599.0,3598.0,3600.0,3598.0,3600.0,3599.0,3599.0,3598.0,3600.0,3598.0,3598.0,3597.0,3596.0,3598.0,3598.0,3597.0,3597.0,3600.0,3601.0,3600.0,3601.0,…,28.0,28.0,28.5,28.5,28.0,28.0,28.5,28.0,28.0,28.5,28.0,28.5,28.0,28.5,28.5,28.5,28.5,29.0,29.0,28.5,28.5,29.0,29.0,1.0,3.3,13.2,23.4,24.0,23.0,0,0,23.5,2925.0,1,"""3340""","""337""",0
3607.0,3610.0,3607.0,3607.0,3605.0,3606.0,3607.0,3606.0,3605.0,3607.0,3605.0,3605.0,3605.0,3605.0,3604.0,3606.0,3607.0,3606.0,3607.0,3606.0,3608.0,3607.0,3607.0,3606.0,3608.0,3606.0,3606.0,3605.0,3604.0,3606.0,3606.0,3605.0,3605.0,3608.0,3609.0,3608.0,3608.0,…,28.0,28.0,28.5,28.5,28.0,28.0,28.5,28.0,28.0,28.5,28.0,28.5,28.0,28.5,28.5,28.5,28.5,29.0,29.0,28.5,28.5,29.0,29.0,1.0,-20.6,14.1,23.4,24.0,23.0,0,0,23.5,2925.0,1,"""0""","""337""",0


In [9]:
# Preprocessing object shared by every BatteryData instance created in this notebook.
# NOTE(review): the meaning of the arguments (['cycle'], 500, 100) is defined in
# utils.processor and is not visible here; 500 matches the sequence length of the model
# output shape printed later — confirm.
dataProcess = processor(['cycle'], 500, 100)
# battery_data = BatteryData('processed_data', dataProcess)
# dataloader = DataLoader(dataset=battery_data,batch_size=8,shuffle=False,num_workers=2)

In [28]:
# Build a dataset from a single processed file and wrap it in a shuffled DataLoader
# (batches of 8, two worker processes).
battery_data = BatteryData('processed_data/D1_3.parquet', dataProcess)
dataloader = DataLoader(dataset=battery_data,batch_size=8,shuffle=True,num_workers=2)

In [21]:
# Number of samples in the dataset behind the loader.
len(dataloader.dataset)

26237

In [24]:
def train_loop(data_dir, model, loss_fn, optimizer, max_batches=1, log_gradients=True):
    """Train ``model`` on the parquet files found in ``data_dir``.

    Every directory entry is wrapped in a ``BatteryData`` dataset (using the notebook-level
    ``dataProcess`` object) and iterated with a shuffled ``DataLoader`` (batch size 8,
    two workers).

    Args:
        data_dir: Directory whose entries are files readable by ``BatteryData``.
        model: Module called as ``model(X, mask)`` and exposing ``get_key_padding_mask``.
        loss_fn: Callable ``loss_fn(pred, target)``. Both tensors are passed flattened to
            ``(batch * seq, classes)``.
        optimizer: Object providing ``step()`` and ``zero_grad()``.
        max_batches: Total number of batches to process before returning. The default of 1
            keeps the original single-batch smoke-test behaviour; pass ``None`` to run over
            every batch of every file.
        log_gradients: When true (the default, as in the original), print the name, value
            and shape of every available gradient after each backward pass.
    """
    # Follow the device the model lives on instead of hard-coding CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    size = 0
    # Set the model to training mode - important for batch normalization and dropout layers
    model.train()
    batch = 0
    for file in os.listdir(data_dir):
        battery_data = BatteryData(os.path.join(data_dir, file), dataProcess)
        dataloader = DataLoader(dataset=battery_data, batch_size=8, shuffle=True, num_workers=2)
        size += len(dataloader.dataset)
        for X, y in dataloader:
            X = X.to(device)
            mask = model.get_key_padding_mask(X).float().to(device)
            pred = model(X.float(), mask)

            # Two-channel target: channel 0 carries y, channel 1 carries 1 - y.
            target = torch.stack([y, 1 - y], dim=2).to(device=device, dtype=pred.dtype)

            # nn.CrossEntropyLoss takes dim 1 as the class axis, so a (batch, seq, classes)
            # tensor must be flattened to (batch * seq, classes); otherwise the sequence
            # axis is treated as the classes.
            num_classes = pred.shape[-1]
            loss = loss_fn(pred.reshape(-1, num_classes), target.reshape(-1, num_classes))

            # Backpropagation
            loss.backward()
            if log_gradients:
                # Inspect the model that was passed in, not a notebook global.
                for name, param in model.named_parameters():
                    if param.requires_grad and param.grad is not None:
                        print(name)
                        print(param.grad)
                        print(f"grad shape: {param.grad.shape}")
            optimizer.step()
            optimizer.zero_grad()

            batch += 1
            if batch % 100 == 0:
                # `batch` already counts the batch just processed.
                current = batch * len(X)
                print(f"loss: {loss.item():>7f}  [{current:>5d}/{size:>5d}]")
            if max_batches is not None and batch >= max_batches:
                return

In [29]:
# Pull exactly one mini-batch out of the loader: pairing it with range(1) ends the loop
# after the first element without an explicit break.
for batch_idx, data_batch in zip(range(1), dataloader):
    print(batch_idx)
    X, y = data_batch

0


In [9]:
# Derive the key-padding mask for the sampled batch using the model's own helper.
src_mask = MyModel.get_key_padding_mask(X)

In [10]:
# Forward-pass smoke test on one batch; prints the shape of the model output.
res = MyModel(X.float(), src_mask)
print(res.shape)

torch.Size([8, 500, 2])


In [35]:
class CustomLRAdamOptimizer:
    """
        Wrapper around an optimizer that overwrites its learning rate on every step.

        The rate rises linearly while the step count is below ``num_of_warmup_steps`` and
        afterwards falls off with the inverse square root of the step count; the whole
        schedule is scaled by ``model_dimension ** -0.5``.
    """

    def __init__(self, optimizer, model_dimension, num_of_warmup_steps):
        # Step counter starts at zero; it is incremented before the first rate is computed.
        self.current_step_number = 0

        self.num_of_warmup_steps = num_of_warmup_steps
        self.model_size = model_dimension
        self.optimizer = optimizer

    def step(self):
        """Advance the schedule by one step, push the new rate to all groups, then update."""
        self.current_step_number += 1
        new_rate = self.get_current_learning_rate()

        for group in self.optimizer.param_groups:
            group['lr'] = new_rate

        # Apply the gradients with the freshly set learning rate.
        self.optimizer.step()

    def get_current_learning_rate(self):
        """Learning rate for the current step (see Page 7, Chapter 5.3 "Optimizer")."""
        step = self.current_step_number
        warmup = self.num_of_warmup_steps

        scale = self.model_size ** (-0.5)
        inverse_sqrt_decay = step ** (-0.5)
        linear_warmup = step * warmup ** (-1.5)
        # The smaller term wins: the linear ramp before `warmup` steps, the decay after.
        return scale * min(inverse_sqrt_decay, linear_warmup)

    def zero_grad(self):
        """Clear the gradients held by the wrapped optimizer."""
        self.optimizer.zero_grad()

In [14]:
# KL-divergence criterion with 'batchmean' reduction.
kl_div_loss = nn.KLDivLoss(reduction='batchmean')

In [1]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one pass over ``dataloader``, updating ``model`` after every batch.

    Args:
        dataloader: Iterable of ``(X, y)`` batches that also exposes ``.dataset``.
        model: Module called as ``model(X, mask)`` and exposing ``get_key_padding_mask``.
        loss_fn: Callable ``loss_fn(pred, target)``. Both tensors are passed flattened to
            ``(batch * seq, classes)``.
        optimizer: Object providing ``step()`` and ``zero_grad()``.

    Progress (loss and samples seen) is printed every 100 batches.
    """
    # Follow the device the model lives on instead of hard-coding CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    size = len(dataloader.dataset)
    # Set the model to training mode - important for batch normalization and dropout layers
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X = X.to(device)
        mask = model.get_key_padding_mask(X).float().to(device)
        pred = model(X.float(), mask)

        # Two-channel target: channel 0 carries y, channel 1 carries 1 - y.
        target = torch.stack([y, 1 - y], dim=2).to(device=device, dtype=pred.dtype)

        # nn.CrossEntropyLoss takes dim 1 as the class axis, so a (batch, seq, classes)
        # tensor must be flattened to (batch * seq, classes); otherwise the sequence axis
        # is treated as the classes.
        num_classes = pred.shape[-1]
        loss = loss_fn(pred.reshape(-1, num_classes), target.reshape(-1, num_classes))

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss_value, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [14]:
# Explicit handle to the first CUDA device.
# NOTE(review): a lowercase `device` with a CPU fallback is already defined near the top of
# the notebook, and no visible cell reads `Device` — this looks redundant; confirm.
Device = torch.device("cuda:0") 

In [39]:
# Candidate criteria: KL divergence ('batchmean' reduction) and cross entropy.
# NOTE(review): both names are also assigned in other cells of this notebook
# (`kl_div_loss` earlier, `loss` again in the cell that moves the model to the GPU).
kl_div_loss = nn.KLDivLoss(reduction='batchmean') 
loss = nn.CrossEntropyLoss()

In [None]:
# Overwrite the parameter list of the optimizer's first param group with the model's
# trainable parameters.
# NOTE(review): `optimizer` is only created in the following cell, so on a fresh
# top-to-bottom run this line raises NameError.
optimizer.param_groups[0]['params'] = [p for p in MyModel.parameters() if p.requires_grad]

In [11]:
# Adam over all model parameters with a fixed learning rate of 1e-4.
optimizer = Adam(MyModel.parameters(),lr=0.0001, betas=(0.9, 0.98), eps=1e-9)

In [12]:
# Move the model and the cross-entropy criterion to the GPU.
# NOTE(review): the optimizer is constructed in the cell before this one; the torch.optim
# documentation recommends moving a model to the GPU before building its optimizer —
# consider swapping the two cells.
MyModel = MyModel.cuda()
loss = nn.CrossEntropyLoss()
loss=loss.cuda()

In [25]:
# Run the directory-based train_loop on the files under 'processed_data'.
train_loop('processed_data',MyModel,loss,optimizer)

encoder.layers.0.self_attn.in_proj_weight
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0')
grad shape: torch.Size([768, 256])
encoder.layers.0.self_attn.in_proj_bias
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

In [47]:
# Scratch check: take feature 0 of a dummy (1, 500, 147) batch, squeeze it, and restore the
# (batch, seq) layout from the sequence length of X.
X = torch.ones(1, 500, 147)
tmp = X[..., 0].squeeze().reshape(-1, X.size(1))
print(tmp.shape)

torch.Size([1, 500])


In [34]:
# Display the module tree of the model.
MyModel

DTN_model(
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-5): 6 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
        )
        (linear1): Linear(in_features=256, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=2048, out_features=256, bias=True)
        (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (fw_embedding): Linear(in_features=147, out_features=256, bias=True)
  (bw_embedding): Linear(in_features=147, out_features=256, bias=True)
  (pos_embedding): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (decoder): TransformerDecoder(
    (layers): Modul

In [43]:
# Collect the current gradient of every trainable parameter that has one, keyed by
# parameter name, and print each collected name.
grads = {}
for name, param in MyModel.named_parameters():
    if not param.requires_grad or param.grad is None:
        continue
    grads[name] = param.grad
    print(name)

In [35]:
# named_parameters() returns a generator, so the cell output is only its repr;
# iterate over it (or wrap it in list()) to see the entries.
MyModel.named_parameters()

<generator object Module.named_parameters at 0x7f537c62cc80>

In [26]:
# Print the value of every trainable parameter of the model.
for name, param in MyModel.named_parameters():
    if not param.requires_grad:
        continue
    print(param)

Parameter containing:
tensor([[-0.0080, -0.0340,  0.0320,  ...,  0.0427,  0.0338,  0.0587],
        [ 0.0332,  0.0604, -0.0110,  ..., -0.0404,  0.0610,  0.0520],
        [ 0.0406, -0.0042,  0.0636,  ..., -0.0483, -0.0571,  0.0294],
        ...,
        [ 0.0500,  0.0504, -0.0658,  ...,  0.0165, -0.0152,  0.0420],
        [ 0.0542, -0.0326, -0.0006,  ..., -0.0732,  0.0112,  0.0185],
        [-0.0599, -0.0287,  0.0264,  ..., -0.0038,  0.0119,  0.0065]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([-1.4005e-16,  1.2871e-16,  4.3445e-16, -4.3491e-16,  7.9608e-17,
        -4.0365e-16,  1.3634e-16, -1.0702e-16,  5.2501e-17, -8.6153e-17,
        -6.8561e-17, -3.7963e-16,  1.6106e-18,  1.5251e-16,  3.1912e-16,
         1.7904e-16,  5.2584e-16,  1.9402e-16,  6.9548e-17,  7.5359e-17,
         4.4212e-17,  1.3986e-16, -2.7724e-16,  5.3797e-17, -1.5284e-16,
         3.6497e-16,  3.6629e-16,  2.3950e-17, -1.3433e-16, -5.0669e-17,
         6.8587e-17,  1.2305e-18,  0.000