In [1]:
import os
import pandas as pd
import torch
from sklearn import preprocessing

data_type='c'
test_data = pd.read_csv("C:\\Users\\walter\\OneDrive\\桌面\\收集\\2024大數據競賽\\2024-pre-train\\2024-testing.csv")

def load_resistor_data(data_dir):
    # 创建一个字典来保存所有电阻和电压的数据
    data = {}
    
    # 遍历每个电压文件夹
    for voltage_folder in os.listdir(data_dir):
        voltage_path = os.path.join(data_dir, voltage_folder)
        if os.path.isdir(voltage_path):
            # 创建一个子字典来保存这个电压下的所有电阻数据
            data[voltage_folder] = {}
            
            # 遍历该电压文件夹中的所有电阻文件
            for resistor_file in os.listdir(voltage_path):
                resistor_path = os.path.join(voltage_path, resistor_file)
                if resistor_file.endswith('.csv'):
                    # 读取CSV文件到一个DataFrame中
                    resistor_data = pd.read_csv(resistor_path)
                    
                    # 将数据存入字典中
                    resistor_name = os.path.splitext(resistor_file)[0]  # 获取文件名（去掉扩展名）
                    data[voltage_folder][resistor_name] = resistor_data
                    
    return data

def scale_resistor_data(data, scalers):
    scaled_data = {}
    
    for voltage, resistors in data.items():
        scaled_data[voltage] = {}
        for resistor, df in resistors.items():
            # 只对第1列到第11列进行逐列归一化，第0列保持不变
            scaled_df = df.copy()
            min_val = scalers[resistor]['min']
            max_val = scalers[resistor]['max']
            
            for col in df.columns[1:]:  # 跳过第0列
                # 对每一列进行归一化: (x - min) / (max - min)
                scaled_df[col] = (df[col] - min_val) / (max_val - min_val)
            
            scaled_data[voltage][resistor] = scaled_df
            
    return scaled_data


def create_scalers(data):
    scalers = {}
    
    for resistor, df in data.items():
        # 计算 DataFrame 所有列的全局最小值和最大值
        min_val = df.iloc[:, 1:].min().min()  # 忽略第0列，找到所有列中的最小值
        max_val = df.iloc[:, 1:].max().max()  # 忽略第0列，找到所有列中的最大值

        # 将 min_val 和 max_val 存储为字典的值
        scalers[resistor] = {'min': min_val, 'max': max_val}
        
    return scalers

    


def concatenate_resistor_data(data):
    concatenated_data = {}
    
    # 遍历所有电压和电阻，将同一个电阻的数据进行拼接
    for voltage, resistors in data.items():
        for resistor, df in resistors.items():
            if resistor not in concatenated_data:
                concatenated_data[resistor] = df.copy()
            else:
                concatenated_data[resistor] = pd.concat([concatenated_data[resistor], df], ignore_index=True)
    concatenated_data[data_type] = pd.concat([concatenated_data[data_type], test_data], ignore_index=True)

    
    return concatenated_data

def scale_test_data(testdata, scalers):
    scaled_df = testdata.copy()

    min_val = scalers[resistor]['min']
    max_val = scalers[resistor]['max']
    
    for col in testdata.columns[1:]:  # 跳过第0列
        # 对每一列进行归一化: (x - min) / (max - min)
        scaled_df[col] = (df[col] - min_val) / (max_val - min_val)
    
    
            
    return scaled_df

# 假设数据位于 /data/ 目录中
data_dir = 'C:\\Users\\walter\\OneDrive\\桌面\\收集\\2024大數據競賽\\2024-pre-train'
resistor_data = load_resistor_data(data_dir)

# 第二步：拼接相同电阻的所有电压数据
concatenated_resistor_data = concatenate_resistor_data(resistor_data)

# 第三步：为每个电阻的每列数据创建并拟合 MinMaxScaler
minmax_scalers = create_scalers(concatenated_resistor_data)

# 第四步：使用拟合好的 MinMaxScaler 对每个电阻数据进行标准化
minmax_resistor_data = scale_resistor_data(resistor_data, minmax_scalers)


# 查看读取的数据结构
for voltage, resistors in resistor_data.items():
    print(f"Voltage: {voltage}")
    for resistor, df in resistors.items():
        print(f"  Resistor: {resistor}, Data shape: {df.shape}")
        print("df", df)

# 查看读取的数据结构
for voltage, resistors in minmax_resistor_data.items():
    print(f"Voltage: {voltage}")
    for resistor, df in resistors.items():
        print(f"  Resistor: {resistor}, Data shape: {df.shape}")
        print("df", df)
minmax_test_data=scale_test_data(test_data, minmax_scalers)

Voltage: 1
  Resistor: a, Data shape: (4000, 11)
df         id   y01   y02   y03   y04   y05   y06   y07   y08   y09   y10
0        1  1072  1015  1024  1116  1196  1194  1190  1196  1214  1181
1        2  1072  1015  1024  1115  1194  1193  1189  1194  1212  1180
2        3  1070  1015  1023  1113  1190  1189  1185  1192  1209  1177
3        4  1067  1014  1021  1111  1187  1186  1182  1188  1205  1174
4        5  1065  1012  1020  1108  1184  1183  1179  1185  1202  1170
...    ...   ...   ...   ...   ...   ...   ...   ...   ...   ...   ...
3995  3996   836   840   843   839   840   836   835   840   839   841
3996  3997   836   840   843   839   840   836   835   840   839   841
3997  3998   836   840   843   839   840   837   835   840   839   841
3998  3999   836   840   843   839   840   837   835   840   839   841
3999  4000   836   840   843   839   841   836   835   839   839   841

[4000 rows x 11 columns]
  Resistor: b, Data shape: (4000, 11)
df         id   y01   y02   y03 

In [2]:

print(f"test target output: {resistor_data['13']['a'].iloc[:50, 10]}")



test target output: 0     171
1     172
2     171
3     170
4     170
5     169
6     168
7     167
8     166
9     165
10    165
11    164
12    163
13    162
14    161
15    161
16    160
17    159
18    158
19    158
20    158
21    157
22    156
23    156
24    155
25    154
26    154
27    153
28    152
29    152
30    151
31    151
32    150
33    149
34    148
35    148
36    148
37    147
38    147
39    146
40    145
41    145
42    145
43    144
44    144
45    143
46    142
47    142
48    141
49    140
Name: y10, dtype: int64


# 模型
##### 子模型1

In [3]:
import torch
import torch.nn as nn

class ResistancePredictor(nn.Module):
    def __init__(self):
        super(ResistancePredictor, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=10, out_channels=16, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(16)
        self.fc1 = nn.Linear(16 * 50 , 128)
        self.fc2 = nn.Linear(128, 1)
    
    def forward(self, x):
        #print("x",x.shape)
        x = x.permute(1, 0).unsqueeze(0)  # 先 permute 然後 unsqueeze
        #print("x",x.shape)
       
        x = torch.relu(self.bn1(self.conv1(x)))
        #print("x",x.shape)

        x = x.view(x.size(0), -1)
        #print("x",x.shape)

        x = torch.relu(self.fc1(x))
        #print("x",x.shape)

        resistance = self.fc2(x)
        resistance=resistance.view(-1)
        #print("resistance",resistance.shape)

        return resistance



##### 子模型2

In [4]:


class resmodel(nn.Module):
    def __init__(self,input_size):
        super(resmodel,self).__init__()
        print('input_size_1',input_size)

        self.res=nn.Linear(50+1,50)
        self.res2=nn.Linear(10,50)

    def forward(self,x):
        #x=torch.relu(self.res(x))
        return self.res(x)



##### 合併

In [5]:
device = torch.device("cuda:0")

class CompleteModel(nn.Module):
    def __init__(self):
        super(CompleteModel, self).__init__()
        self.resistance_predictor = ResistancePredictor()
        self.timeLong=50
        # ARX模型的輸入包括電流數據和電阻值
        self.res=resmodel(input_size=25) 

    def forward(self, bcd_input, target_input,j, mode='train',):
        if mode == 'train':
            resistance = self.resistance_predictor(bcd_input)

            # 确保输出张量初始化时在正确的设备上
            output = torch.empty(4000)

            for i in range(0,3950,25):

                temp = target_input[i:i+self.timeLong]
                #print('temp size',temp.size())
                # 将 resistance_predictor 拼接到 temp 中
                temp = torch.cat((temp, resistance), dim=-1)
                # 创建一个全为零的长度为 10 的一维张量
                one_hot_tensor = torch.zeros(10)

                # 将第 j-1 个位置的值设置为 1 (因为索引从 0 开始)
                one_hot_tensor[j - 1] = 1
                #temp = torch.cat((temp, one_hot_tensor.to(device)))

                #print('temp',temp.shape)
                # 通过 resmodel 模型预测
                res = self.res(temp)
                res_temp=res[25:]
                # 将结果拼接到 data 中

                if i == 50:
                    output[i+self.timeLong:i+self.timeLong+25] = res[:25]

                elif i == 3975:
                    output[i+self.timeLong:i+self.timeLong+25] = res[25:]
                else:
                    output[i+self.timeLong:i+self.timeLong+25] = (res[:25]+res_temp)/2

            
            output = output.to(device)
            #print("output shape",output.shape)
            return output[50:]
        
        elif mode == 'test':
            resistance = self.resistance_predictor(bcd_input)
            
            for i in range(50, 4000, 25):
                temp = target_input[-self.timeLong:]
                temp = torch.cat((temp, resistance), dim=-1)
                one_hot_tensor = torch.zeros(10)

                # 将第 j-1 个位置的值设置为 1 (因为索引从 0 开始)
                one_hot_tensor[j - 1] = 1
                #temp = torch.cat((temp, one_hot_tensor.to(device)))
                res = self.res(temp)
                res_temp=res[:25]
                if i == 50:
                    target_input = torch.cat((target_input, res[:25]), dim=0)
                elif i == 3975:
                    target_input = torch.cat((target_input, res[25:]), dim=0)
                else:
                    target_input = torch.cat((target_input, (res[:25] + res_temp) / 2), dim=0)
            #print("target_input shape",target_input.shape)

            return target_input[50:]

        '''
        for i in range(79):

            temp = target_input[-self.timeLong:]
            #print('temp size',temp.size())
            # 将 resistance_predictor 拼接到 temp 中
            
            # 通过 resmodel 模型预测
            res = self.res(temp)
            #print('temp',temp)
            # 将结果拼接到 data 中
            target_input = torch.cat((target_input, res), dim=0)
        return target_input[50:]
        '''

In [6]:
torch.cuda.empty_cache()

In [7]:
class EarlyStopping:
    def __init__(self, patience=7, min_delta=0.001, verbose=False):
        """
        patience (int): 等待验证集性能提升的训练周期数，如果超过这个数值则停止训练
        min_delta (float): 性能提升的最小值，如果小于这个值则认为没有显著提升
        verbose (bool): 如果为True，会在每次验证集性能提升时打印消息
        """
        self.patience = patience
        self.min_delta = min_delta
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = float('inf')

    def __call__(self, val_loss, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.min_delta:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''保存验证集损失最小的模型参数'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), 'checkpoint.pt')
        self.val_loss_min = val_loss

# 訓練

In [9]:
import torch.optim as optim
from torch.nn import init
from torch.optim.lr_scheduler import ReduceLROnPlateau
import matplotlib.pyplot as plt
import numpy as np
import time


early_stopping = EarlyStopping(patience=10, verbose=True)

type='c'
# 建立模型
model = CompleteModel()
device = torch.device("cuda:0")

# 初始化网络参数
for params in model.parameters():
    init.normal_(params, mean=0, std=0.01)

print(model)
# 定義損失函數和優化器
criterion = nn.MSELoss()

optimizer = optim.Adam(model.parameters(), lr=0.001)

# 假設有訓練數據 train_bcd_input, train_target_input, train_target_output

scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.8, verbose=True)

num_epochs=500
# 使用一個簡單的訓練迴圈
all_train_loss=[]
all_test_loss=[]
model=model.to(device)
x=np.linspace(start=50,stop=4000,num=3950)
graph_epoch=20

for epoch in range(num_epochs):  # 假設訓練100個epoch
    start=time.time()

    model.train()
    trl=[]
    for voltage in range(1, 12):     
        voltage = str(voltage)  # 將數字轉換為字串
    
        input_1=minmax_resistor_data[voltage][type].iloc[:50,1:11].to_numpy()
        input_1=torch.from_numpy(input_1).float()
        input_1 = input_1.to(device)
        w=0.00375

        optimizer.zero_grad()
        

        for j in range(1,11):
            input_2=minmax_resistor_data[voltage][type].iloc[:,j].to_numpy()
            input_2=torch.from_numpy(input_2).float()
            input_2 = input_2.to(device) 
            # 前向傳播
            outputs = model(input_1, input_2,mode='train',j=j)
            
            target_out=resistor_data[voltage][type].iloc[50:,j].to_numpy()
            target_out=torch.from_numpy(target_out).float()
            target_out = target_out.to(device)
            # 計算損失
            reg_loss=0
            for params in model.parameters():
                reg_loss+=torch.sum(torch.abs(params))

            
            loss = torch.sqrt(criterion(outputs, target_out))
            trl.append(loss.item())
            if epoch%graph_epoch==int(graph_epoch/2):
                plt.plot(x,(target_out-outputs).cpu().detach().numpy(),label=f'y{j}')

            loss=loss+w*reg_loss
            # 反向傳播和優化
            loss.backward()

            
            optimizer.step()
            trl.append(loss.item())
        if epoch%graph_epoch==int(graph_epoch/2):
            plt.xlabel('time')
            plt.ylabel('loss')
            plt.title(f'epoch{epoch},voltage{voltage}')     # 設定 title

            plt.legend()

            plt.show()  
            plt.close()
                
    all_train_loss.append(np.mean(trl))
    scheduler.step(np.mean(trl))

    #test
    tel=[]
    test_input_1=minmax_resistor_data['13'][type].iloc[:50,1:11].to_numpy()
    test_input_1=torch.from_numpy(test_input_1).float()
    test_input_1 = test_input_1.to(device)

    for j in range(1,11):
        test_input_2=minmax_resistor_data['13'][type].iloc[:50,j].to_numpy()
        test_input_2=torch.from_numpy(test_input_2).float()
        test_input_2 = test_input_2.to(device)


        test_output=model(test_input_1,test_input_2,mode='test',j=j)
       
        test_target_out=resistor_data['13'][type].iloc[50:,j].to_numpy()
        test_target_out=torch.from_numpy(test_target_out).float()
        test_target_out = test_target_out.to(device)


        test_loss = torch.sqrt(criterion(test_output, test_target_out))
        tel.append(test_loss.item())
        if epoch%graph_epoch==int(graph_epoch/2):

            plt.plot(x,(test_target_out-test_output).cpu().detach().numpy(),label=f'y{j}')

    all_test_loss.append(np.mean(tel))
    if epoch%graph_epoch==int(graph_epoch/2):

        plt.xlabel('time')
        plt.ylabel('loss')
        plt.title(f'test epoch{epoch},voltage{voltage}')     # 設定 title

        plt.legend()

        plt.show()  
        plt.close()
    
    print(f'Epoch {epoch}, Loss: {np.mean(trl)},test loss: {np.mean(tel)}')
    print('time',time.time()-start,'sec','net par')
    early_stopping(np.mean(tel), model)
    #达到早停止条件时，early_stop会被置为True
    if early_stopping.early_stop:
        print("Early stopping")
        break #跳出迭代，结束训练

#print("all_train_loss",all_train_loss)
x=np.linspace(start=0,stop=num_epochs,num=len(all_train_loss))
#print("x",x)

plt.plot(x,all_train_loss, 'r:')
plt.plot(x,all_test_loss, 'b:')
plt.legend(['train loss','test loss'])

plt.xlabel('epoch')
plt.ylabel('loss')
plt.yscale('log')          # log y-axis

plt.show()  

    


torch.cuda.empty_cache()


'''
timeLong=50
for epoch in range(10000):
    all_outputs=train_target_input
    temp = train_target_input[-timeLong:]

    for i in range(3950):
        temp = temp[-timeLong:]
        optimizer.zero_grad()

        #print('temp size',temp.size())
        # 前向傳播
        outputs = model(temp)
        
        # 計算損失
        #print('outputs',outputs.size())
        #print('train_target_output[i]',train_target_output[i].size())
        train_target_output_num = torch.tensor([train_target_output[i].item()])

        loss = torch.sqrt(criterion(outputs, train_target_output_num))
        
        # 反向傳播和優化
        loss.backward()
        optimizer.step()
        temp = torch.cat((temp, outputs), dim=0)
        all_outputs = torch.cat((all_outputs, outputs), dim=0)


    # 計算損失
    #print('all_outputs size',all_outputs[50:].size())
    #print("train_target_output",train_target_output.size())
    all_loss = torch.sqrt(criterion(all_outputs[50:], train_target_output))
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, all_Loss: {all_loss.item()}')

print('outputs',all_outputs[50:])
'''


input_size_1 25
CompleteModel(
  (resistance_predictor): ResistancePredictor(
    (conv1): Conv1d(10, 16, kernel_size=(3,), stride=(1,), padding=(1,))
    (bn1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (fc1): Linear(in_features=800, out_features=128, bias=True)
    (fc2): Linear(in_features=128, out_features=1, bias=True)
  )
  (res): resmodel(
    (res): Linear(in_features=51, out_features=50, bias=True)
    (res2): Linear(in_features=10, out_features=50, bias=True)
  )
)
Epoch 0, Loss: 507.5671568437056,test loss: 129.67499465942382
time 11.648988246917725 sec net par
Validation loss decreased (inf --> 129.674995).  Saving model ...


In [None]:


import torch
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from torch.nn import init
from torch_lr_finder import LRFinder


class ResistorDataset(Dataset):
    def __init__(self, resistor_data, voltages, type):
        self.resistor_data = resistor_data
        self.voltages = voltages
        self.type = type

    def __len__(self):
        return len(self.voltages) * 11  # 每个电压有11个input_2

    def __getitem__(self, idx):
        voltage_idx = idx // 11  # 计算电压索引
        j = idx % 11  # 计算 j 索引

        voltage = str(self.voltages[voltage_idx])
        input_1 = self.resistor_data[voltage][self.type].iloc[:50].to_numpy().flatten()
        input_2 = self.resistor_data[voltage][self.type].iloc[:, j].to_numpy()
        target_input = self.resistor_data[voltage][self.type].iloc[50:, j].to_numpy()

        input_1 = torch.from_numpy(input_1).float()
        input_2 = torch.from_numpy(input_2).float()
        target_input = torch.from_numpy(target_input).float()

        #print("input_2",input_2)
        return input_1, input_2,j,target_input

from torch_lr_finder import LRFinder

from torch_lr_finder import LRFinder

class CustomLRFinder(LRFinder):
    def _move_to_device(self, tensor, non_blocking=True):
        return tensor.to(self.device, non_blocking=non_blocking)
    
    def _train_batch(self, train_iter, accumulation_steps, non_blocking_transfer=True):
        self.model.train()
        total_loss = None

        self.optimizer.zero_grad()

        for i in range(accumulation_steps):
            try:
                inputs, target_input = next(train_iter)
            except StopIteration:
                return -1

            # 解包 inputs
            inputs1, inputs2,j = inputs
           
            inputs1=inputs1.view(-1)
            inputs2=inputs2.view(-1)
            j=j.view(-1)

            #print("inputs1 s",inputs1.shape)
            #print("inputs2 s",inputs2.shape)
            # 移動數據到設備上
            inputs1 = self._move_to_device(inputs1, non_blocking=non_blocking_transfer)
            inputs2 = self._move_to_device(inputs2, non_blocking=non_blocking_transfer)
            target_input = self._move_to_device(target_input, non_blocking=non_blocking_transfer)

            # 前向傳播
            outputs = self.model(inputs1, inputs2,j)
            loss = self.criterion(outputs, target_input)

            # 平均損失
            loss /= accumulation_steps

            # 反向傳播
            loss.backward()

            if total_loss is None:
                total_loss = loss.detach().item()
            else:
                total_loss += loss.detach().item()

        self.optimizer.step()

        return total_loss




       



# Initialize and wrap the model
model = CompleteModel()

# Define voltages and create dataset and dataloaders
voltages = list(range(1, 14))  # 从1到13的电压值
type = 'a'

train_dataset = ResistorDataset(resistor_data, voltages=voltages[:-1], type=type)  # 使用1-12的电压作为训练集
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
'''
for input_1, input_2,t in train_loader:
    print("input_1",input_1)
    print("input_2",input_2)
    print("t",t)
'''
test_dataset = ResistorDataset(resistor_data, voltages=[13], type=type)  # 使用13的电压作为测试集
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# Initialize model parameters
device = torch.device("cuda:0")
print(model)
for params in model.parameters():
    #print(params)
    init.normal_(params, mean=0, std=0.01)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

# Use torch-lr-finder to find optimal learning rate
# 使用自定義的 LRFinder
class DataLoaderWrapper(torch.utils.data.DataLoader):
    def __iter__(self):
        for idx, batch in enumerate(super().__iter__()):
            # 構建符合 (inputs, targets) 結構的批次數據
            inputs = (batch[0], batch[1], batch[2])
            targets = batch[3]
            
            #print(f"DataLoaderWrapper output - Index: {idx}, Inputs Length: {len(inputs)}, Targets Shape: {targets.shape}")
            
            yield inputs, targets

lr_finder = CustomLRFinder(model, optimizer, criterion, device=device)
lr_finder.range_test(DataLoaderWrapper(test_dataset),start_lr=1e-6, end_lr=1e-3, num_iter=1000, smooth_f=0.05)
#lr_finder.range_test(test_loader, end_lr=1, num_iter=100)
lr_finder.plot()  # 显示损失函数与学习率的关系图
lr_finder.reset()  # 重置模型和优化器到初始状态
