In [2]:
# 參數調整
# 設置ARIMA模型的參數
p, d, q = 5, 1, 1  # 請根據實際情況調整
data_type = 'a'  # 或 'b'，根據實際情況

In [3]:
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler

def load_resistor_data(data_dir):
    # 创建一个字典来保存所有电阻和电压的数据
    data = {}
    
    # 遍历每个电压文件夹
    for voltage_folder in os.listdir(data_dir):
        voltage_path = os.path.join(data_dir, voltage_folder)
        if os.path.isdir(voltage_path):
            # 创建一个子字典来保存这个电压下的所有电阻数据
            data[voltage_folder] = {}
            
            # 遍历该电压文件夹中的所有电阻文件
            for resistor_file in os.listdir(voltage_path):
                resistor_path = os.path.join(voltage_path, resistor_file)
                if resistor_file.endswith('.csv'):
                    # 读取CSV文件到一个DataFrame中
                    resistor_data = pd.read_csv(resistor_path)
                    
                    # 将数据存入字典中
                    resistor_name = os.path.splitext(resistor_file)[0]  # 获取文件名（去掉扩展名）
                    data[voltage_folder][resistor_name] = resistor_data
                    #print("resistor_name",resistor_name)
                    
    return data

def scale_resistor_data(data, scalers):
    scaled_data = {}
    
    for voltage, resistors in data.items():
        scaled_data[voltage] = {}
        for resistor, df in resistors.items():
            # 只对第1列到第11列进行逐列标准化，第0列保持不变
            scaled_df = df.copy()
            scaler = scalers[resistor]
            
            # 对第1列到第11列进行标准化
            scaled_df.iloc[:, 1:] = (df.iloc[:, 1:]-scalers[resistor]['mean'])/scalers[resistor]['std']
            
            scaled_data[voltage][resistor] = scaled_df
            
    return scaled_data


def create_scalers(data):
    scalers = {}
    new_data={}
    for key, resistor_data in data.items():  # 正确解包
        
        new_data = resistor_data.iloc[:, 1:]

        # 计算整体均值和标准差
        mean = new_data.values.mean()
        std = new_data.values.std()
        
        # 保存scaler到字典中
        scalers[key] = {'mean':mean,'std':std}
        
    return scalers

def concatenate_resistor_data(data):
    concatenated_data = {}
    
    # 遍历所有电压和电阻，将同一个电阻的数据进行拼接
    for voltage, resistors in data.items():
        for resistor, df in resistors.items():
            if resistor not in concatenated_data:
                concatenated_data[resistor] = df.copy()
            else:
                concatenated_data[resistor] = pd.concat([concatenated_data[resistor], df], ignore_index=True)
    
    return concatenated_data



# 假设数据位于 /data/ 目录中
data_dir = 'C:\\Users\\walter\\OneDrive\\桌面\\收集\\2024大數據競賽\\2024-pre-train'
resistor_data = load_resistor_data(data_dir)
'''
# 第二步：拼接相同电阻的所有电压数据
concatenated_resistor_data = concatenate_resistor_data(resistor_data)

# 第三步：为每个电阻的每列数据创建并拟合 StandardScaler
scalers = create_scalers(concatenated_resistor_data)

# 第四步：使用拟合好的 StandardScaler 对每个电阻数据进行标准化
standardized_resistor_data = scale_resistor_data(resistor_data, scalers)
'''


'\n# 第二步：拼接相同电阻的所有电压数据\nconcatenated_resistor_data = concatenate_resistor_data(resistor_data)\n\n# 第三步：为每个电阻的每列数据创建并拟合 StandardScaler\nscalers = create_scalers(concatenated_resistor_data)\n\n# 第四步：使用拟合好的 StandardScaler 对每个电阻数据进行标准化\nstandardized_resistor_data = scale_resistor_data(resistor_data, scalers)\n'

In [4]:
import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning

warnings.simplefilter('ignore', ConvergenceWarning)

In [5]:
import numpy as np
'''
def custom_inverse_transform(mean, std, data):
    """
    手動實作反標準化
    :param mean: 各特徵的均值，形狀為 (n_features,)
    :param std: 各特徵的標準差，形狀為 (n_features,)
    :param data: 標準化後的一維數據，形狀為 (n_features,)
    :return: 反標準化後的數據，形狀為 (n_features,)
    """
    #print("std",std)
    #print("mean",mean)
    #print("data",data.shape)

    # 使用反標準化公式
    original_data = data * std + mean
    
    return original_data
# 讀取均值
mean = scalers[data_type]['mean']

# 讀取標準差（注意這裡使用的是 `scale_`，而不是 `std_`）
std = scalers[data_type]['std']

# 反標準化一筆示例數據
example_data = standardized_resistor_data['1']['a'].iloc[0:50, 1].values  # 将 1D 数组转换为 2D 数组
original_data = custom_inverse_transform(mean,std, example_data)
print("Original data after inverse transform:", original_data - resistor_data['1']['a'].iloc[0:50, 1].values)


'''

'\ndef custom_inverse_transform(mean, std, data):\n    """\n    手動實作反標準化\n    :param mean: 各特徵的均值，形狀為 (n_features,)\n    :param std: 各特徵的標準差，形狀為 (n_features,)\n    :param data: 標準化後的一維數據，形狀為 (n_features,)\n    :return: 反標準化後的數據，形狀為 (n_features,)\n    """\n    #print("std",std)\n    #print("mean",mean)\n    #print("data",data.shape)\n\n    # 使用反標準化公式\n    original_data = data * std + mean\n    \n    return original_data\n# 讀取均值\nmean = scalers[data_type][\'mean\']\n\n# 讀取標準差（注意這裡使用的是 `scale_`，而不是 `std_`）\nstd = scalers[data_type][\'std\']\n\n# 反標準化一筆示例數據\nexample_data = standardized_resistor_data[\'1\'][\'a\'].iloc[0:50, 1].values  # 将 1D 数组转换为 2D 数组\noriginal_data = custom_inverse_transform(mean,std, example_data)\nprint("Original data after inverse transform:", original_data - resistor_data[\'1\'][\'a\'].iloc[0:50, 1].values)\n\n\n'

In [12]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from math import sqrt
import multiprocessing as mp
import matplotlib.pyplot as plt

def train_and_evaluate_arx(y, exog, p, d, q,input_size,output_size,k):
    train_size=input_size
    # 訓練數據
    #y_train = y[k-train_size:k]
    y_train = y[:k]
    exog_train=exog[:k,:]
    if k == 4000-int(output_size/2):

        # 測試數據
        exog_test=exog[k:k+int(output_size/2),:]
        #print("exog_test",exog_test.shape)

        # 訓練ARX模型
        model = ARIMA(y_train, exog=exog_train, order=(p,d,q))
        results = model.fit()
        # 預測
        forecast = results.forecast(steps=int(output_size/2), exog=exog_test)
        
        return forecast
    else:
         # 測試數據
        exog_test=exog[k:k+output_size,:]
        #print("exog_test",exog_test.shape)

        # 訓練ARX模型
        model = ARIMA(y_train, exog=exog_train, order=(p,d,q))
        results = model.fit()
        # 預測
        forecast = results.forecast(steps=output_size, exog=exog_test)
        
        return forecast

def train(resistor_data,input_size,output_size):

    predictions = resistor_data['13'][data_type].iloc[:,:].to_numpy()  # 初始包含 id 列

    exog_trian =resistor_data['13'][data_type].iloc[:,0].to_numpy()
    exog_trian = exog_trian.reshape(-1, 1)



    #print("exog",exog.shape)
    input_size=input_size
    output_size=output_size

    x=np.linspace(start=50,stop=4000,num=3950)

    # 绘制误差图
    plt.figure()
    all_rmse=[]
    for j in range(1, 11):
        y_target = resistor_data['13'][data_type].iloc[:,j].to_numpy()

        y_train = resistor_data['13'][data_type].iloc[:50,j].to_numpy()
        for voltage in range(1,13):
            voltage = str(voltage)  # 將數字轉換為字串
            current_data = resistor_data[voltage][data_type].iloc[:, j].to_numpy()
            current_data = current_data.reshape(-1, 1)
            exog_trian = np.concatenate((exog_trian, current_data), axis=1)



            #print("exog_trian",exog_trian.shape)


        for k in range(50, 4000, int(output_size/2)):
                #print("exog",type(exog_trian))
                #print("y_train",type(y_train))

                forecast = train_and_evaluate_arx(y_train, exog_trian, p, d, q,input_size,output_size,k)
                #forecast = torch.tensor(forecast)
                #print("forecast",forecast.shape)
                #forecast=custom_inverse_transform(mean,std, forecast)
                #print("forecast",forecast.shape)

                forecast_temp=forecast[int(output_size/2):]
                #print("forecast[:int(output_size/2)]",forecast[:int(output_size/2)].shape)
                #print("forecast_temp",forecast_temp.shape)

                if k == 50:
                   y_train = np.concatenate((y_train, forecast[:int(output_size/2)]), axis=0)
                elif k == 4000-int(output_size/2):
                    y_train = np.concatenate((y_train,forecast),axis=0)
                else:
                    y_train =np.concatenate( ( y_train, (forecast[:int(output_size/2)]+forecast_temp) /2 ), axis=0)
                #print("tp shape",tp.shape,"k",k)

       
        predictions[:,j]=y_train
        plt.plot(x,(y_target[50:] - y_train[50:]), label=f'y {j}')
        rmse = sqrt(mean_squared_error(y_target[50:], y_train[50:]))
        all_rmse.append(rmse)
        print(f'y{j}:rmse {rmse}')

        
    plt.title(f'13{data_type} 目標-預測')
    plt.xlabel('時間')
    plt.ylabel('差')
    plt.legend()
    plt.show()
    plt.close()
    print(f'y{j}:all mean rmse {np.mean(all_rmse)}')

    return predictions[50:,]
        
res=train(resistor_data=resistor_data,input_size=50,output_size=100)
np.savetxt("C:/Users/walter/OneDrive/桌面/收集/2024大數據競賽/測試結果/時序_平均out100.csv", res, delimiter=',')      

        






y1:rmse 0.5764080312698435
y2:rmse 0.5738215359589188
y3:rmse 0.5852277315189633
y4:rmse 0.5784326901341058
y5:rmse 0.5826374272097613
y6:rmse 0.5782121447623778
