In [8]:
import os 
import sys
import time
import pickle
import random
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, Dataset ,ConcatDataset
sys.path.append(r'D:\WorkPath\Models\SwinGAN')
from MyDataSet import MultiMaskTimeSeriesDataset
import fancyimpute



def cal_MAPE(y_true, y_pred, mask, eps=1e-2):
    """Mean absolute percentage error over the entries selected by ``mask``.

    Args:
        y_true: Array of reference values.
        y_pred: Array of predicted values, indexable with the same ``mask``.
        mask: Boolean array used to index both inputs; only the selected
            entries contribute to the score.
        eps: Constant added to the denominator so that reference values at
            zero do not cause a division by zero. Defaults to ``1e-2``.

    Returns:
        The mean of ``|y_true - y_pred| / (|y_true| + eps)`` over the selected
        entries, or ``nan`` when the mask selects nothing.
    """
    y_true = y_true[mask]
    y_pred = y_pred[mask]
    if y_true.size == 0:
        # Nothing to score: return nan directly instead of letting np.mean
        # emit a "mean of empty slice" warning.
        return np.nan
    # Divide by the magnitude of the reference: with a signed denominator a
    # negative reference yields a negative (or, near -eps, exploding) term.
    return np.mean(np.abs(y_true - y_pred) / (np.abs(y_true) + eps))


In [9]:
def KNN_impute(data: MultiMaskTimeSeriesDataset, k, iter):
    """Evaluate fancyimpute's KNN imputer on randomly drawn items of ``data``.

    For each of ``iter`` random indices the item is fetched, the entries where
    the mask is zero are hidden, the array is imputed with ``fancyimpute.KNN``
    and the imputed values are compared with the originals at the hidden
    positions only.

    Args:
        data: Dataset supporting ``len()`` and integer indexing and exposing
            ``num_masks``. Each item provides the reference array at position 0
            and the mask at position 1 (assumed to be numpy arrays of equal
            shape -- TODO confirm against MultiMaskTimeSeriesDataset).
        k: Number of neighbours passed to ``fancyimpute.KNN``.
        iter: Number of random items to evaluate.

    Returns:
        ``(total_MSE, total_MAE)``: two lists with one entry per evaluated
        item. The first holds the squared error summed over the hidden entries
        divided by their count. The second holds ``cal_MAPE`` over the hidden
        entries (despite its name it is a MAPE, not an MAE).
    """
    # np.random.randint excludes its upper bound, so len(data) (not
    # len(data) - 1) is needed for the last item to be eligible; this also
    # matches KNN_impute_linear.
    # NOTE(review): the first 7 * num_masks indices are never sampled; the
    # reason is not visible in this notebook.
    test = np.random.randint(7 * data.num_masks, len(data), iter)
    total_MSE = []
    total_MAE = []
    imputer = fancyimpute.KNN(k=k, orientation='rows')
    for index in test:
        # Fetch the item once so the array and its mask always come from the
        # same __getitem__ call.
        sample = data[index]
        true_data, mask = sample[0], sample[1]
        hidden = 1 - mask

        Miss_data = true_data * mask
        # NOTE(review): observed entries that are exactly 0 also become NaN
        # here and are re-imputed along with the masked ones.
        Miss_data[Miss_data == 0] = np.nan
        imputed_data = imputer.fit_transform(Miss_data)

        # Zero out the observed positions so only hidden entries are scored.
        imputed_data = imputed_data * hidden
        true_data = true_data * hidden
        total_MSE.append(np.sum((imputed_data - true_data) ** 2) / np.sum(hidden))
        total_MAE.append(cal_MAPE(true_data, imputed_data, hidden.astype(bool)))
    return total_MSE, total_MAE



In [10]:
def KNN_impute_linear(data: MultiMaskTimeSeriesDataset, k, iter):
    """Evaluate a moving-window mean imputer along the first axis.

    For each of ``iter`` random indices the item is fetched, the entries where
    the mask is zero are hidden, and every hidden entry at position ``t`` of
    the first axis is replaced by the NaN-ignoring mean of rows
    ``max(0, t - k)`` to ``t + k - 1``. Entries whose window holds no observed
    value fall back to the mean over the first axis.

    Note: a later cell of this notebook defines a function with the same name;
    once that cell has run, it replaces this definition.

    Args:
        data: Dataset supporting ``len()`` and integer indexing and exposing
            ``num_masks``. Each item provides the reference array at position 0
            and the mask at position 1 (assumed to be numpy arrays of equal
            shape -- TODO confirm against MultiMaskTimeSeriesDataset).
        k: Half-width of the averaging window along the first axis.
        iter: Number of random items to evaluate.

    Returns:
        ``(total_MSE, total_MAE)``: two lists with one entry per evaluated
        item; the second list holds ``cal_MAPE`` values.
    """
    test = np.random.randint(7 * data.num_masks, len(data), iter)
    total_MSE = []
    total_MAE = []

    for index in test:
        # Fetch the item once so the array and its mask always come from the
        # same __getitem__ call.
        sample = data[index]
        true_data, mask = sample[0], sample[1]
        hidden = 1 - mask

        Miss_data = true_data * mask
        # NOTE(review): observed entries that are exactly 0 also become NaN.
        Miss_data[Miss_data == 0] = np.nan

        # Filled in place, so values imputed for earlier positions take part
        # in the windows of later ones.
        for t in range(len(Miss_data)):
            missing_here = np.isnan(Miss_data[t])
            if not np.any(missing_here):
                continue
            # Clamp the start: a negative slice start would wrap around to
            # the end of the array instead of stopping at the first row.
            window = Miss_data[max(0, t - k):t + k]
            Miss_data[t] = np.where(missing_here, np.nanmean(window, axis=0), Miss_data[t])

        # Windows without any observed value leave NaNs behind; fill those
        # with the mean over the first axis so the metrics stay finite.
        imputed_data = np.where(np.isnan(Miss_data), np.nanmean(Miss_data, axis=0), Miss_data)

        # Zero out the observed positions so only hidden entries are scored.
        imputed_data = imputed_data * hidden
        true_data = true_data * hidden
        total_MSE.append(np.sum((imputed_data - true_data) ** 2) / np.sum(hidden))
        total_MAE.append(cal_MAPE(true_data, imputed_data, hidden.astype(bool)))
    return total_MSE, total_MAE

In [11]:
def KNN_impute_linear(data: MultiMaskTimeSeriesDataset, k, iter):
    """Evaluate a 2-D local-window mean imputer on random items of ``data``.

    For each of ``iter`` random indices the item is fetched, the entries where
    the mask is zero are hidden, and each hidden cell ``(r, c)`` is replaced by
    the NaN-ignoring mean of the surrounding block of rows
    ``max(0, r - k) .. r + k - 1`` and columns ``max(0, c - k) .. c + k - 1``.
    Cells whose block holds no observed value fall back to their column mean.

    Args:
        data: Dataset supporting ``len()`` and integer indexing and exposing
            ``num_masks``. Each item provides a 2-D reference array at
            position 0 and the mask at position 1 (assumed to be numpy arrays
            of equal shape -- TODO confirm against MultiMaskTimeSeriesDataset).
        k: Half-width of the averaging block in both dimensions.
        iter: Number of random items to evaluate.

    Returns:
        ``(total_MSE, total_MAE)``: two lists with one entry per evaluated
        item. The first holds the squared error summed over the hidden entries
        divided by their count. The second holds ``cal_MAPE`` over the hidden
        entries (despite its name it is a MAPE, not an MAE).
    """
    test = np.random.randint(7 * data.num_masks, len(data), iter)
    total_MSE = []
    total_MAE = []

    for index in test:
        # Fetch the item once so the array and its mask always come from the
        # same __getitem__ call.
        sample = data[index]
        true_data, mask = sample[0], sample[1]
        hidden = 1 - mask

        Miss_data = true_data * mask
        # NOTE(review): observed entries that are exactly 0 also become NaN.
        Miss_data[Miss_data == 0] = np.nan

        n_rows, n_cols = Miss_data.shape
        # Visit only the missing cells, in row-major order. Cells are filled in
        # place, so earlier fills take part in the blocks of later cells.
        # NOTE(review): the block covers k cells before but only k - 1 cells
        # after the target (slice end is exclusive) -- confirm this is intended.
        for r, c in np.argwhere(np.isnan(Miss_data)):
            block = Miss_data[max(0, r - k):min(r + k, n_rows),
                              max(0, c - k):min(c + k, n_cols)]
            Miss_data[r, c] = np.nanmean(block)

        # Blocks without any observed value leave NaNs behind; replace those
        # with the mean of their column.
        column_means = np.nanmean(Miss_data, axis=0)
        imputed_data = np.where(np.isnan(Miss_data), column_means, Miss_data)

        # Zero out the observed positions so only hidden entries are scored.
        imputed_data = imputed_data * hidden
        true_data = true_data * hidden
        total_MSE.append(np.sum((imputed_data - true_data) ** 2) / np.sum(hidden))
        total_MAE.append(cal_MAPE(true_data, imputed_data, hidden.astype(bool)))
    return total_MSE, total_MAE

### 选择数据集

In [None]:
# Evaluate the imputation baselines on every pickled test set in test_path and
# collect the mean MSE / MAPE per file into the `test_record` table.
project_path = r'D:\WorkPath\Models\ImputeFormer'
test_path = os.path.join(project_path , r'Data\source_test_PEMS04')
KNN_K = 4        # neighbourhood size handed to both imputers
N_TRIALS = 300   # random samples evaluated per test file

# Sorted so the evaluation order does not depend on the OS directory order.
test_files = [os.path.join(test_path, name) for name in sorted(os.listdir(test_path))]

records = []
for file_path in test_files:
    # Work on the file name only, so underscores or the word 'linear' in the
    # directory part of the path cannot affect the parsing below.
    file_name = os.path.basename(file_path)

    # SECURITY: pickle.load can execute arbitrary code stored in the file;
    # only point test_path at trusted data.
    with open(file_path, 'rb') as f:
        test_data = pickle.load(f)

    if 'linear' in file_name:
        total_MSE, total_MAE = KNN_impute_linear(test_data, KNN_K, N_TRIALS)
    else:
        total_MSE, total_MAE = KNN_impute(test_data, KNN_K, N_TRIALS)

    # File names are underscore-separated. Field 3 of the file name is the
    # same token that index 5 of the full path selects under the directory
    # layout above; the last three fields are start, miss rate and type.
    name_parts = file_name.split('_')
    records.append({
        'route': name_parts[3],
        'start': name_parts[-3],
        'miss_rate': name_parts[-2],
        # Drop the file extension whatever its length.
        'type': os.path.splitext(name_parts[-1])[0],
        'MSE_test_loss': np.mean(total_MSE),
        'MAPE_test_loss': np.mean(total_MAE),
    })

# Explicit columns keep the table well-formed even when no file was found.
test_record = pd.DataFrame(
    records,
    columns=['route', 'start', 'miss_rate', 'type', 'MSE_test_loss', 'MAPE_test_loss'],
)
test_record = test_record.sort_values(['route', 'type', 'start'])
test_record