In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.parameter import Parameter
import torch.nn.functional as F
from tqdm import tqdm
import math
import random
import numpy as np
import pandas as pd
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

device = "cuda" if torch.cuda.is_available() else "cpu"

In [2]:
df = pd.read_csv("./dataset/" + 'pm25_ground.csv')
df['datetime'] = pd.to_datetime(df['datetime'])
month2 = df[df['datetime'].dt.month.isin([2])]
month3 = df[df['datetime'].dt.month.isin([3])]
month5 = df[df['datetime'].dt.month.isin([5])]
month6 = df[df['datetime'].dt.month.isin([6])]
month8 = df[df['datetime'].dt.month.isin([8])]
month9 = df[df['datetime'].dt.month.isin([9])]
month11 = df[df['datetime'].dt.month.isin([11])]
month12 = df[df['datetime'].dt.month.isin([12])]

In [2]:

def make_deltas(masks):
    deltas = []
    for h in range(len(masks)):
        if h == 0:
            deltas.append([1 for _ in range(masks.shape[1])])
        else:
            deltas.append([1 for _ in range(masks.shape[1])] + (1-masks[h]) * deltas[-1])
    
    return list(deltas)

In [None]:
def eval_bi_model_test(model,df, rbf_df):
    df['datetime'] = pd.to_datetime(df['datetime'])
    rbf_df['datetime'] = pd.to_datetime(rbf_df['datetime'])
    ground_df = pd.to_datetime(ground_df['datetime'])

    ori_error = 0.0
    ori_test_mask_num = 0.0

    for a in [6,9,12,3]:

        missing_month = df[df['datetime'].dt.month.isin([a])]
        before_month = df[df['datetime'].dt.month.isin([a - 1])]
        ground_month = ground_df[ground_df['datetime'].dt.month.isin([a])]

        rbf_month = rbf_df[rbf_df['datetime'].dt.month.isin([a])][df.columns[1:]]

        before_mask = np.where(np.isnan(before_month[before_month.columns[1:]]), np.nan, 1.) 
        missing_mask = np.where(np.isnan(missing_month[missing_month.columns[1:]]), np.nan, 1.)
        gt_mask = np.where(np.isnan(ground_month[ground_month.columns[1:]]), np.nan, 1.)
        len_ = min(before_mask.shape[0], missing_mask.shape[0])

        test_index = gt_mask - before_mask

        test_df = pd.DataFrame(missing_month[missing_month.columns[1:]].values[:len_] * np.where(np.isnan(before_month[before_month.columns[1:]]), np.nan, 1.)[:len_], columns=df.columns[1:])

        
        values = ((test_df - df[df.columns[1:]].mean()) / df[df.columns[1:]].std()).values
        shp = values.shape

        test_masks = ~np.isnan(values)
        ori_masks = ~np.isnan(missing_month[missing_month.columns[1:]].values[:len_])
        test_masks = test_masks.reshape(shp)
        rbf_x = torch.from_numpy(rbf_month.values[:len_]).to(torch.float32)

        deltas = np.array(make_deltas(test_masks))
        deltas = torch.from_numpy(deltas).to(torch.float32)
        ori_masks = torch.from_numpy(ori_masks).to(torch.float32)
        test_masks = torch.from_numpy(test_masks).to(torch.float32)
        values = torch.nan_to_num(torch.from_numpy(values).to(torch.float32))

        dataset = torch.cat([values.unsqueeze_(0), deltas.unsqueeze_(0), test_masks.unsqueeze_(0), rbf_x.unsqueeze_(0)], dim = 0).unsqueeze_(0)

        model.eval()
        loss, x_loss, back_x_loss, loss_c, bi_c_hat, bi_imputation = model(dataset.to(device))

        Nonscale_imputataion = pd.DataFrame(bi_c_hat[0].cpu().detach() , columns= df.columns[1:])
        Nonscale_imputataion = (Nonscale_imputataion * df[df.columns[1:]].std()) + df[df.columns[1:]].mean()
        missing_month = missing_month[df.columns[1:]].fillna(0)

        masks = ori_masks - test_masks

        ori_error += np.sum(np.abs((Nonscale_imputataion.values - missing_month.values[:len_]) * masks.cpu().numpy()))
        ori_test_mask_num += np.sum(masks.cpu().numpy())

        print(a,"month")
        print(np.sum(masks.cpu().numpy()))
        print(np.sum(np.abs((Nonscale_imputataion.values - missing_month.values[:len_]) * masks.cpu().numpy())) / np.sum(masks.cpu().numpy()))

    print("Original MAE :", ori_error / ori_test_mask_num)

In [7]:
def eval_bi_model_test(model,df, rbf_df):
    df['datetime'] = pd.to_datetime(df['datetime'])
    rbf_df['datetime'] = pd.to_datetime(rbf_df['datetime'])

    ori_error = 0.0
    ori_test_mask_num = 0.0

    for a in [6,9,12,3]:

        ground_month = df[df['datetime'].dt.month.isin([a])]
        before_month = df[df['datetime'].dt.month.isin([a - 1])]
        rbf_month = rbf_df[rbf_df['datetime'].dt.month.isin([a])][df.columns[1:]]

        before_mask = np.where(np.isnan(before_month[before_month.columns[1:]]), np.nan, 1.) 
        ground_mask = np.where(np.isnan(ground_month[ground_month.columns[1:]]), np.nan, 1.)
        len_ = min(before_mask.shape[0], ground_mask.shape[0])

        test_df = pd.DataFrame(ground_month[ground_month.columns[1:]].values[:len_] * np.where(np.isnan(before_month[before_month.columns[1:]]), np.nan, 1.)[:len_], columns=df.columns[1:])

        values = ((test_df - df[df.columns[1:]].mean()) / df[df.columns[1:]].std()).values
        shp = values.shape

        test_masks = ~np.isnan(values)
        ori_masks = ~np.isnan(ground_month[ground_month.columns[1:]].values[:len_])
        test_masks = test_masks.reshape(shp)
        rbf_x = torch.from_numpy(rbf_month.values[:len_]).to(torch.float32)

        deltas = np.array(make_deltas(test_masks))
        deltas = torch.from_numpy(deltas).to(torch.float32)
        ori_masks = torch.from_numpy(ori_masks).to(torch.float32)
        test_masks = torch.from_numpy(test_masks).to(torch.float32)
        values = torch.nan_to_num(torch.from_numpy(values).to(torch.float32))

        dataset = torch.cat([values.unsqueeze_(0), deltas.unsqueeze_(0), test_masks.unsqueeze_(0), rbf_x.unsqueeze_(0)], dim = 0).unsqueeze_(0)

        model.eval()
        loss, x_loss, back_x_loss, loss_c, bi_c_hat, bi_imputation = model(dataset.to(device))

        Nonscale_imputataion = pd.DataFrame(bi_c_hat[0].cpu().detach() , columns= df.columns[1:])
        Nonscale_imputataion = (Nonscale_imputataion * df[df.columns[1:]].std()) + df[df.columns[1:]].mean()
        ground_month = ground_month[df.columns[1:]].fillna(0)

        masks = ori_masks - test_masks

        ori_error += np.sum(np.abs((Nonscale_imputataion.values - ground_month.values[:len_]) * masks.cpu().numpy()))
        ori_test_mask_num += np.sum(masks.cpu().numpy())

        print(a,"month")
        print(np.sum(masks.cpu().numpy()))
        print(np.sum(np.abs((Nonscale_imputataion.values - ground_month.values[:len_]) * masks.cpu().numpy())) / np.sum(masks.cpu().numpy()))

    print("Original MAE :", ori_error / ori_test_mask_num)

In [10]:
df['datetime'] = pd.to_datetime(df['datetime'])
rbf_df['datetime'] = pd.to_datetime(rbf_df['datetime'])

ori_error = 0.0
ori_test_mask_num = 0.0

for a in [6,9,12,3]:

    ground_month = df[df['datetime'].dt.month.isin([a])]
    before_month = df[df['datetime'].dt.month.isin([a - 1])]
    rbf_month = rbf_df[rbf_df['datetime'].dt.month.isin([a])][df.columns[1:]]

    before_mask = np.where(np.isnan(before_month[before_month.columns[1:]]), np.nan, 1.) 
    ground_mask = np.where(np.isnan(ground_month[ground_month.columns[1:]]), np.nan, 1.)
    len_ = min(before_mask.shape[0], ground_mask.shape[0])

    test_df = pd.DataFrame(ground_month[ground_month.columns[1:]].values[:len_] * np.where(np.isnan(before_month[before_month.columns[1:]]), np.nan, 1.)[:len_], columns=df.columns[1:])
    test_df.to_csv('./dataset/air_month{}.csv'.format(a))

In [14]:
model = torch.load("./result/G_0.001_64_36.pt")

In [15]:
model2 = torch.load("./result/G_0.001_64_36_bi.pt")

In [11]:
df = pd.read_csv("./dataset/" + 'pm25_missing.csv')
rbf_df = pd.read_csv('./RBFresult/air_20_8.0_scale_month.csv')

In [16]:
eval_bi_model_test(model2, df, rbf_df)

6 month
2363.0
9.908082354858813
9 month
436.0
7.801500918461435
12 month
1663.0
14.8840539535835
3 month
3006.0
29.541862561926205
Original MAE : 18.796099838101245


In [17]:
eval_bi_model_test(model, df, rbf_df)

6 month
2363.0
12.00065267713261
9 month
436.0
8.95075150539117
12 month
1663.0
17.034228502749336
3 month
3006.0
44.80962179441939
Original MAE : 26.149667253148333


In [12]:
model3 = torch.load("./result/G_0.001_64_24.pt")
eval_bi_model_test(model3, df, rbf_df)

6 month
2363.0
12.358835852535394
9 month
436.0
9.015566666400593
12 month
1663.0
17.42227444238936
3 month
3006.0
52.41223830373716
Original MAE : 29.413383358974187


In [18]:
model4 = torch.load("./result/G_0.001_64_24_train2.pt")
eval_bi_model_test(model4, df, rbf_df)

6 month
2363.0
11.454270304716294
9 month
436.0
8.223317520722238
12 month
1663.0
17.320703306732995
3 month
3006.0
51.730878652753056
Original MAE : 28.784032940325673


In [19]:
model4 = torch.load("./result/G_0.001_64_36_train2.pt")
eval_bi_model_test(model4, df, rbf_df)

6 month
2363.0
11.868504649142594
9 month
436.0
8.18624378242992
12 month
1663.0
17.97931498210259
3 month
3006.0
43.377197514724116
Original MAE : 25.69709899833432


In [26]:
def eval_bi_model_test(model,df, rbf_df, train_df):
    df['datetime'] = pd.to_datetime(df['datetime'])
    rbf_df['datetime'] = pd.to_datetime(rbf_df['datetime'])

    ori_error = 0.0
    ori_test_mask_num = 0.0

    for a in [6,9,12,3]:

        ground_month = df[df['datetime'].dt.month.isin([a])]
        before_month = df[df['datetime'].dt.month.isin([a - 1])]
        rbf_month = rbf_df[rbf_df['datetime'].dt.month.isin([a])][df.columns[1:]]

        before_mask = np.where(np.isnan(before_month[before_month.columns[1:]]), np.nan, 1.) 
        ground_mask = np.where(np.isnan(ground_month[ground_month.columns[1:]]), np.nan, 1.)
        len_ = min(before_mask.shape[0], ground_mask.shape[0])

        test_df = pd.DataFrame(ground_month[ground_month.columns[1:]].values[:len_] * np.where(np.isnan(before_month[before_month.columns[1:]]), np.nan, 1.)[:len_], columns=df.columns[1:])

        values = ((test_df - train_df[train_df.columns[1:]].mean()) / train_df[train_df.columns[1:]].std()).values
        shp = values.shape

        test_masks = ~np.isnan(values)
        ori_masks = ~np.isnan(ground_month[ground_month.columns[1:]].values[:len_])
        test_masks = test_masks.reshape(shp)
        rbf_x = torch.from_numpy(rbf_month.values[:len_]).to(torch.float32)

        deltas = np.array(make_deltas(test_masks))
        deltas = torch.from_numpy(deltas).to(torch.float32)
        ori_masks = torch.from_numpy(ori_masks).to(torch.float32)
        test_masks = torch.from_numpy(test_masks).to(torch.float32)
        values = torch.nan_to_num(torch.from_numpy(values).to(torch.float32))

        dataset = torch.cat([values.unsqueeze_(0), deltas.unsqueeze_(0), test_masks.unsqueeze_(0), rbf_x.unsqueeze_(0)], dim = 0).unsqueeze_(0)

        model.eval()
        loss, x_loss, back_x_loss, loss_c, bi_c_hat, bi_imputation = model(dataset.to(device))

        Nonscale_imputataion = pd.DataFrame(bi_c_hat[0].cpu().detach() , columns= df.columns[1:])
        Nonscale_imputataion = (Nonscale_imputataion * train_df[train_df.columns[1:]].std()) + train_df[train_df.columns[1:]].mean()
        ground_month = ground_month[df.columns[1:]].fillna(0)

        masks = ori_masks - test_masks

        ori_error += np.sum(np.abs((Nonscale_imputataion.values - ground_month.values[:len_]) * masks.cpu().numpy()))
        ori_test_mask_num += np.sum(masks.cpu().numpy())

        print(a,"month")
        print(np.sum(masks.cpu().numpy()))
        print(np.sum(np.abs((Nonscale_imputataion.values - ground_month.values[:len_]) * masks.cpu().numpy())) / np.sum(masks.cpu().numpy()))

    print("Original MAE :", ori_error / ori_test_mask_num)

In [27]:
df = pd.read_csv("./dataset/" + 'pm25_missing_test.csv')
train_df = pd.read_csv("./dataset/" + 'pm25_missing.csv')
rbf_df = pd.read_csv('./RBFresult/air_20_8.0_scale_month.csv')

In [30]:
df.columns = train_df.columns

In [31]:
model4 = torch.load("./result/G_0.001_64_36_train.pt")
eval_bi_model_test(model4, df, rbf_df, train_df)

6 month
0.0
nan


  print(np.sum(np.abs((Nonscale_imputataion.values - ground_month.values[:len_]) * masks.cpu().numpy())) / np.sum(masks.cpu().numpy()))


9 month
0.0
nan


  print(np.sum(np.abs((Nonscale_imputataion.values - ground_month.values[:len_]) * masks.cpu().numpy())) / np.sum(masks.cpu().numpy()))


12 month
0.0
nan


  print(np.sum(np.abs((Nonscale_imputataion.values - ground_month.values[:len_]) * masks.cpu().numpy())) / np.sum(masks.cpu().numpy()))


3 month
0.0
nan
Original MAE : nan


  print(np.sum(np.abs((Nonscale_imputataion.values - ground_month.values[:len_]) * masks.cpu().numpy())) / np.sum(masks.cpu().numpy()))
  print("Original MAE :", ori_error / ori_test_mask_num)
