## Set Up

In [1]:
import enum
import math

import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F

## collecting and preparing data

In [2]:
import zipfile
import sys
import os
import requests
import pandas as pd
import pickle
from torch.utils.data import DataLoader, Dataset

### downloading the data (should only need to do this once)

os.makedirs("data/", exist_ok=True)
url = "https://www.microsoft.com/en-us/research/wp-content/uploads/2016/06/STMVL-Release.zip"
urlData = requests.get(url).content
filename = "data/STMVL-Release.zip"
with open(filename, mode="wb") as f:
    # f.write(urlData)
with zipfile.ZipFile(filename) as z:
    z.extractall("data/pm25")
        
def create_normalizer_pm25():
    df = pd.read_csv(
        "./data/pm25/Code/STMVL/SampleData/pm25_ground.txt",
        index_col="datetime",
        parse_dates=True,
    )
    test_month = [3, 6, 9, 12]
    for i in test_month:
        df = df[df.index.month != i]
    mean = df.describe().loc["mean"].values
    std = df.describe().loc["std"].values
    path = "./data/pm25/pm25_meanstd.pk"
    with open(path, "wb") as f:
        pickle.dump([mean, std], f)
create_normalizer_pm25()

In [3]:
class PM25_Dataset(Dataset):
    def __init__(self, eval_length=36, target_dim=36, mode="train", validindex=0):
        self.eval_length = eval_length
        self.target_dim = target_dim

        path = "./data/pm25/pm25_meanstd.pk"
        with open(path, "rb") as f:
            self.train_mean, self.train_std = pickle.load(f)
        if mode == "train":
            month_list = [1, 2, 4, 5, 7, 8, 10, 11]
            # 1st,4th,7th,10th months are excluded from histmask (since the months are used for creating missing patterns in test dataset)
            flag_for_histmask = [0, 1, 0, 1, 0, 1, 0, 1] 
            month_list.pop(validindex)
            flag_for_histmask.pop(validindex)
        elif mode == "valid":
            month_list = [1, 2, 4, 5, 7, 8, 10, 11]
            month_list = month_list[validindex : validindex + 1]
        elif mode == "test":
            month_list = [3, 6, 9, 12]
        self.month_list = month_list

        # create data for batch
        self.observed_data = []  # values (separated into each month)
        self.observed_mask = []  # masks (separated into each month)
        self.gt_mask = []  # ground-truth masks (separated into each month)
        self.index_month = []  # indicate month
        self.position_in_month = []  # indicate the start position in month (length is the same as index_month)
        self.valid_for_histmask = []  # whether the sample is used for histmask
        self.use_index = []  # to separate train/valid/test
        self.cut_length = []  # excluded from evaluation targets

        df = pd.read_csv(
            "./data/pm25/Code/STMVL/SampleData/pm25_ground.txt",
            index_col="datetime",
            parse_dates=True,
        )
        df_gt = pd.read_csv(
            "./data/pm25/Code/STMVL/SampleData/pm25_missing.txt",
            index_col="datetime",
            parse_dates=True,
        )
        for i in range(len(month_list)):
            current_df = df[df.index.month == month_list[i]]
            current_df_gt = df_gt[df_gt.index.month == month_list[i]]
            current_length = len(current_df) - eval_length + 1

            last_index = len(self.index_month)
            self.index_month += np.array([i] * current_length).tolist()
            self.position_in_month += np.arange(current_length).tolist()
            if mode == "train":
                self.valid_for_histmask += np.array(
                    [flag_for_histmask[i]] * current_length
                ).tolist()

            # mask values for observed indices are 1
            c_mask = 1 - current_df.isnull().values
            c_gt_mask = 1 - current_df_gt.isnull().values
            c_data = (
                (current_df.fillna(0).values - self.train_mean) / self.train_std
            ) * c_mask

            self.observed_mask.append(c_mask)
            self.gt_mask.append(c_gt_mask)
            self.observed_data.append(c_data)

            if mode == "test":
                n_sample = len(current_df) // eval_length
                # interval size is eval_length (missing values are imputed only once)
                c_index = np.arange(
                    last_index, last_index + eval_length * n_sample, eval_length
                )
                self.use_index += c_index.tolist()
                self.cut_length += [0] * len(c_index)
                if len(current_df) % eval_length != 0:  # avoid double-count for the last time-series
                    self.use_index += [len(self.index_month) - 1]
                    self.cut_length += [eval_length - len(current_df) % eval_length]

        if mode != "test":
            self.use_index = np.arange(len(self.index_month))
            self.cut_length = [0] * len(self.use_index)

        # masks for 1st,4th,7th,10th months are used for creating missing patterns in test data,
        # so these months are excluded from histmask to avoid leakage
        if mode == "train":
            ind = -1
            self.index_month_histmask = []
            self.position_in_month_histmask = []

            for i in range(len(self.index_month)):
                while True:
                    ind += 1
                    if ind == len(self.index_month):
                        ind = 0
                    if self.valid_for_histmask[ind] == 1:
                        self.index_month_histmask.append(self.index_month[ind])
                        self.position_in_month_histmask.append(
                            self.position_in_month[ind]
                        )
                        break
        else:  # dummy (histmask is only used for training)
            self.index_month_histmask = self.index_month
            self.position_in_month_histmask = self.position_in_month

    def __getitem__(self, org_index):
        index = self.use_index[org_index]
        c_month = self.index_month[index]
        c_index = self.position_in_month[index]
        hist_month = self.index_month_histmask[index]
        hist_index = self.position_in_month_histmask[index]
        s = {
            "observed_data": self.observed_data[c_month][
                c_index : c_index + self.eval_length
            ],
            "observed_mask": self.observed_mask[c_month][
                c_index : c_index + self.eval_length
            ],
            "gt_mask": self.gt_mask[c_month][
                c_index : c_index + self.eval_length
            ],
            "hist_mask": self.observed_mask[hist_month][
                hist_index : hist_index + self.eval_length
            ],
            "timepoints": np.arange(self.eval_length),
            "cut_length": self.cut_length[org_index],
        }

        return s

    def __len__(self):
        return len(self.use_index)

def get_dataloader(batch_size, device, validindex=0):
    dataset = PM25_Dataset(mode="train", validindex=validindex)
    train_loader = DataLoader(
        dataset, batch_size=batch_size, num_workers=1, shuffle=True
    )
    dataset_test = PM25_Dataset(mode="test", validindex=validindex)
    test_loader = DataLoader(
        dataset_test, batch_size=batch_size, num_workers=1, shuffle=False
    )
    dataset_valid = PM25_Dataset(mode="valid", validindex=validindex)
    valid_loader = DataLoader(
        dataset_valid, batch_size=batch_size, num_workers=1, shuffle=False
    )

    scaler = torch.from_numpy(dataset.train_std).to(device).float()
    mean_scaler = torch.from_numpy(dataset.train_mean).to(device).float()

    return train_loader, valid_loader, test_loader, scaler, mean_scaler

stuff = get_dataloader(18, "cpu")

len(stuff[0])

for i, thing in enumerate(stuff[0]):
    print(thing.keys())
    if i > 0:
        break

In [4]:
train_set = PM25_Dataset(mode="train")
valid_set = PM25_Dataset(mode="valid")
test_set  = PM25_Dataset(mode="test")

In [5]:
print(len(train_set))
print(len(valid_set))
print(len(test_set))

4842
709
82


## Moded Transformer

In [6]:
class TimesSeriesAttention(nn.Module):
    """
    A module that computes multi-head attention given query, key, and value tensors for time series data of shape (b, t, f, e)
    """

    def __init__(self, embed_dim: int, num_heads: int):
        """
        Constructor.
        
        Inputs:
        - input_dim: Dimension of the input query, key, and value. We assume they all have
          the same dimensions. This is basically the dimension of the embedding.
        - num_heads: Number of attention heads
        """
        super(TimesSeriesAttention, self).__init__()

        assert embed_dim % num_heads == 0

        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.dim_per_head = embed_dim // num_heads


        self.linear_query = nn.Linear(embed_dim, embed_dim)
        self.linear_key = nn.Linear(embed_dim, embed_dim)
        self.linear_value = nn.Linear(embed_dim, embed_dim)
        self.output_linear = nn.Linear(embed_dim, embed_dim)


    def forward(self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, mask: torch.Tensor = None):
        """
        Compute the attended feature representations.
        
        Inputs:
        - query: Tensor of the shape BxTxFXE, where B is the batch size, T is the time dimension, F is the feature dimension, 
        and E is the embedding dimension
        - key: Tensor of the shape BxTxFXE
        - value: Tensor of the shape BxTxFXE
        - mask: Tensor indicating where the attention should *not* be performed
        """
        b = query.shape[0]
        t = query.shape[1]
        f = query.shape[2]
        e = query.shape[3]


        query_linear = self.linear_query(query)
        key_linear = self.linear_key(key)
        value_linear = self.linear_value(value)

        query_reshaped = query_linear.reshape(b, t, f, self.num_heads, self.dim_per_head)
        key_reshaped = key_linear.reshape(b, t, f, self.num_heads, self.dim_per_head)
        value_reshaped = value_linear.reshape(b, t, f, self.num_heads, self.dim_per_head)

        query_reshaped = query_reshaped.permute(0, 3, 1, 2, 4) # BxHxTxFxE
        key_reshaped = key_reshaped.permute(0, 3, 1, 2, 4) # BxHxTxFxE
        value_reshaped = value_reshaped.permute(0, 3, 1, 2, 4) # BxHxTxFxE


        kq = torch.einsum("bhtfe,bhxye->bhtfxy", key_reshaped, query_reshaped)

        dot_prod_scores = kq/math.sqrt(self.dim_per_head)

        # if mask is not None:
        #     # We simply set the similarity scores to be near zero for the positions
        #     # where the attention should not be done. Think of why we do this.
        #     dot_prod_scores = dot_prod_scores.masked_fill(mask == 0, -1e9)

        #softmac across time and features
        dot_prod_scores = F.softmax(dot_prod_scores, dim=4)
        dot_prod_scores = F.softmax(dot_prod_scores, dim=5)

        out = torch.einsum("bhtfxy,bhtfe->btfhe",
                           dot_prod_scores, value_reshaped)
        out = self.output_linear(out.reshape(b, t, f, e))

        return out


In [7]:
class FeedForwardNetwork(nn.Module):
    """
    A simple feedforward network. Essentially, it is a two-layer fully-connected
    neural network.
    """
    def __init__(self, embed_dim, ff_dim, dropout):
        """
        Inputs:
        - embed_dim: embedding dimension
        - ff_dim: Hidden dimension
        """
        super(FeedForwardNetwork, self).__init__()
        
        self.feedforward = nn.Sequential(
            nn.Linear(embed_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, embed_dim),
            nn.Dropout(dropout)
        )

        
    def forward(self, x: torch.Tensor):
        """
        Input:
        - x: Tensor of the shape BxTxFXE, where B is the batch size, T is the time dimension, F is the feature dimension,
        and E is the embedding dimension
          
        Return:
        - y: Tensor of the shape BxTxFXE
        """

        y = self.feedforward(x)

        
        return y


In [8]:
class TransformerEncoderCell(nn.Module):
    """
    A single cell (unit) for the Transformer encoder.
    """

    def __init__(self, embed_dim: int, num_heads: int, ff_dim: int, dropout: float):
        """
        Inputs:
        - embed_dim: embedding dimension for each element in the time series data
        - num_heads: Number of attention heads in a multi-head attention module
        - ff_dim: The hidden dimension for a feedforward network
        - dropout: Dropout ratio for the output of the multi-head attention and feedforward
          modules.
        """
        super(TransformerEncoderCell, self).__init__()

        self.time_series_attention = TimesSeriesAttention(embed_dim, num_heads)
        self.dropout = nn.Dropout(dropout)
        self.layer_norm = nn.LayerNorm(embed_dim)
        self.feedforward = FeedForwardNetwork(embed_dim, ff_dim, dropout)


    def forward(self, x: torch.Tensor, mask: torch.Tensor = None):
        """
        Inputs:
        - x: Tensor of the shape BxTxFXE, where B is the batch size, T is the time dimension, F is the feature dimension,
        and E is the embedding dimension
        - mask: Tensor for multi-head attention
        """

        attention = self.time_series_attention(x, x, x, mask)
        attention = self.dropout(attention)
        attention = torch.add(attention, x)
        attention = self.layer_norm(attention)

        y = self.feedforward(attention)
        y = torch.add(y, attention)
        y = self.layer_norm(y)

        return y

In [9]:
class TransformerEncoder(nn.Module):
    """
    A full encoder consisting of a set of TransformerEncoderCell.
    """
    def __init__(self, embed_dim: int, num_heads: int, ff_dim: int, num_cells: int, dropout: float=0.1):
        """
        Inputs:
        - embed_dim: embedding dimension for each element in the time series data
        - num_heads: Number of attention heads in a multi-head attention module
        - ff_dim: The hidden dimension for a feedforward network
        - num_cells: Number of time series attention cells in the encoder
        - dropout: Dropout ratio for the output of the multi-head attention and feedforward
          modules.
        """
        super(TransformerEncoder, self).__init__()
        
        self.norm = None

        self.encoder_modules = nn.ModuleList(TransformerEncoderCell(embed_dim, num_heads, ff_dim, dropout) for _ in range(num_cells))
        self.layer_norm = nn.LayerNorm(embed_dim)

        
    def forward(self, x: torch.Tensor, mask: torch.Tensor=None):
        """
        Inputs:
        - x: Tensor of the shape BxTxFXE, where B is the batch size, T is the time dimension, F is the feature dimension,
        and E is the embedding dimension
        - mask: Tensor for multi-head attention
        
        Return:
        - y: Tensor of the shape BxTxFXE
        """

        for encoder_module in self.encoder_modules:
            x = encoder_module(x, mask)
          
        #y = self.layer_norm(x)
        y = x

        return y

## Beta Schedules

In [10]:
def get_named_beta_schedule(schedule_name, num_diffusion_timesteps):
    """
    Get a pre-defined beta schedule for the given name.

    The beta schedule library consists of beta schedules which remain similar
    in the limit of num_diffusion_timesteps.
    Beta schedules may be added, but should not be removed or changed once
    they are committed to maintain backwards compatibility.
    """
    if schedule_name == "linear":
        # Linear schedule from Ho et al, extended to work for any number of
        # diffusion steps.
        scale = 1000 / num_diffusion_timesteps
        beta_start = scale * 0.0001
        beta_end = scale * 0.02
        return torch.linspace(
            beta_start, beta_end, num_diffusion_timesteps
        )
    elif schedule_name == "cosine":
        return betas_for_alpha_bar(
            num_diffusion_timesteps,
            lambda t: math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2,
        )
    else:
        raise NotImplementedError(f"unknown beta schedule: {schedule_name}")


def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999):
    """
    Create a beta schedule that discretizes the given alpha_t_bar function,
    which defines the cumulative product of (1-beta) over time from t = [0,1].

    :param num_diffusion_timesteps: the number of betas to produce.
    :param alpha_bar: a lambda that takes an argument t from 0 to 1 and
                      produces the cumulative product of (1-beta) up to that
                      part of the diffusion process.
    :param max_beta: the maximum beta to use; use values lower than 1 to
                     prevent singularities.
    """
    betas = []
    for i in range(num_diffusion_timesteps):
        t1 = i / num_diffusion_timesteps
        t2 = (i + 1) / num_diffusion_timesteps
        betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
    return torch.tensor(betas)

## Embeddings

In [11]:
class DiffusionEmbedding(nn.Module):
    def __init__(self, num_steps, embedding_dim, projection_dim=None):
        super(DiffusionEmbedding, self).__init__()
        if projection_dim is None:
            projection_dim = embedding_dim
        self.register_buffer(
            "embedding",
            self._build_embedding(num_steps, embedding_dim / 2),
            persistent=False,
        )
        self.projection1 = nn.Linear(embedding_dim, projection_dim)
        self.projection2 = nn.Linear(projection_dim, embedding_dim)        

    def forward(self, diffusion_step, data, device="cpu"):
        x = self.embedding[diffusion_step]
        x = self.projection1(x)
        x = F.silu(x)
        x = self.projection2(x)
        x = F.silu(x)
        x = torch.zeros(data.shape).to(device) + x.unsqueeze(1).unsqueeze(1)
        return x

    def _build_embedding(self, num_steps, dim=64):
        steps = torch.arange(num_steps).unsqueeze(1)  # (T,1)
        frequencies = 10.0 ** (torch.arange(dim) / (dim - 1) * 4.0).unsqueeze(0)  # (1,dim)
        table = steps * frequencies  # (T,dim)
        table = torch.cat([torch.sin(table), torch.cos(table)], dim=1)  # (T,dim*2)
        return table
    

class TimeEmbedding(nn.Module):
    def __init__(self, embedding_dim, max_len=10000.0):
        super(TimeEmbedding, self).__init__()
        self.max_len = max_len
        self.learnable = nn.Sequential(
            nn.Linear(embedding_dim, embedding_dim),
            nn.SiLU(),
            nn.Linear(embedding_dim, embedding_dim),
        )
        

    def forward(self, data, device="cpu"):
        b, l, f, e = data.shape
        pe = torch.arange(l).unsqueeze(0).unsqueeze(-1).unsqueeze(-1).to(device)
        pe = torch.zeros(data.shape).to(device) + pe
        
        div_term = 1 / torch.pow(
            self.max_len, torch.arange(0, f, 2) / f
        ).unsqueeze(-1).to(device)

        pe[:, :, 0::2] = torch.sin(pe[:, :, 0::2] * div_term)
        pe[:, :, 1::2] = torch.cos(pe[:, :, 1::2] * div_term)

        return self.learnable(pe) 
    
class FeatureEmbedding(nn.Module):
    def __init__(self, embedding_dim, max_len=10000.0):
        super(FeatureEmbedding, self).__init__()
        self.max_len = max_len
        self.learnable = nn.Sequential(
            nn.Linear(embedding_dim, embedding_dim),
            nn.SiLU(),
            nn.Linear(embedding_dim, embedding_dim),
        )
        

    def forward(self, data, device="cpu"):
        b, l, f, e = data.shape
        pe = torch.arange(f).unsqueeze(0).unsqueeze(0).unsqueeze(-1).to(device)
        pe = torch.zeros(data.shape).to(device) + pe

        div_term = 1 / torch.pow(
            self.max_len, torch.arange(0, e, 2) / e
        ).to(device)

        pe[:, :, :, 0::2] = torch.sin(pe[:, :, :, 0::2] * div_term)
        pe[:, :, :, 1::2] = torch.cos(pe[:, :, :, 1::2] * div_term)

        return self.learnable(pe)

## Diffusion Imputation Model

In [12]:
class diffusion_imputation(nn.Module):
    def __init__(self, emb_dim,
                vocab_size,
                pad_idx= None,
                features_to_impute = None,
                missing_prp = 0.1,
                diffusion_steps = 1000,
                diffusion_beta_schedule = "cosine",
                is_unconditional=False,
                conv_out_channels = 4,
                num_heads = 8,
                ff_dim = 512,
                num_cells = 2,
                dropout = 0.1,
                device = "cpu"):
        
        super().__init__()

        self.device = device
        self.emb_dim = emb_dim
        self.is_unconditional = is_unconditional
        self.features_to_impute = features_to_impute
        self.missing_prp = missing_prp
        self.conv_out_channels = conv_out_channels
        self.diffusion_steps = diffusion_steps

        #set device to cuda if available
        if torch.cuda.is_available():
            self.device = "cuda"        

        self.data_embedding_linear = nn.Sequential(
            nn.Linear(1, emb_dim),
            nn.SiLU(),
            nn.Linear(emb_dim, emb_dim),
        )        
        self.diffusion_embedding = DiffusionEmbedding(diffusion_steps, emb_dim)
        self.time_embedding = TimeEmbedding(emb_dim)
        self.feature_embedding = FeatureEmbedding(emb_dim)
        self.embedding_conv = nn.Conv1d(in_channels = 4, out_channels= conv_out_channels, kernel_size = 1)

        #number of heads for the transformer should be divisivle by the conv_out_channels,
        # so that each head gets input from a single channel
        self.embed_dim_transformer_input = conv_out_channels * emb_dim

        self.transformer = TransformerEncoder(embed_dim = self.embed_dim_transformer_input,
                                              num_heads = num_heads,
                                              ff_dim = ff_dim,
                                              num_cells = num_cells,
                                              dropout = dropout)
        self.output = nn.Sequential(
            nn.Linear(self.embed_dim_transformer_input, self.embed_dim_transformer_input),
            nn.SiLU(),
            nn.Linear(self.embed_dim_transformer_input, 1),
        )
        #define beta schedule
        self.beta = get_named_beta_schedule(diffusion_beta_schedule, 
                                            diffusion_steps)
        
        self.alpha_hat = 1 - self.beta 
        self.alpha = torch.cumprod(self.alpha_hat, dim=0)
        self.alpha_torch = torch.tensor(self.alpha).float()

    def get_mask(self, data, strategy = "forecasting"):
        
        b = data.shape[0]
        t = data.shape[1]
        f = data.shape[2]
        
        if strategy == "forecasting":
            forecasted_time = torch.randint(t-1, t, (b, 1, 1, 1))
            mask = torch.zeros_like(data)
            for i in range(b):
                mask[i, forecasted_time[i]:, :, :] = 1
        
        if strategy == "random_features":
            selected_features = torch.randint(0, f, (b, 1, 1, 1))
            mask = torch.zeros_like(data)
            mask[:, :, selected_features, :] = 1
        
        if strategy == "selected_features":
            mask = torch.zeros_like(data)
            mask[:, :, self.features_to_impute, :] = 1
        
        if strategy == "selected_features_after_time":
            selected_time = torch.randint(1, t, (b, 1, 1))
            mask = torch.zeros_like(data)
            mask[:, selected_time:, self.features_to_impute, :] = 1
        
        if strategy == "random":
            mask = torch.rand(size=(b, t, f))#.unsqueeze(3)
            #mask = mask.repeat(1, 1, 1, e)
            mask = mask < self.missing_prp
            mask = mask.float()
        return mask
    
    def loss_func(self, predicted_noise, noise):

        residual = noise - predicted_noise
        num_obs = torch.sum(noise!=0)
        loss = (residual**2).sum() / num_obs
        return(loss)
    
    def forward(self, data, strategy = "forecasting"):
         
        b, t, f = data.shape

        noise_mask = self.get_mask(data, strategy).to(self.device)
        noise = torch.randn((b, t, f)).to(self.device)
        noise = (noise_mask * noise)

        diffusion_t = torch.randint(1, self.diffusion_steps, (b,1)).squeeze(1)
        alpha = self.alpha_torch[diffusion_t].unsqueeze(1).unsqueeze(2).to(self.device)
        alpha_prev = self.alpha_torch[diffusion_t - 1].unsqueeze(1).unsqueeze(2).to(self.device)

        noised_data = data * noise_mask
        noised_data = noised_data * (alpha**0.5) + noise * (1 - alpha)
        conditional_data = data * (1 - noise_mask)
        noised_data = noised_data + conditional_data
        noised_data = noised_data.unsqueeze(3)

        noised_data = self.data_embedding_linear(noised_data.float())
        diffusion_embedding = self.diffusion_embedding(diffusion_t, noised_data, device = self.device)
        time_embedding = self.time_embedding(noised_data, device = self.device)
        feature_embedding = self.feature_embedding(noised_data, device = self.device)

        noised_data = torch.stack((noised_data, diffusion_embedding, time_embedding, feature_embedding), dim = -1)
        noised_data = noised_data.reshape(1, -1, 4)
        noised_data = noised_data.permute(0, 2, 1)
        noised_data = self.embedding_conv(noised_data)
        noised_data = noised_data.permute(0, 2, 1)
        noised_data = noised_data.reshape(b, t, f, self.embed_dim_transformer_input)
        
        predicted_noise = self.transformer(noised_data)
        predicted_noise = self.output(predicted_noise).squeeze(3)
        predicted_noise = predicted_noise * noise_mask

        return (predicted_noise, noise*((1-alpha)-(1 - alpha_prev)))
    
    def eval(self, data, imputation_mask):
        
        conditional_data = data * (1 - imputation_mask)
        random_noise = torch.randn_like(data) * imputation_mask
        data = (conditional_data + random_noise).unsqueeze(3)

        b, ti, f, e = data.shape
        imputed_samples = torch.zeros((b, ti, f)).to(self.device)
        x = (conditional_data + random_noise)

        for t in range(self.diffusion_steps - 1, -1, -1):

            x = x.unsqueeze(3)
            current_sample = self.data_embedding_linear(x.float())
            diffusion_embedding = self.diffusion_embedding([t], current_sample, device = self.device)
            time_embedding = self.time_embedding(current_sample, device = self.device)
            feature_embedding = self.feature_embedding(current_sample, device = self.device)
            data_to_transformer = torch.stack((current_sample, diffusion_embedding, time_embedding, feature_embedding), dim = -1)
            data_to_transformer = data_to_transformer.reshape(1, -1, 4)
            data_to_transformer = data_to_transformer.permute(0, 2, 1)
            data_to_transformer = self.embedding_conv(data_to_transformer)
            data_to_transformer = data_to_transformer.permute(0, 2, 1)
            data_to_transformer = data_to_transformer.reshape(b, ti, f, self.embed_dim_transformer_input)

            predicted_noise = self.transformer(data_to_transformer)
            predicted_noise = self.output(predicted_noise).squeeze(3)
            predicted_noise = predicted_noise * imputation_mask

            coeff1 = 1 / self.alpha_hat[t] ** 0.5
            coeff2 = (1 - self.alpha_hat[t]) / (1 - self.alpha[t]) ** 0.5
            
            x = x.squeeze(3)
            x = coeff1 * (x - coeff2 * predicted_noise)
            
            if t > 0:
                noise = torch.randn_like(x)
                sigma = (
                    (1.0 - self.alpha[t - 1]) / (1.0 - self.alpha[t]) * self.beta[t]
                ) ** 0.5
                x += sigma * noise
            
            x = data.squeeze(3) * (1 - imputation_mask) + x * imputation_mask

        imputed_samples = x.detach()
        return(imputed_samples)

## using the model

In [13]:
import time

def train(model, data, epochs, lr, loss_func, device = "cpu", verbose = True):
    model = model.to(device)
    data = data.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    model.train()
    for epoch in range(epochs):
        start = time.time()
        optimizer.zero_grad()
        predicted_noise, noise = model(data, strategy='random')
        loss = loss_func(predicted_noise, noise)
        loss.backward()
        optimizer.step()
        end = time.time()
        if verbose:
            #print every 10 epochs
            if epoch % 10 == 0:
                print("Epoch: ", epoch, "Loss: ", loss.item(), "Time: ", end - start)
    return(model)

#restart the model


In [20]:
diffusion_imputer = diffusion_imputation(emb_dim = 128,
                                        is_unconditional = False,
                                        conv_out_channels = 1,
                                        num_heads = 8,
                                        ff_dim = 1024,
                                        num_cells = 2,
                                        dropout = 0.0,
                                        device = "cpu",
                                        vocab_size= 100,
                                        pad_idx=None,
                                        diffusion_steps= 1000,
                                        missing_prp= 0.1,
                                        )

  self.alpha_torch = torch.tensor(self.alpha).float()


In [15]:
indices = np.arange(len(train_set))
np.random.shuffle(indices)
print(indices)

[3148 2502 2010 ... 1952 3735 3683]


In [21]:
batch_size = 16

data = []
for j in range(int(len(train_set) / batch_size)):
    batch = []
    for i in range(batch_size):
        batch.append(torch.Tensor(train_set[indices[i + j*batch_size]]["observed_data"]))
        
    batch = torch.stack(batch, dim=0)
    data.append(batch)

In [17]:
print(data[0])
print(data[-1])

tensor([[[-3.0163e-01, -5.6203e-01, -3.1724e-01,  ..., -2.7119e-01,
          -6.8585e-01, -5.8594e-01],
         [-2.3890e-01, -3.3416e-01, -5.2385e-01,  ..., -2.6111e-01,
          -6.8585e-01, -5.3200e-01],
         [-2.6399e-01, -1.2458e-02, -0.0000e+00,  ..., -2.7119e-01,
          -6.1783e-01, -5.4099e-01],
         ...,
         [-3.8173e-02,  5.7733e-01,  4.1880e-01,  ..., -7.9566e-02,
          -9.6356e-02, -3.5220e-01],
         [ 1.6255e-01,  5.7733e-01,  3.9298e-01,  ...,  5.1546e-02,
           2.2106e-01, -8.2497e-02],
         [ 2.6292e-01,  5.3711e-01,  2.3802e-01,  ...,  5.1546e-02,
           7.3689e-02, -1.3644e-01]],

        [[-1.2599e-01,  9.4657e-04,  3.8006e-01,  ..., -3.7205e-01,
          -1.4170e-01, -6.4517e-02],
         [-5.3718e-04,  2.4222e-01,  3.4132e-01,  ..., -4.1239e-01,
          -1.1903e-01, -2.3533e-01],
         [ 3.6328e-01,  5.7733e-01, -1.2354e-01,  ..., -4.8299e-01,
          -2.3239e-01, -2.8028e-01],
         ...,
         [-9.2890e-01, -9

In [22]:
epochs = 1000
for epoch in range(epochs):
    for batch in data:
        train(diffusion_imputer, batch, epochs = 1, lr = 0.001, loss_func = diffusion_imputer.loss_func)

Epoch:  0 Loss:  0.9905232191085815 Time:  32.146663427352905
Epoch:  0 Loss:  2.9492194652557373 Time:  17.986151218414307
Epoch:  0 Loss:  0.9818966388702393 Time:  18.569625854492188
Epoch:  0 Loss:  2.116868019104004 Time:  20.665844440460205
Epoch:  0 Loss:  0.997160792350769 Time:  21.173161268234253
Epoch:  0 Loss:  1.5242390632629395 Time:  21.627450704574585
Epoch:  0 Loss:  1.0162651538848877 Time:  21.581300497055054
Epoch:  0 Loss:  1.085257649421692 Time:  21.579834699630737
Epoch:  0 Loss:  1.514567255973816 Time:  21.821297883987427
Epoch:  0 Loss:  1.0256640911102295 Time:  21.151304244995117
Epoch:  0 Loss:  1.3373727798461914 Time:  22.237067699432373
Epoch:  0 Loss:  0.9817448258399963 Time:  20.17267155647278
Epoch:  0 Loss:  1.261452078819275 Time:  21.520265579223633
Epoch:  0 Loss:  0.9915372729301453 Time:  23.009172677993774
Epoch:  0 Loss:  1.231690764427185 Time:  21.02595090866089
Epoch:  0 Loss:  0.9613065123558044 Time:  21.62278175354004
Epoch:  0 Loss:  

Epoch:  0 Loss:  0.5810464024543762 Time:  20.70523476600647
Epoch:  0 Loss:  0.5428428649902344 Time:  21.6849365234375
Epoch:  0 Loss:  0.8868204951286316 Time:  23.189069271087646
Epoch:  0 Loss:  0.5617613792419434 Time:  21.11746120452881
Epoch:  0 Loss:  0.6050630211830139 Time:  20.459795713424683
Epoch:  0 Loss:  0.6934335827827454 Time:  21.69450545310974
Epoch:  0 Loss:  0.708590567111969 Time:  23.080452919006348
Epoch:  0 Loss:  0.6736850738525391 Time:  20.18697690963745
Epoch:  0 Loss:  0.654220700263977 Time:  21.711844205856323
Epoch:  0 Loss:  0.6853005886077881 Time:  21.510510683059692
Epoch:  0 Loss:  0.7764245271682739 Time:  21.813430786132812
Epoch:  0 Loss:  0.7353265285491943 Time:  21.567088842391968
Epoch:  0 Loss:  0.7281514406204224 Time:  21.913143634796143
Epoch:  0 Loss:  0.5520310997962952 Time:  21.282390594482422
Epoch:  0 Loss:  0.7319174408912659 Time:  21.001721143722534
Epoch:  0 Loss:  0.7111045718193054 Time:  21.473987579345703
Epoch:  0 Loss: 

Epoch:  0 Loss:  0.8101319074630737 Time:  22.08026385307312
Epoch:  0 Loss:  0.598782479763031 Time:  21.481080532073975
Epoch:  0 Loss:  0.9547820091247559 Time:  20.32007646560669
Epoch:  0 Loss:  0.8376452922821045 Time:  19.69811701774597
Epoch:  0 Loss:  0.35714957118034363 Time:  22.180818557739258
Epoch:  0 Loss:  0.4686408042907715 Time:  21.34193253517151
Epoch:  0 Loss:  0.59559565782547 Time:  21.34326481819153
Epoch:  0 Loss:  0.6982118487358093 Time:  20.527160167694092
Epoch:  0 Loss:  0.5266467332839966 Time:  21.45438003540039
Epoch:  0 Loss:  0.8020787835121155 Time:  20.23964262008667
Epoch:  0 Loss:  0.794892430305481 Time:  21.45268440246582
Epoch:  0 Loss:  0.6264616847038269 Time:  20.60355567932129
Epoch:  0 Loss:  0.6701277494430542 Time:  20.64292025566101
Epoch:  0 Loss:  0.5721197128295898 Time:  19.91779375076294
Epoch:  0 Loss:  0.625248908996582 Time:  20.615432739257812
Epoch:  0 Loss:  0.6471133232116699 Time:  20.397961378097534
Epoch:  0 Loss:  0.5750

Epoch:  0 Loss:  0.6834241151809692 Time:  20.619192123413086
Epoch:  0 Loss:  0.7769303917884827 Time:  19.812748908996582
Epoch:  0 Loss:  0.7429196834564209 Time:  20.860563278198242
Epoch:  0 Loss:  0.7074443697929382 Time:  21.06129813194275
Epoch:  0 Loss:  0.5913800001144409 Time:  20.903676986694336
Epoch:  0 Loss:  0.5447485446929932 Time:  21.92831563949585
Epoch:  0 Loss:  0.7315080761909485 Time:  22.661722660064697
Epoch:  0 Loss:  0.7133957147598267 Time:  22.69278383255005
Epoch:  0 Loss:  0.6897356510162354 Time:  21.329027891159058
Epoch:  0 Loss:  0.5053575038909912 Time:  21.502568244934082
Epoch:  0 Loss:  0.6547545790672302 Time:  21.186514854431152
Epoch:  0 Loss:  0.5963215827941895 Time:  21.827432870864868
Epoch:  0 Loss:  0.7061643004417419 Time:  20.887218475341797
Epoch:  0 Loss:  0.7829762697219849 Time:  20.590057134628296
Epoch:  0 Loss:  0.5343675017356873 Time:  21.371639013290405
Epoch:  0 Loss:  0.627540647983551 Time:  19.998685121536255
Epoch:  0 Lo

Epoch:  0 Loss:  0.4687875807285309 Time:  20.263583183288574
Epoch:  0 Loss:  0.7440374493598938 Time:  20.15229320526123
Epoch:  0 Loss:  0.6948707103729248 Time:  20.589704275131226
Epoch:  0 Loss:  0.7085357308387756 Time:  20.728777408599854
Epoch:  0 Loss:  0.6014183759689331 Time:  20.54012942314148
Epoch:  0 Loss:  0.6141164898872375 Time:  20.565803289413452
Epoch:  0 Loss:  0.6155883073806763 Time:  20.322304725646973
Epoch:  0 Loss:  0.6572198271751404 Time:  20.560873985290527
Epoch:  0 Loss:  0.534093976020813 Time:  21.30225133895874
Epoch:  0 Loss:  0.7276625037193298 Time:  21.7876558303833
Epoch:  0 Loss:  0.6522824764251709 Time:  20.948595762252808
Epoch:  0 Loss:  0.6863168478012085 Time:  21.747814655303955
Epoch:  0 Loss:  0.6285355091094971 Time:  20.24599814414978
Epoch:  0 Loss:  0.6213638782501221 Time:  19.21228837966919
Epoch:  0 Loss:  0.6188992857933044 Time:  18.735788583755493
Epoch:  0 Loss:  0.5361508727073669 Time:  20.02568244934082
Epoch:  0 Loss:  

Epoch:  0 Loss:  0.6157232522964478 Time:  20.670082092285156
Epoch:  0 Loss:  0.6214390397071838 Time:  20.752856969833374
Epoch:  0 Loss:  0.8000715374946594 Time:  20.528850317001343
Epoch:  0 Loss:  0.6997711658477783 Time:  21.061527013778687
Epoch:  0 Loss:  0.7144673466682434 Time:  21.02929949760437
Epoch:  0 Loss:  0.48705342411994934 Time:  20.414997816085815
Epoch:  0 Loss:  0.7035113573074341 Time:  21.301850080490112
Epoch:  0 Loss:  0.8239322900772095 Time:  20.546032667160034
Epoch:  0 Loss:  0.4572034478187561 Time:  21.376622200012207
Epoch:  0 Loss:  0.6448628306388855 Time:  21.109833002090454
Epoch:  0 Loss:  0.6715883612632751 Time:  21.927071571350098
Epoch:  0 Loss:  0.757529079914093 Time:  20.889256477355957
Epoch:  0 Loss:  0.6586288213729858 Time:  21.781731367111206
Epoch:  0 Loss:  0.6199397444725037 Time:  20.291361331939697
Epoch:  0 Loss:  0.5342100858688354 Time:  19.663460969924927
Epoch:  0 Loss:  0.9381760954856873 Time:  19.664896965026855
Epoch:  0

Epoch:  0 Loss:  0.5260967016220093 Time:  20.228395700454712
Epoch:  0 Loss:  0.5535160303115845 Time:  20.197516202926636
Epoch:  0 Loss:  0.551975667476654 Time:  19.589038372039795
Epoch:  0 Loss:  0.8182209134101868 Time:  19.93853998184204
Epoch:  0 Loss:  0.8116888403892517 Time:  20.419193744659424
Epoch:  0 Loss:  0.6764140725135803 Time:  20.860447645187378
Epoch:  0 Loss:  0.5786451101303101 Time:  20.57509160041809
Epoch:  0 Loss:  0.5762582421302795 Time:  20.086223363876343
Epoch:  0 Loss:  0.4810486435890198 Time:  21.153929710388184
Epoch:  0 Loss:  0.9431651830673218 Time:  20.895312547683716
Epoch:  0 Loss:  0.5315118432044983 Time:  20.337680339813232
Epoch:  0 Loss:  0.555220365524292 Time:  20.196385860443115
Epoch:  0 Loss:  0.5213050246238708 Time:  20.091079473495483
Epoch:  0 Loss:  0.732254683971405 Time:  20.820432901382446
Epoch:  0 Loss:  0.6394925117492676 Time:  20.526256799697876
Epoch:  0 Loss:  0.6005788445472717 Time:  22.176804304122925
Epoch:  0 Los

Epoch:  0 Loss:  0.6463426947593689 Time:  23.05247926712036
Epoch:  0 Loss:  0.6755572557449341 Time:  21.461836099624634
Epoch:  0 Loss:  0.5614104866981506 Time:  21.752849102020264
Epoch:  0 Loss:  0.760784387588501 Time:  20.445417404174805
Epoch:  0 Loss:  0.7924647331237793 Time:  21.578705072402954
Epoch:  0 Loss:  0.5571088790893555 Time:  21.920318841934204
Epoch:  0 Loss:  0.7522976994514465 Time:  21.2447566986084
Epoch:  0 Loss:  0.495459645986557 Time:  20.44277858734131
Epoch:  0 Loss:  0.647773265838623 Time:  19.916817665100098
Epoch:  0 Loss:  0.703596293926239 Time:  21.051733016967773
Epoch:  0 Loss:  0.49434491991996765 Time:  20.24582076072693
Epoch:  0 Loss:  0.5489065051078796 Time:  21.889233112335205
Epoch:  0 Loss:  0.5267086625099182 Time:  20.683708667755127
Epoch:  0 Loss:  0.53385990858078 Time:  20.677809715270996
Epoch:  0 Loss:  0.7740535736083984 Time:  22.166014671325684
Epoch:  0 Loss:  0.6803619265556335 Time:  20.007529258728027
Epoch:  0 Loss:  0

Epoch:  0 Loss:  0.5772175788879395 Time:  22.846139430999756
Epoch:  0 Loss:  0.6827038526535034 Time:  21.57692527770996
Epoch:  0 Loss:  0.7749144434928894 Time:  21.691584825515747
Epoch:  0 Loss:  0.6028000116348267 Time:  21.36836576461792
Epoch:  0 Loss:  0.6552608609199524 Time:  20.319737434387207
Epoch:  0 Loss:  0.6131236553192139 Time:  21.50457191467285
Epoch:  0 Loss:  0.7702848315238953 Time:  22.549481868743896
Epoch:  0 Loss:  0.730263352394104 Time:  21.120176792144775
Epoch:  0 Loss:  0.6031690835952759 Time:  21.930808305740356
Epoch:  0 Loss:  0.5271138548851013 Time:  21.141757011413574
Epoch:  0 Loss:  0.6126202940940857 Time:  21.596765518188477
Epoch:  0 Loss:  0.5714100003242493 Time:  21.792054891586304
Epoch:  0 Loss:  0.6031731367111206 Time:  21.236915588378906
Epoch:  0 Loss:  0.9035236835479736 Time:  21.73631739616394
Epoch:  0 Loss:  0.6164431571960449 Time:  21.714070796966553
Epoch:  0 Loss:  0.6723035573959351 Time:  21.08076786994934
Epoch:  0 Loss

Epoch:  0 Loss:  0.49714675545692444 Time:  20.072754859924316
Epoch:  0 Loss:  0.6254611611366272 Time:  21.2176296710968
Epoch:  0 Loss:  0.8132237195968628 Time:  20.447760581970215
Epoch:  0 Loss:  0.6713950634002686 Time:  21.31118392944336
Epoch:  0 Loss:  0.4665634334087372 Time:  20.339218616485596
Epoch:  0 Loss:  0.6814296245574951 Time:  20.52920365333557
Epoch:  0 Loss:  0.5226960778236389 Time:  20.686487436294556
Epoch:  0 Loss:  0.6827058792114258 Time:  20.24388027191162
Epoch:  0 Loss:  0.6125113368034363 Time:  21.108948945999146
Epoch:  0 Loss:  0.7113676071166992 Time:  20.65348482131958
Epoch:  0 Loss:  0.573614776134491 Time:  22.438480854034424
Epoch:  0 Loss:  0.6392103433609009 Time:  21.472023487091064
Epoch:  0 Loss:  0.7080140709877014 Time:  23.000685453414917
Epoch:  0 Loss:  0.5825631618499756 Time:  21.20598030090332
Epoch:  0 Loss:  0.6260749697685242 Time:  20.63750410079956
Epoch:  0 Loss:  0.6076040267944336 Time:  21.389968872070312
Epoch:  0 Loss: 

Epoch:  0 Loss:  0.5894476175308228 Time:  21.173486948013306
Epoch:  0 Loss:  0.4862755537033081 Time:  23.290499925613403
Epoch:  0 Loss:  0.6306678056716919 Time:  20.23138976097107
Epoch:  0 Loss:  0.5604301691055298 Time:  20.365519046783447
Epoch:  0 Loss:  0.6539994478225708 Time:  20.783619165420532
Epoch:  0 Loss:  0.5064821243286133 Time:  19.396920680999756
Epoch:  0 Loss:  0.6865912675857544 Time:  19.461453437805176
Epoch:  0 Loss:  0.5423657894134521 Time:  20.668046951293945
Epoch:  0 Loss:  0.7376050353050232 Time:  21.125709295272827
Epoch:  0 Loss:  0.45649999380111694 Time:  20.323060989379883
Epoch:  0 Loss:  0.5504552721977234 Time:  21.53085470199585
Epoch:  0 Loss:  0.7250035405158997 Time:  20.625279426574707
Epoch:  0 Loss:  0.5872507691383362 Time:  19.799081563949585
Epoch:  0 Loss:  0.6552936434745789 Time:  21.09902024269104
Epoch:  0 Loss:  0.5909794569015503 Time:  20.356279850006104
Epoch:  0 Loss:  0.4929842948913574 Time:  22.88325071334839
Epoch:  0 L

Epoch:  0 Loss:  0.5192825198173523 Time:  21.308470964431763
Epoch:  0 Loss:  0.6592599749565125 Time:  20.369400024414062
Epoch:  0 Loss:  0.5313524603843689 Time:  21.547585010528564
Epoch:  0 Loss:  0.4658326208591461 Time:  21.088003635406494
Epoch:  0 Loss:  0.5822165608406067 Time:  22.11459970474243
Epoch:  0 Loss:  0.5440664291381836 Time:  22.27067232131958
Epoch:  0 Loss:  0.7159848213195801 Time:  22.192543745040894
Epoch:  0 Loss:  0.636481523513794 Time:  21.134622812271118
Epoch:  0 Loss:  0.6293905377388 Time:  21.774882793426514
Epoch:  0 Loss:  0.5959954857826233 Time:  21.412108421325684
Epoch:  0 Loss:  0.44762560725212097 Time:  21.17754864692688
Epoch:  0 Loss:  0.7095305323600769 Time:  23.12039279937744
Epoch:  0 Loss:  0.7339109778404236 Time:  22.190491199493408
Epoch:  0 Loss:  0.6116176247596741 Time:  21.239078283309937
Epoch:  0 Loss:  0.7079934477806091 Time:  22.98056960105896
Epoch:  0 Loss:  0.6676545739173889 Time:  21.86104106903076
Epoch:  0 Loss:  

Epoch:  0 Loss:  0.6449962854385376 Time:  23.16273069381714
Epoch:  0 Loss:  0.6443101763725281 Time:  23.21101689338684
Epoch:  0 Loss:  0.8421434164047241 Time:  24.054510354995728
Epoch:  0 Loss:  0.5735876560211182 Time:  24.370686054229736
Epoch:  0 Loss:  0.7125294804573059 Time:  23.85941195487976
Epoch:  0 Loss:  0.3742516338825226 Time:  24.26249623298645
Epoch:  0 Loss:  0.7089082598686218 Time:  23.545748949050903
Epoch:  0 Loss:  0.6751551032066345 Time:  22.466310501098633
Epoch:  0 Loss:  0.818016529083252 Time:  22.486172437667847
Epoch:  0 Loss:  0.6928835511207581 Time:  23.019739866256714
Epoch:  0 Loss:  0.4787633419036865 Time:  22.8200044631958
Epoch:  0 Loss:  0.3742695748806 Time:  22.63399314880371
Epoch:  0 Loss:  0.5782677531242371 Time:  22.528327226638794
Epoch:  0 Loss:  0.6044556498527527 Time:  22.007671356201172
Epoch:  0 Loss:  0.6717483401298523 Time:  22.34947180747986
Epoch:  0 Loss:  0.7201865315437317 Time:  23.06312108039856
Epoch:  0 Loss:  0.44

Epoch:  0 Loss:  0.7389694452285767 Time:  22.9824321269989


KeyboardInterrupt: 

In [26]:
test_data = torch.stack([torch.Tensor(test_set[i]["observed_data"]) for i in range(batch_size)], dim=0)
imputation_mask = torch.zeros_like(test_data)
imputation_mask[:, 33:35, :] = 1

given_points = test_data * (1-imputation_mask)

eval_points = test_data * imputation_mask


In [27]:
sample_number = 40
samples = []
for i in range(sample_number):
    samples.append(diffusion_imputer.eval(test_data, imputation_mask))


RuntimeError: [enforce fail at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 859963392 bytes.

In [None]:
qlist =[0.05,0.25,0.5,0.75,0.95]
quantiles_imp= []
for q in qlist:
    quantiles_imp.append(torch.quantile(samples, q, dim=1).cpu().numpy()*(1-given_points) + test_data * given_points)

In [None]:
import matplotlib.pyplot as plt

In [None]:
L = test_data.shape[1]
K = test_data.shape[2]

###airquality###
dataind = 10 #change to visualize a different sample

plt.rcParams["font.size"] = 16
fig, axes = plt.subplots(nrows=9, ncols=4,figsize=(24.0, 36.0))
fig.delaxes(axes[-1][-1])

for k in range(K):
    df = pd.DataFrame({"x":np.arange(0,L), "val":test_data[dataind,:,k], "y":eval_points[dataind,:,k]})
    df = df[df.y != 0]
    df2 = pd.DataFrame({"x":np.arange(0,L), "val":test_data[dataind,:,k], "y":given_points[dataind,:,k]})
    df2 = df2[df2.y != 0]
    row = k // 4
    col = k % 4
    axes[row][col].plot(range(0,L), quantiles_imp[2][dataind,:,k], color = 'g',linestyle='solid',label='CSDI')
    axes[row][col].fill_between(range(0,L), quantiles_imp[0][dataind,:,k],quantiles_imp[4][dataind,:,k],
                    color='g', alpha=0.3)
    axes[row][col].plot(df.x,df.val, color = 'b',marker = 'o', linestyle='None')
    axes[row][col].plot(df2.x,df2.val, color = 'r',marker = 'x', linestyle='None')
    if col == 0:
        plt.setp(axes[row, 0], ylabel='value')
    if row == -1:
        plt.setp(axes[-1, col], xlabel='time')