In [1]:
#import packages
import matplotlib
from matplotlib import cm
import torch
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import matplotlib.pyplot as plt
import torch.utils.data as Data
import matplotlib.dates as mdates
from torch.autograd import Variable
import plotly.express as px
import time
import datetime
from dateutil.relativedelta import relativedelta
import calendar
import numpy as np
import joblib
import sys
from sklearn.metrics import roc_auc_score, average_precision_score
from tqdm.notebook import trange, tqdm
from statsmodels.tsa.arima_process import ArmaProcess
# import geopandas as gpd
import gstools as gs

# Select the GPU only when CUDA is actually usable. `is_available` has to be
# *called*: the bare function object is always truthy, which made the original
# expression pick 'cuda' unconditionally, even on CPU-only machines.
device0 = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed NumPy's global RNG and PyTorch's generators so that the simulated data
# built below are reproducible from a fresh kernel.
np.random.seed(1)
torch.manual_seed(1)
torch.cuda.manual_seed(1)

# Tensors created without an explicit dtype are float32.
torch.set_default_dtype(torch.float32)

In [2]:
# Ask PyTorch to release unused memory held by its CUDA caching allocator
# before the (large) simulated tensors are created below.
torch.cuda.empty_cache()

In [3]:
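# Gaussian random-field data (disabled): this whole cell is commented out and kept for reference; uncommenting it regenerates 'Data_Gaussian'.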
# x = y = range(105)
# model = gs.Gaussian(dim=2, var=0.002, len_scale=10)
# srf = gs.SRF(model, seed=20230809)
# field = srf.structured([x, y])
# ar1 = np.array([1,0.6,0.4,0.2])
# ma = np.array([1])
# simulated_AR1_data = 22+ArmaProcess(ar1, ma).generate_sample(nsample=1095)
# timeseries=torch.tensor(simulated_AR1_data).reshape(1095,1)
# timeseries=timeseries.float()

# # spatial layer
# n=11025
# cov=[[0,0,0,0,0,0,0,0,0,0] for i in range(n)]
# for i in range(n):
#     a=np.random.uniform(0,10)
#     if a<=1:
#         cov[i][0]=1
#     elif 1<a<=2:
#         cov[i][1]=1
#     elif 2<a<=3:
#         cov[i][2]=1
#     elif 3<a<=4:
#         cov[i][3]=1
#     elif 4<a<=5:
#         cov[i][4]=1
#     elif 5<a<=6:
#         cov[i][5]=1
#     elif 6<a<=7:
#         cov[i][6]=1
#     elif 7<a<=8:
#         cov[i][7]=1
#     elif 8<a<=9:
#         cov[i][8]=1
#     elif 9<a<=10:
#         cov[i][9]=1
# cov=torch.tensor(cov).float()

# #covariate
# x0=np.random.uniform(-1,1,11025)
# x0=np.c_[x0]
# x0=torch.tensor(x0).float()
# cov_cont=torch.cat((x0,cov),1)
# joblib.dump(cov_cont,'X_cov')

# beta=np.arange(0,5,step=0.5)
# beta=np.insert(beta,0,[2.0])
# beta=torch.tensor(beta).reshape(11,1).float()

# n=105**2
# field_tensor=torch.tensor(field).reshape(11025,1)

# field_tensor=field_tensor+cov_cont@beta
# field_tensor=field_tensor.reshape(1,n)
# time=timeseries.repeat(1,n)
# noise=torch.normal(mean=0,std=0.5,size=(1095,11025))
# simudata=field_tensor+time+noise
# joblib.dump(simudata,'Data_Gaussian')
# simudata

  0%|          | 0/11025 [00:00<?, ?it/s]

tensor([[27.7846, 26.8958, 24.9627,  ..., 28.2686, 23.3985, 23.3948],
        [24.5811, 23.7158, 21.8398,  ..., 25.1012, 20.5215, 20.1657],
        [26.0511, 25.1114, 23.2222,  ..., 26.4869, 21.6549, 21.3758],
        ...,
        [26.6593, 25.5488, 23.7573,  ..., 27.1172, 22.1619, 22.0830],
        [26.0880, 24.9862, 23.1161,  ..., 26.3515, 21.4856, 21.2502],
        [26.3179, 25.3569, 23.2740,  ..., 26.7344, 21.9257, 21.6423]],
       dtype=torch.float64)

In [3]:
# Non-stationary synthetic data
# -----------------------------
# Builds `simudata` = spatial field + shared time series + i.i.d. noise, of
# shape (N_TIME, N_SITES), and saves both the covariates ('X_cov') and the
# data ('Data_nonGaussian') with joblib.
#
# The order of the random draws (ARMA sample -> class draws -> continuous
# covariate -> torch noise) is the same as in the original cell, so the
# generated values are unchanged for a given seed.
GRID_SIDE = 105            # sites along each grid axis
N_SITES = GRID_SIDE ** 2   # 11025 grid cells in total
N_TIME = 1095              # number of time steps
N_CLASSES = 10             # levels of the categorical covariate
NOISE_STD = 0.5            # standard deviation of the observation noise

# --- spatial coordinates: one (s1, s2) row per site -------------------------
x = y = range(GRID_SIDE)
s1, s2 = np.meshgrid(x, y)
s = np.vstack((s1.flatten(), s2.flatten())).T

# Deterministic spatial surface; it depends on the sites only through the
# shifted, normalised coordinate sum `u`.
u = (s[:, 0] + s[:, 1]) / (2 * GRID_SIDE) - 0.9
nu = 0.5 * np.sin(30 * u ** 4) * np.cos(2 * u) + u / 2

# --- temporal component, shared by every site -------------------------------
# NOTE(review): `ar1` holds four coefficients, so this is presumably an AR(3)
# lag polynomial rather than an AR(1) -- confirm against the statsmodels
# ArmaProcess docs. Names are kept because later cells may reference them.
ar1 = np.array([1, 0.6, 0.4, 0.2])
ma = np.array([1])
simulated_AR1_data = 22 + ArmaProcess(ar1, ma).generate_sample(nsample=N_TIME)
timeseries = torch.tensor(simulated_AR1_data).reshape(N_TIME, 1).float()

# --- categorical covariate: one-hot encoding of a uniform class draw --------
# One uniform draw on [0, 10) per site. `right=True` gives the bins
# a <= 1, 1 < a <= 2, ..., 9 < a, i.e. exactly the original if/elif chain.
class_draw = np.random.uniform(0, 10, size=N_SITES)
class_index = np.digitize(class_draw, bins=np.arange(1, N_CLASSES), right=True)
cov = F.one_hot(
    torch.as_tensor(class_index, dtype=torch.long), num_classes=N_CLASSES
).float()

# --- continuous covariate on [-1, 1), stored as the first design column -----
x0 = torch.tensor(np.random.uniform(-1, 1, N_SITES).reshape(-1, 1)).float()
cov_cont = torch.cat((x0, cov), 1)          # (N_SITES, 1 + N_CLASSES)
joblib.dump(cov_cont, 'X_cov')

# Regression coefficients: 2.0 for the continuous covariate, then
# 0.0, 0.5, ..., 4.5 for the ten classes.
beta = np.arange(0, 5, step=0.5)
beta = np.insert(beta, 0, [2.0])
beta = torch.tensor(beta).reshape(-1, 1).float()

# --- spatial mean: surface plus covariate effect -----------------------------
n = N_SITES  # kept under its original name for any later cell that reads it
field = nu.reshape(GRID_SIDE, GRID_SIDE)
field_tensor = torch.tensor(field).reshape(N_SITES, 1)
field_tensor = field_tensor + cov_cont @ beta
field_tensor = field_tensor.reshape(1, N_SITES)

# --- assemble the data -------------------------------------------------------
noise = torch.normal(mean=0, std=NOISE_STD, size=(N_TIME, N_SITES))

# Broadcasting (1, N_SITES) + (N_TIME, 1) replaces the original
# `time = timeseries.repeat(1, n)`, which rebound the name of the imported
# `time` module to a tensor and materialised a full (N_TIME, N_SITES) copy.
# The summed values are the same.
# `nu` is float64, so type promotion makes `simudata` float64 even though the
# other terms are float32; this is left as-is to match the data saved so far.
simudata = field_tensor + timeseries + noise

# NOTE(review): the file is called 'Data_nonGaussian' although this cell is
# headed "non-stationary"; the name is kept so existing loaders keep working.
joblib.dump(simudata, 'Data_nonGaussian')
simudata

tensor([[26.6524, 26.1220, 24.2633,  ..., 28.1134, 22.7507, 23.7831],
        [23.4869, 23.1031, 21.6013,  ..., 25.1737, 21.6259, 20.4578],
        [25.5326, 24.6844, 23.1000,  ..., 26.6931, 21.5677, 20.8792],
        ...,
        [26.5668, 24.6510, 23.6774,  ..., 27.8654, 21.9700, 22.5322],
        [26.0530, 24.1910, 22.7261,  ..., 26.1362, 20.7994, 20.3837],
        [26.2413, 25.2599, 22.4649,  ..., 27.2809, 22.3015, 21.5862]],
       dtype=torch.float64)