In [None]:
"""
Notes:
    - The number of cores for parallelization can be adjusted by the "cuda_cores" parameter.
"""

import os
import logging

import multiprocessing as mp
try:
    # Force the "spawn" start method (instead of "fork") for child processes.
    mp.set_start_method("spawn", force=True)
except RuntimeError:
    # Best effort: keep whatever start method is already in place.
    pass
import numpy as np
# NOTE: this binds the name `random` to `numpy.random`, not to Python's
# built-in `random` module.
from numpy import random
from joblib import Parallel, delayed
# Keyword arguments prepared for joblib.Parallel.
# NOTE(review): the Parallel(...) calls visible in this file do not pass
# PARALLEL_KW, so these settings are currently unused - confirm the intent.
PARALLEL_KW = dict(
    backend="loky",
    temp_folder="./tmp/joblib",
    max_nbytes=None,
    mmap_mode=None,
    # inner_max_num_threads=1,   #   joblib>=1.3
)
# Make sure the temp folder named in PARALLEL_KW exists.
os.makedirs("./tmp/joblib", exist_ok=True)

import torch
# Restrict torch to a single thread in this process.
torch.set_num_threads(1)

from tqdm import tqdm
import gc
import math

import pandas as pd
import traceback
import itertools

""" the proposed functions """
import Cind_gaussian_fun



# When a GPU is present, turn off cuDNN benchmarking and request
# deterministic cuDNN behaviour, so repeated runs are more reproducible.
if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


"""Initialize the random number generator with a fixed seed."""
def seed_torch(seed=42):
    """Seed every random number generator used in this file, for reproducibility.

    Seeds Python's built-in ``random`` module, NumPy's global generator and
    PyTorch (CPU and, when CUDA is available, all CUDA devices), and stores
    the seed in the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Integer seed shared by all generators.
    """
    # The module-level name `random` is `numpy.random` (because of
    # `from numpy import random`), so the standard-library generator must be
    # imported and seeded explicitly; otherwise it is never seeded.
    import random as py_random

    py_random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

""" Select GPU if available; otherwise use CPU and cap BLAS/Torch threads to 1 to avoid oversubscription. """
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Compare the device *type*: a torch.device never compares equal to a plain
# string, so `device == "cpu"` would silently skip this whole branch.
if device.type == "cpu":
    # NOTE(review): numpy and torch are already imported at this point, so
    # these variables presumably matter mostly for worker processes started
    # later (which inherit the environment) - confirm.
    os.environ.update({
        "OMP_NUM_THREADS": "1",
        "MKL_NUM_THREADS": "1",
        "OPENBLAS_NUM_THREADS": "1",
        "NUMEXPR_NUM_THREADS": "1",
        "OMP_WAIT_POLICY": "PASSIVE",
        "KMP_INIT_AT_FORK": "FALSE",
        "TORCH_NUM_THREADS": "1",
        "TORCH_NUM_INTEROP_THREADS": "1",
    })

print(device)


"""
Define the log function
"""
def init_logging(log_file):
    """Configure the root logger to log INFO messages to a file and the console.

    Handlers are attached only when the root logger has none yet, so calling
    this function repeatedly does not duplicate log lines. When handlers are
    already present, ``log_file`` is not opened at all.

    Args:
        log_file: Path of the log file; opened in append mode, UTF-8 encoded.

    Returns:
        The root logger.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Already configured: return before creating any handler, so no file
    # handle is opened (and leaked) for a handler that would never be attached.
    if logger.handlers:
        return logger

    formatter = logging.Formatter('%(asctime)s - %(message)s')

    file_handler = logging.FileHandler(log_file, mode='a', encoding='utf-8')
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)

    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    return logger


cuda:0


In [None]:
#-------------------- before COVID-19 period: 1 January 2016 - 31 December 2018 ----------------------------
# Sector labels; every unordered pair of sectors is examined once.
name1 = ["CS", "CD", "CSt", "Eng", "Fin", "HC", "Ind", "IT", "Mat", "RE", "Uti"]
combi_name1 = list(itertools.combinations(name1, 2))

# shape[k] records how many columns were read for sector k.
shape = torch.zeros(11)

# Read one CSV per sector (dropping its first column) and join the sectors
# column-wise, in the order given by name1.
sector_blocks = []
for k, sector in enumerate(name1):
    returns = pd.read_csv('68return_' + sector + '.csv').iloc[:, 1:].to_numpy()
    sector_blocks.append(torch.from_numpy(returns))
    shape[k] = returns.shape[1]
data = torch.cat(sector_blocks, dim=1)

combinations = list(itertools.combinations(range(11), 2))
print(shape)

# Column boundaries inside `data`: sector k occupies [bounds[k], bounds[k + 1]).
bounds = [int(torch.sum(shape[:k])) for k in range(12)]

# For every sector pair, store the columns reordered as [first | second | rest].
# stacked_tensor is created with torch's default dtype, so values copied into
# it are converted to that dtype.
stacked_tensor = torch.zeros(55, data.shape[0], data.shape[1])
for i in range(55):
    first, second = combinations[i]
    x_cols = slice(bounds[first], bounds[first + 1])
    y_cols = slice(bounds[second], bounds[second + 1])
    # Boolean column mask selecting everything outside the two chosen sectors.
    rest = torch.ones(data.shape[1], dtype=torch.bool)
    rest[x_cols] = False
    rest[y_cols] = False
    stacked_tensor[i] = torch.cat(
        (data[:, x_cols], data[:, y_cols], data[:, rest]), dim=1
    )


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
alpha = 0.05


# define the function to implement the proposed conditional independence test (CI-FNN) with sample splitting and selected n_3^{opt}
def run_simulation(i, stacked_tensor, combinations, alpha):
    """Run the conditional independence test for the i-th sector pair.

    Args:
        i: Index of the pair; selects ``combinations[i]`` and ``stacked_tensor[i]``.
        stacked_tensor: Tensor whose i-th slice holds the columns reordered as
            [first sector | second sector | all remaining sectors].
        combinations: List of ``(first, second)`` sector-index pairs.
        alpha: Significance level forwarded to ``Cind_Gtest_py``.

    Returns:
        A list of three p-values, in the order Gaussian, Mammen, Rademacher.

    Reads the module-level ``shape`` (columns per sector) and ``device``.
    """
    first, second = combinations[i]
    dx = int(shape[first])
    dy = int(shape[second])

    # Slice the pair-specific layout [X | Y | Z]. Slicing the raw `data` from
    # column 0 (as was done before) does not pick the columns of the chosen
    # sectors, because `data` keeps the sectors in their original order.
    # NOTE(review): stacked_tensor was allocated with torch.zeros (default
    # dtype), so x/y/z may have a different dtype than `data` - confirm that
    # Cind_Gtest_py accepts it.
    dat = stacked_tensor[i]
    x = dat[:, :dx]
    y = dat[:, dx:dx + dy]
    z = dat[:, dx + dy:]

    # Pair-specific seed so that each task is reproducible on its own.
    seed_torch((i + 1) * 12345)

    res_orig = Cind_gaussian_fun.Cind_Gtest_py(
        device, "S-selectn3", 'all', x, y, z, alpha, batchsize=32,
        hidden_features1=128, hidden_features2=32, lr=0.01, n_epochs=400,
        patience=30, drop_last1=False,
    )

    order = ['Gaussian', 'Mammen', 'Rademacher']
    return [float(res_orig[k]['p_value']) for k in order]



cuda_cores = 55     # number of joblib worker processes (passed as n_jobs)
n_my = 55           # number of sector pairs to test
sim_list = {'my_data_ex8': 0}

order = ['Gaussian', 'Mammen', 'Rademacher']  # 3 columns

# conduct experiments using parallel processing
for sim, dep in sim_list.items():
    try:
        with Parallel(n_jobs=cuda_cores) as parallel:
            tmp_results = parallel(
                delayed(run_simulation)(i, stacked_tensor, combinations, alpha)
                for i in tqdm(range(n_my))
            )
        # Convert all results into a single NumPy array
        M = np.asarray(tmp_results, dtype=float)          # n_my-by-3 matrix
        results_tensor = torch.from_numpy(M).float().cpu()

        # Build DataFrame: one row per sector pair, labelled by the two names
        df = pd.DataFrame(M, columns=order)

        first_names = [a for a, b in combi_name1]
        second_names = [b for a, b in combi_name1]
        df.insert(0, 'Name1', first_names)
        df.insert(1, 'Name2', second_names)

        dic_path = os.path.join('.', 'realdata')    # create a folder
        os.makedirs(dic_path, exist_ok=True)

        # save to a csv file
        file_path = os.path.join(dic_path, '68pval_nn.csv')
        df.to_csv(file_path, index=False, encoding='utf-8')

        print(results_tensor)  # n_my-by-3 matrix

        # Cleanup
        del tmp_results, M, df, results_tensor
        gc.collect()
        # Only touch the CUDA runtime when a GPU exists; on a CPU-only host
        # torch.cuda.synchronize() raises and would be reported as a failure
        # below even though the results were already saved.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

    except Exception:
        # Best effort: report the full traceback and carry on.
        error_message = f"code is wrong:\n{traceback.format_exc()}"
        print(error_message)

tensor([21., 63., 30., 23., 63., 58., 73., 73., 23., 29., 29.])


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 55/55 [00:00<00:00, 63.07it/s]


In [None]:
#------------------------- during/after COVID-19 period: 1 January 2020 - 31 December 2022 ---------------------------------------------
# Sector labels; every unordered pair of sectors is examined once.
name1 = ["CS", "CD", "CSt", "Eng", "Fin", "HC", "Ind", "IT", "Mat", "RE", "Uti"]
combi_name1 = list(itertools.combinations(name1, 2))

# shape[k] records how many columns were read for sector k.
shape = torch.zeros(11)

# Read one CSV per sector (dropping its first column) and join the sectors
# column-wise, in the order given by name1.
sector_blocks = []
for k, sector in enumerate(name1):
    returns = pd.read_csv('02return_' + sector + '.csv').iloc[:, 1:].to_numpy()
    sector_blocks.append(torch.from_numpy(returns))
    shape[k] = returns.shape[1]
data = torch.cat(sector_blocks, dim=1)

combinations = list(itertools.combinations(range(11), 2))
print(shape)

# Column boundaries inside `data`: sector k occupies [bounds[k], bounds[k + 1]).
bounds = [int(torch.sum(shape[:k])) for k in range(12)]

# For every sector pair, store the columns reordered as [first | second | rest].
# stacked_tensor is created with torch's default dtype, so values copied into
# it are converted to that dtype.
stacked_tensor = torch.zeros(55, data.shape[0], data.shape[1])
for i in range(55):
    first, second = combinations[i]
    x_cols = slice(bounds[first], bounds[first + 1])
    y_cols = slice(bounds[second], bounds[second + 1])
    # Boolean column mask selecting everything outside the two chosen sectors.
    rest = torch.ones(data.shape[1], dtype=torch.bool)
    rest[x_cols] = False
    rest[y_cols] = False
    stacked_tensor[i] = torch.cat(
        (data[:, x_cols], data[:, y_cols], data[:, rest]), dim=1
    )


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
alpha = 0.05


# define the function to implement the proposed conditional independence test (CI-FNN) with sample splitting and selected n_3^{opt}
def run_simulation(i, stacked_tensor, combinations, alpha):
    """Run the conditional independence test for the i-th sector pair.

    Args:
        i: Index of the pair; selects ``combinations[i]`` and ``stacked_tensor[i]``.
        stacked_tensor: Tensor whose i-th slice holds the columns reordered as
            [first sector | second sector | all remaining sectors].
        combinations: List of ``(first, second)`` sector-index pairs.
        alpha: Significance level forwarded to ``Cind_Gtest_py``.

    Returns:
        A list of three p-values, in the order Gaussian, Mammen, Rademacher.

    Reads the module-level ``shape`` (columns per sector) and ``device``.
    """
    first, second = combinations[i]
    dx = int(shape[first])
    dy = int(shape[second])

    # Slice the pair-specific layout [X | Y | Z]. Slicing the raw `data` from
    # column 0 (as was done before) does not pick the columns of the chosen
    # sectors, because `data` keeps the sectors in their original order.
    # NOTE(review): stacked_tensor was allocated with torch.zeros (default
    # dtype), so x/y/z may have a different dtype than `data` - confirm that
    # Cind_Gtest_py accepts it.
    dat = stacked_tensor[i]
    x = dat[:, :dx]
    y = dat[:, dx:dx + dy]
    z = dat[:, dx + dy:]

    # Pair-specific seed so that each task is reproducible on its own.
    seed_torch((i + 1) * 12345)

    res_orig = Cind_gaussian_fun.Cind_Gtest_py(
        device, "S-selectn3", 'all', x, y, z, alpha, batchsize=32,
        hidden_features1=128, hidden_features2=32, lr=0.01, n_epochs=400,
        patience=30, drop_last1=False,
    )

    order = ['Gaussian', 'Mammen', 'Rademacher']
    return [float(res_orig[k]['p_value']) for k in order]



cuda_cores = 55    # number of joblib worker processes (passed as n_jobs)
n_my = 55          # number of sector pairs to test
sim_list = {'my_data_ex8': 0}

order = ['Gaussian', 'Mammen', 'Rademacher']  # 3 columns

# conduct experiments using parallel processing
for sim, dep in sim_list.items():
    try:
        with Parallel(n_jobs=cuda_cores) as parallel:
            tmp_results = parallel(
                delayed(run_simulation)(i, stacked_tensor, combinations, alpha)
                for i in tqdm(range(n_my))
            )
        # Convert all results into a single NumPy array
        M = np.asarray(tmp_results, dtype=float)          # n_my-by-3 matrix
        results_tensor = torch.from_numpy(M).float().cpu()

        # Build DataFrame: one row per sector pair, labelled by the two names
        df = pd.DataFrame(M, columns=order)

        first_names = [a for a, b in combi_name1]
        second_names = [b for a, b in combi_name1]
        df.insert(0, 'Name1', first_names)
        df.insert(1, 'Name2', second_names)

        dic_path = os.path.join('.', 'realdata')   # create a folder
        os.makedirs(dic_path, exist_ok=True)
        # save to a csv file
        file_path = os.path.join(dic_path, '02pval_nn.csv')
        df.to_csv(file_path, index=False, encoding='utf-8')

        print(results_tensor)  # n_my-by-3 matrix

        # Cleanup
        del tmp_results, M, df, results_tensor
        gc.collect()
        # Only touch the CUDA runtime when a GPU exists; on a CPU-only host
        # torch.cuda.synchronize() raises and would be reported as a failure
        # below even though the results were already saved.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

    except Exception:
        # Best effort: report the full traceback and carry on.
        error_message = f"code is wrong:\n{traceback.format_exc()}"
        print(error_message)