This notebook contains the code for the ND, DMD and TVDMD analyses, including:

- extract features

- regression: for each method, two models (with and without graph metrics) are fitted, each only once

In [1]:
import sys
sys.path.append("/home/huaqingj/MyResearch/TVDN-AD/")

In [3]:
from pyTVDN import TVDNDetect
from sklearn.cluster import KMeans
from pathlib import Path
from scipy.io import loadmat
import numpy as np
from easydict import EasyDict as edict
import matplotlib.pyplot as plt
import os
from scipy import signal
import pickle
import seaborn as sns
#from tqdm.autonotebook import tqdm
from tqdm import tqdm
import numbers
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [4]:
import multiprocessing as mp

In [5]:
# compute the graph metrics
import bct

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import roc_auc_score, roc_curve

import warnings
warnings.filterwarnings('ignore')

In [7]:
# Bridge to R through rpy2: source the project's R helpers and expose two of
# them as Python callables.
# NOTE(review): the relative path is resolved against the working directory at
# the time this cell runs, i.e. before the os.chdir() call in a later cell —
# confirm the notebook is started from the expected folder.
import rpy2.robjects as robj
robj.r.source("../Rcode/utils.R")

# Handles to the R objects named "DMD" and "chgF"; neither handle is used in
# the cells visible here.
RsegDMD = robj.r["DMD"]
RchgF = robj.r["chgF"]

### Load data

In [8]:
# Work from the project root so that the relative paths used below resolve.
# NOTE(review): hard-coded, user-specific absolute path — the notebook will not
# run elsewhere without editing this line.
os.chdir("/home/huaqingj/MyResearch/TVDN-AD")
# Detection pickles are read from, and result pickles written to, this folder.
resDir = Path("./results")
# Defined for completeness; not referenced in the cells visible here.
dataDir = Path("./data")

In [9]:
# Collect the per-subject detection pickles of each group and order them by
# the integer in the 4th "_"-separated field of the file stem
# (e.g. "AD_data_det_2_lamb1.0E-04_decimate5" -> 2), so that the order is
# numeric rather than lexicographic.
ADdets = list(resDir.glob("AD_*04_decimate5.pkl"))
ADdetsPs = sorted(ADdets, key=lambda p:int(p.stem.split("_")[3]))
Ctrldets = list(resDir.glob("Ctrl_*04_decimate5.pkl"))
CtrldetsPs = sorted(Ctrldets, key=lambda p:int(p.stem.split("_")[3]))

In [10]:
# Load the per-subject keep flags. kpAD / kpCtrl are later zipped with the
# sorted path lists and a subject is used only when its flag equals 1.
# kpAll is not referenced in the cells visible here.
# NOTE: pickle.load executes arbitrary code — only load trusted files.
with open("./OtherFils/KpIdxsAll.pkl", "rb") as sf:
    kpAD, kpCtrl, kpAll = pickle.load(sf)

### Some fns

In [11]:
def wUFn(Amat):
    """Summarise a square matrix by its leading eigen-modes.

    The eigenvalues are taken in the order returned by ``np.linalg.eig``.
    ``r`` is the number of leading eigenvalues needed for the cumulative sum
    of their moduli to exceed 80% of the total, optionally extended by one
    (see below).

    Args:
        Amat: square 2-D array.

    Returns:
        Tuple ``(wU, r, eigVals[:r])`` where ``wU`` is the modulus of the
        first ``r`` eigenvectors weighted by their eigenvalues and summed,
        ``r`` is the selected rank and ``eigVals[:r]`` the kept eigenvalues.
    """
    eigVals, eigVecs = np.linalg.eig(Amat)
    absVals = np.abs(eigVals)
    # Count the eigenvalues that stay at or below the 80% share, then add the
    # one that crosses it.
    r = np.sum(np.cumsum(absVals)/np.sum(absVals) <= 0.8) + 1
    # Extend by one so a complex-conjugate pair is not split between the kept
    # and the dropped eigenvalues. The `r < len(eigVals)` guard is required:
    # when the threshold is only reached at the very last eigenvalue there is
    # no eigVals[r] to look at (this used to raise IndexError).
    # NOTE(review): the imaginary-part test is also true when both neighbours
    # are real, so r is incremented in that case as well — confirm intended.
    if r < len(eigVals) and (eigVals[r-1].imag + eigVals[r].imag) == 0:
        r = r + 1
    wU = np.abs(np.matmul(eigVecs[:, :r], eigVals[:r]))
    return wU, r, eigVals[:r]

In [12]:
def minmax(x):
    """Linearly rescale ``x`` so that its minimum maps to 0 and its maximum to 1.

    Note: a function with the same name and the same behaviour is defined
    again further down in this notebook and replaces this one once that cell
    has run.
    """
    lowest = np.min(x)
    spread = np.max(x) - lowest
    return (x - lowest)/spread

def U2BrainVec(wU):
    """Spread 68 per-region values over the brain-plot template.

    Every entry of the global ``DKtmplate`` equal to ``k`` (k = 1..68) is
    replaced by ``wU[k-1]``; entries with any other label stay 0.

    Args:
        wU: sequence with at least 68 values, indexed by region label - 1.

    Returns:
        float64 array with the same shape as ``DKtmplate``.
    """
    brainVec = np.zeros_like(DKtmplate, dtype=np.float64)
    for roi in range(68):
        label = roi + 1  # template labels are 1-based
        brainVec[DKtmplate == label] = wU[roi]
    return brainVec

def reorderU(wU):
    """Interleave the first 34 entries of ``wU`` with the remaining ones.

    The result holds ``wU[0], wU[34], wU[1], wU[35], ...``: entries 0..33 go
    to the even positions and entries 34.. to the odd positions.

    Returns:
        float64 array shaped like ``wU``.
    """
    interleaved = np.zeros_like(wU, dtype=np.float64)
    firstPart, secondPart = wU[:34], wU[34:]
    interleaved[::2] = firstPart
    interleaved[1::2] = secondPart
    return interleaved

In [13]:
# Read the brain-plot template: one integer label per line of the text file.
# DKtmplate is the global array that U2BrainVec matches region labels against.
with open("./OtherFils/BNVtemplate_DK68.txt", "r") as tf:
    DKtmplateRaw = tf.readlines()
DKtmplate = np.array([int(x.strip()) for x in DKtmplateRaw])

In [14]:
def discretVec(Vec, ps=[0, 0.25, 0.5, 0.75, 1]):
    """Replace the values of ``Vec`` by the mean of their quantile bin.

    The bin edges are the ``ps`` quantiles of ``Vec``; each bin is half-open,
    ``[lower, upper)``.

    WARNING: ``Vec`` itself is overwritten and the very same array object is
    returned, so the caller's variable is changed too.

    NOTE(review): because the last bin is also half-open, values equal to the
    top edge (the maximum when ``ps`` ends at 1) are never re-binned and keep
    their original value — confirm this is intended.

    Args:
        Vec: 1-D float array, modified in place.
        ps: increasing quantile levels used as bin edges.

    Returns:
        ``Vec`` (same object) after discretisation.
    """
    edges = np.quantile(Vec, ps)
    for lower, upper in zip(edges[:-1], edges[1:]):
        inBin = (Vec >= lower) & (Vec < upper)
        Vec[inBin] = np.mean(Vec[inBin])
    return Vec

In [15]:
def obt_absFC(mat):
    """Absolute, Fisher-transformed correlation matrix of the rows of ``mat``.

    Steps:
        (1) Pearson correlation between the rows of ``mat``;
        (2) set the diagonal to zero;
        (3) Fisher z-transform (``arctanh``);
        (4) absolute value.

    Returns:
        Square array with one row/column per row of ``mat`` and a zero
        diagonal.
    """
    corr = np.corrcoef(mat)
    offDiag = corr - np.diag(np.diag(corr))
    return np.abs(np.arctanh(offDiag))

In [16]:
def statAmat(curY, curX):
    """Least-squares estimate of ``A`` in ``curY ~ A @ curX``.

    ``A = (Y X^T / n) @ pinv(X X^T / n)`` where the pseudo-inverse keeps only
    the leading singular directions of ``X X^T / n`` that account for 99.9%
    of the sum of singular values (never fewer than two).

    Args:
        curY: 2-D array of targets, one column per sample.
        curX: 2-D array of predictors with the same number of columns.

    Returns:
        The estimated matrix ``A``.
    """
    nObs = curX.shape[-1]
    covXX = (curX @ curX.T)/nObs
    covYX = (curY @ curX.T)/nObs
    leftVecs, singVals, rightVecsT = np.linalg.svd(covXX)
    share = np.cumsum(singVals)/np.sum(singVals)
    # argmax on a boolean array gives the first index where the share is reached
    rank = np.argmax(share >= 0.999) + 1
    # keep at least two directions: a rank-1 estimate causes problems for FC
    rank = max(rank, 2)
    pinvXX = leftVecs[:, :rank] @ np.diag(1/singVals[:rank]) @ rightVecsT[:rank, :]
    return covYX @ pinvXX

In [17]:
def minmax(mat):
    """Linearly rescale ``mat`` so that its smallest entry is 0 and its largest 1.

    Note: this redefines the ``minmax`` declared in an earlier cell of this
    notebook; both compute the same thing.
    """
    lowest, highest = np.min(mat), np.max(mat)
    return (mat - lowest)/(highest - lowest)

def minmax_sysmat(mat):
    """Min-max scale a matrix using its strictly-lower-triangular entries.

    The minimum and maximum are taken over the entries below the main
    diagonal only; the whole matrix is rescaled with them and the diagonal of
    the result is then set to zero.
    """
    belowDiag = mat[np.tril_indices_from(mat, k=-1)]
    lowest, highest = np.min(belowDiag), np.max(belowDiag)
    scaled = (mat - lowest)/(highest - lowest)
    return scaled - np.diag(np.diag(scaled))

In [18]:
def sysMatShuffle(mat):
    """Return a randomly shuffled symmetric surrogate of ``mat``.

    The strictly-lower-triangular entries of ``mat`` are permuted among
    themselves and mirrored to the upper triangle; the diagonal entries are
    permuted separately. Only the lower triangle and the diagonal of ``mat``
    are read, and ``mat`` itself is left untouched. Randomness comes from the
    global ``np.random`` state (two ``shuffle`` calls, lower triangle first).

    Args:
        mat: square 2-D array.

    Returns:
        New symmetric array of the same shape and dtype.
    """
    lowerMask = np.tril(np.ones_like(mat), k=-1) == 1
    # Boolean indexing already yields a fresh array, safe to shuffle in place.
    eles = mat[lowerMask]
    np.random.shuffle(eles)
    sMat = np.zeros_like(mat)
    sMat[lowerMask] = eles
    # np.diag() on a 2-D array hands back a read-only view of the diagonal;
    # take a writable copy before shuffling instead of relying on shuffle
    # writing through that view.
    diag_vs = np.diag(mat).copy()
    np.random.shuffle(diag_vs)
    sMat = sMat + sMat.T + np.diag(diag_vs)
    return sMat

In [19]:
def NetMeasureFn(mat, nrep=50):
    """Graph measures of ``mat``, raw and normalised by shuffled surrogates.

    Three quantities are computed with ``bct`` on ``mat`` and on ``nrep``
    surrogates drawn with ``sysMatShuffle``:
      * C: mean of ``bct.clustering_coef_wu``;
      * L: harmonic mean of the off-diagonal entries of the first output of
        ``bct.distance_wei``;
      * Q: second output of ``bct.modularity_und``.

    NOTE(review): BCT documents ``distance_wei`` as expecting a
    connection-length matrix and ``clustering_coef_wu`` as expecting weights
    scaled to [0, 1]; ``mat`` is passed to both unchanged — confirm this is
    the intended input convention.

    Args:
        mat: square weighted matrix (presumably symmetric with a zero
            diagonal, as produced by ``obt_absFC`` — confirm).
        nrep: number of shuffled surrogates.

    Returns:
        Tuple ``(tmC, tmL, tQ, mC, mL, Q)``: the three measures divided by
        their mean over the surrogates, followed by the three raw measures.
    """
    # Measures on the observed matrix.
    Cs = bct.clustering_coef_wu(mat)
    mC = np.mean(Cs)
    Ls = bct.distance_wei(mat)[0]
    # Harmonic mean over all off-diagonal entries.
    mL = 1/np.mean(1/Ls[np.eye(Ls.shape[0])!=1])
    _, Q = bct.modularity_und(mat)
    
    # Same measures on nrep shuffled surrogates. The order of the calls
    # matters for reproducibility because sysMatShuffle draws from the global
    # numpy random state.
    ranLs = []
    ranQs = []
    ranCs = []
    for i in range(nrep):
        sMat = sysMatShuffle(mat)
        Cs = bct.clustering_coef_wu(sMat)
        mCi = np.mean(Cs)
        ranCs.append(mCi)
        Ls = bct.distance_wei(sMat)[0]
        mLi = 1/np.mean(1/Ls[np.eye(Ls.shape[0])!=1])
        ranLs.append(mLi)
        _, Qi = bct.modularity_und(sMat)
        ranQs.append(Qi)
    
    # Normalise each observed measure by its surrogate mean.
    tQ = Q/np.mean(ranQs)
    tmL  = mL/np.mean(ranLs)
    tmC  = mC/np.mean(ranCs)
    return tmC, tmL, tQ, mC, mL, Q

In [20]:
def TuningCFn(inpX, inpY, Cs=[0.1, 0.2, 0.4, 0.8, 1, 1.6, 3.2, 6.4], penalty="l2"):
    """Pick the logistic-regression ``C`` with the best leave-one-out AUC.

    For every candidate ``C`` a ``LogisticRegression`` is refitted once per
    left-out sample; the held-out probabilities of class 1 are pooled and
    scored with ``roc_auc_score`` against ``inpY``.

    Args:
        inpX: 2-D feature array, one row per sample.
        inpY: 1-D array of binary labels aligned with the rows of ``inpX``.
        Cs: candidate values of ``C``. On ties the first
            best candidate is returned.
        penalty: penalty name forwarded to ``LogisticRegression``.

    Returns:
        EasyDict with ``optC`` (selected value), ``Cs`` (the candidates) and
        ``aucCs`` (leave-one-out AUC of each candidate).
    """
    # The splitter is stateless, so one instance serves every candidate.
    loo = LeaveOneOut()
    aucCs = []
    for C in Cs:
        eProbs = []
        for trIdxs, testIdxs in loo.split(inpX):
            clf = LogisticRegression(penalty=penalty, random_state=0, C=C)
            clf.fit(inpX[trIdxs, :], inpY[trIdxs])
            eProbs.append(clf.predict_proba(inpX[testIdxs, :]))
        # (n, 1, 2) -> (n, 2); column 1 is the probability of class 1
        eProbs = np.array(eProbs).squeeze()
        # Only the AUC is needed here; the ROC curve itself was computed and
        # thrown away before, so that call has been dropped.
        aucCs.append(roc_auc_score(inpY, eProbs[:, 1]))

    optC = Cs[np.argmax(aucCs)]
    res = edict()
    res["optC"] = optC
    res["Cs"] = Cs
    res["aucCs"] = aucCs
    return res

In [21]:
def rSelFn(Amat):
    """Select the leading eigen-modes of a square matrix.

    The eigenvalues are taken in the order returned by ``np.linalg.eig``.
    ``rSel`` is the smallest number of leading eigenvalues whose moduli sum
    to more than 80% of the total, optionally extended by one (see below).

    Args:
        Amat: square 2-D array.

    Returns:
        Tuple ``(rSel, wU, eigVals[:rSel])`` where ``wU`` is the modulus of
        the first ``rSel`` eigenvectors weighted by their eigenvalues and
        summed.
    """
    eigVals, eigVecs = np.linalg.eig(Amat)
    absVals = np.abs(eigVals)
    rSel = np.where(np.cumsum(absVals)/np.sum(absVals) >0.8)[0][0] + 1
    # If the cut would break a conjugate eigenvalue pair, keep one more.
    # The `rSel < len(eigVals)` guard is required: when the threshold is only
    # reached at the last eigenvalue there is no eigVals[rSel] to inspect
    # (this used to raise IndexError).
    # NOTE(review): the imaginary-part test is also true when both neighbours
    # are real, so rSel is incremented in that case as well — confirm intended.
    if rSel < len(eigVals) and (eigVals[rSel-1].imag + eigVals[rSel].imag) == 0:
        rSel = rSel + 1
    wU = np.abs(np.matmul(eigVecs[:, :rSel], eigVals[:rSel]))
    return rSel, wU, eigVals[:rSel]

In [36]:
def dynDMDfs(Xmat, wsize=32*6, stepsize=4*6, withGraph=False):
    """Time-varying DMD features of one recording.

    1. Slide a window of ``wsize`` samples in steps of ``stepsize`` over the
       last axis of ``Xmat``; in each window fit ``statAmat`` on the
       one-step-ahead pairs and keep the ``wU`` vector from ``rSelFn``.
    2. Cluster the window ``wU`` vectors with 3-means; a change point is
       placed wherever the cluster label of consecutive windows differs.
    3. Refit ``statAmat`` / ``rSelFn`` on each segment between change points
       and aggregate the segment results.

    Args:
        Xmat: 2-D array, channels x time.
        wsize: window length in samples.
        stepsize: shift between consecutive windows in samples.
        withGraph: when True, ``NetMeasureFn(obt_absFC(A))`` is evaluated for
            every segment and collected in ``graph_metrics``. The default
            (False) reproduces the previous behaviour, where this step sat
            behind an unreachable ``ix == -10000`` test and ``graph_metrics``
            was always empty. NOTE(review): which segments the graph metrics
            were originally meant for cannot be told from the code — confirm
            before relying on ``withGraph=True``.

    Returns:
        dict with keys ``r`` (largest segment rank), ``ncpts`` (number of
        change points), ``wUmeanAll`` (mean segment ``wU``), ``ecpts``
        (change points, equal to the window start index plus one),
        ``graph_metrics``, ``absEigValMean`` (mean modulus of all kept
        eigenvalues) and ``absImagEigValMaxMean`` (mean over segments of the
        largest absolute imaginary part).
    """
    nTime = Xmat.shape[-1]
    seqslide = np.arange(0, nTime-wsize, stepsize)

    # --- 1. sliding-window features -------------------------------------
    wUs = []
    for lowL in seqslide:
        upL = lowL + wsize
        # one-step-ahead pairs inside the window: X[t] -> X[t+1]
        curX = Xmat[:, lowL:(upL-1)]
        curY = Xmat[:, (lowL+1):upL]
        _, wU, _ = rSelFn(statAmat(curY, curX))
        wUs.append(wU)

    # --- 2. change points from the window clustering ---------------------
    wUsArr = np.array(wUs)
    kmeans = KMeans(n_clusters=3, random_state=0).fit(wUsArr)
    ecpts = seqslide[np.where(np.diff(kmeans.labels_)!=0)[0]+1]+1

    # Segment boundaries; the "-1" undoes the "+1" applied to ecpts above.
    ecptsFull = np.concatenate([[0], ecpts-1, [nTime]])

    # --- 3. per-segment refit ---------------------------------------------
    rSels = []
    wUsNew = []
    absEigVals = []
    graph_metrics = []
    absImagEigValsMax = []
    for lowL, upL in zip(ecptsFull[:-1], ecptsFull[1:]):
        curX = Xmat[:, lowL:(upL-1)]
        curY = Xmat[:, (lowL+1):upL]
        curAmat = statAmat(curY, curX)

        # Optional graph metrics (NetMeasureFn runs many shuffles, so this is slow).
        if withGraph:
            conMat = obt_absFC(curAmat)
            graph_metrics.append(NetMeasureFn(conMat))

        rSel, wU, eigVal = rSelFn(curAmat)
        rSels.append(rSel)
        wUsNew.append(wU)
        absEigVals.extend(np.abs(eigVal))
        absImagEigValsMax.append(np.abs(eigVal.imag).max())

    res = {
            "r": np.max(rSels),
            "ncpts": len(ecpts),
            "wUmeanAll": np.array(wUsNew).mean(0),
            "ecpts": ecpts,
            "graph_metrics": graph_metrics,
            "absEigValMean": np.mean(absEigVals),
            "absImagEigValMaxMean": np.mean(absImagEigValsMax)
        }

    return res


### Run results

#### TVDMD

In [37]:
# Time-varying DMD features for the AD group (sequential run).

ADdynDMD = []
# kpAD is aligned with the sorted path list; only subjects flagged 1 are used.
# NOTE: zip() stops silently at the shorter input if the lengths differ.
for ADdetP, kpIx in zip(tqdm(ADdetsPs), kpAD):
    if kpIx == 1:
        # NOTE: pickle.load executes arbitrary code — only load trusted files.
        with open(ADdetP, "rb") as f:
            detObj = pickle.load(f)
        res = dynDMDfs(detObj.Xmat)
        ADdynDMD.append(res)

# One result dict per kept subject, in path order.
filName = f"AD_data_dynDMD.pkl"
with open(resDir/filName, "wb") as f:
    pickle.dump(ADdynDMD, f)

100%|██████████| 88/88 [03:49<00:00,  2.61s/it]


In [38]:
# Time-varying DMD features for the control group (sequential run).
CtrldynDMD = []
# kpCtrl is aligned with the sorted path list; only subjects flagged 1 are used.
# NOTE: zip() stops silently at the shorter input if the lengths differ.
for CtrldetP, kpIx in zip(tqdm(CtrldetsPs), kpCtrl):
    if kpIx == 1:
        # NOTE: pickle.load executes arbitrary code — only load trusted files.
        with open(CtrldetP, "rb") as f:
            detObj = pickle.load(f)
        res = dynDMDfs(detObj.Xmat)
        CtrldynDMD.append(res)

# One result dict per kept subject, in path order.
filName = f"Ctrl_data_dynDMD.pkl"
with open(resDir/filName, "wb") as f:
    pickle.dump(CtrldynDMD, f)

100%|██████████| 92/92 [04:11<00:00,  2.73s/it]


In [87]:
### Same AD computation as the sequential loop, spread over 6 worker processes.
def runFn(path):
    """Load one detection pickle and return ``dynDMDfs`` of its ``Xmat``.

    Prints the path before and after the computation as a progress trace.
    """
    print(path)
    with open(path, "rb") as f:
        detObj = pickle.load(f)
    res = dynDMDfs(detObj.Xmat)
    print(f"Finished {path}")
    return res

if __name__ == "__main__":
    with mp.Pool(6) as pool:
        # Submit every path (kept or not); results come back in submission order.
        res_proc = []
        for path in ADdetsPs[:]:
            res_proc.append(pool.apply_async(runFn, [path,]))
        res = [ix.get() for ix in res_proc] # blocks until each task has finished
    pool.join() # the pool was already terminated when the with-block exited; this only waits for the workers
    

    # Keep only the subjects flagged 1 in kpAD (filtering happens after the
    # computation, so excluded subjects are still processed above).
    res_AD = [res[ix] for ix in range(len(res)) if kpAD[ix]==1]
    # NOTE(review): despite the "_withgraph" file name, dynDMDfs is called with
    # its defaults here, which leaves "graph_metrics" empty — confirm.
    filName = f"AD_data_dynDMD_withgraph.pkl"
    with open(resDir/filName, "wb") as f:
        pickle.dump(res_AD, f)

results/AD_data_det_2_lamb1.0E-04_decimate5.pkl
results/AD_data_det_1_lamb1.0E-04_decimate5.pkl
results/AD_data_det_0_lamb1.0E-04_decimate5.pkl
results/AD_data_det_3_lamb1.0E-04_decimate5.pkl
results/AD_data_det_5_lamb1.0E-04_decimate5.pkl
results/AD_data_det_4_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det_0_lamb1.0E-04_decimate5.pkl
results/AD_data_det_6_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det_3_lamb1.0E-04_decimate5.pkl
results/AD_data_det_7_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det_2_lamb1.0E-04_decimate5.pkl
results/AD_data_det_8_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det_1_lamb1.0E-04_decimate5.pkl
results/AD_data_det_9_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det_5_lamb1.0E-04_decimate5.pkl
results/AD_data_det_10_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det_4_lamb1.0E-04_decimate5.pkl
results/AD_data_det_11_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det_9_lamb1.0E-04_decimate5.pkl
results/AD_data_det_12_

results/AD_data_det_80_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det_74_lamb1.0E-04_decimate5.pkl
results/AD_data_det_81_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det_78_lamb1.0E-04_decimate5.pkl
results/AD_data_det_82_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det_76_lamb1.0E-04_decimate5.pkl
results/AD_data_det_83_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det_81_lamb1.0E-04_decimate5.pkl
results/AD_data_det_84_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det_82_lamb1.0E-04_decimate5.pkl
results/AD_data_det_85_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det_80_lamb1.0E-04_decimate5.pkl
results/AD_data_det_86_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det_77_lamb1.0E-04_decimate5.pkl
results/AD_data_det_87_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det_83_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det_87_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det_86_lamb1.0E-04_decimate5.pkl
Finished results/AD_data_det

In [88]:
### Same control computation as the sequential loop, spread over 6 worker processes.
def runFn(path):
    """Load one detection pickle and return ``dynDMDfs`` of its ``Xmat``.

    Prints the path before and after the computation as a progress trace.
    (Same definition as in the AD multiprocessing cell.)
    """
    print(path)
    with open(path, "rb") as f:
        detObj = pickle.load(f)
    res = dynDMDfs(detObj.Xmat)
    print(f"Finished {path}")
    return res

if __name__ == "__main__":
    with mp.Pool(6) as pool:
        # Submit every path (kept or not); results come back in submission order.
        res_proc = []
        for path in CtrldetsPs[:]:
            res_proc.append(pool.apply_async(runFn, [path,]))
        res = [ix.get() for ix in res_proc] # blocks until each task has finished
    pool.join() # the pool was already terminated when the with-block exited; this only waits for the workers
    

    # Keep only the subjects flagged 1 in kpCtrl (filtering happens after the
    # computation, so excluded subjects are still processed above).
    res_Ctrl = [res[ix] for ix in range(len(res)) if kpCtrl[ix]==1]
    # NOTE(review): despite the "_withgraph" file name, dynDMDfs is called with
    # its defaults here, which leaves "graph_metrics" empty — confirm.
    filName = f"Ctrl_data_dynDMD_withgraph.pkl"
    with open(resDir/filName, "wb") as f:
        pickle.dump(res_Ctrl, f)

results/Ctrl_data_det_2_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_0_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_1_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_3_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_5_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_4_lamb1.0E-04_decimate5.pkl
Finished results/Ctrl_data_det_4_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_6_lamb1.0E-04_decimate5.pkl
Finished results/Ctrl_data_det_5_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_7_lamb1.0E-04_decimate5.pkl
Finished results/Ctrl_data_det_2_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_8_lamb1.0E-04_decimate5.pkl
Finished results/Ctrl_data_det_0_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_9_lamb1.0E-04_decimate5.pkl
Finished results/Ctrl_data_det_7_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_10_lamb1.0E-04_decimate5.pkl
Finished results/Ctrl_data_det_3_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_11_lamb1.0E-04_decimate5.pkl
Finished results/Ctrl_data_det_6_lamb1.0E-04

results/Ctrl_data_det_77_lamb1.0E-04_decimate5.pkl
Finished results/Ctrl_data_det_74_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_78_lamb1.0E-04_decimate5.pkl
Finished results/Ctrl_data_det_77_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_79_lamb1.0E-04_decimate5.pkl
Finished results/Ctrl_data_det_75_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_80_lamb1.0E-04_decimate5.pkl
Finished results/Ctrl_data_det_73_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_81_lamb1.0E-04_decimate5.pkl
Finished results/Ctrl_data_det_76_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_82_lamb1.0E-04_decimate5.pkl
Finished results/Ctrl_data_det_64_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_83_lamb1.0E-04_decimate5.pkl
Finished results/Ctrl_data_det_78_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_84_lamb1.0E-04_decimate5.pkl
Finished results/Ctrl_data_det_81_lamb1.0E-04_decimate5.pkl
results/Ctrl_data_det_85_lamb1.0E-04_decimate5.pkl
Finished results/Ctrl_data_det_80_lamb1.0E-04_decimate5.pkl
r

#### Static DMD and ND(TV)

In [18]:
# Static (whole-recording) network estimates for the AD group, two variants:
#   DMD: regress X[:, t+1] on X[:, t]
#   TV : regress detObj.dXmat on detObj.Xmat
# For each variant the fitted matrix, its absolute Fisher-z correlation matrix
# and the graph measures from NetMeasureFn are stored.
ADNWsDMD = []
ADNWsTV = []
for ADdetP, kpIx in zip(tqdm(ADdetsPs), kpAD):
    if kpIx == 1:
        # NOTE: pickle.load executes arbitrary code — only load trusted files.
        with open(ADdetP, "rb") as f:
            detObj = pickle.load(f)
        
        NWDMD = edict()
        NWTV = edict()
        
        # --- DMD variant: one-step-ahead regression ---
        nYmat = detObj.Xmat
        curY, curX = nYmat[:, 1:], nYmat[:, :-1]
        AmatDMD = statAmat(curY, curX)
        # correlation between the rows of the fitted matrix
        conMatDMD = obt_absFC(AmatDMD)
        NWDMD.Amat = AmatDMD
        NWDMD.stdAmat = conMatDMD  # holds the abs-FC matrix, despite the name
        NWDMD.ms = NetMeasureFn(conMatDMD)
        ADNWsDMD.append(NWDMD)
        
        # --- TV variant: regress dXmat on Xmat ---
        curY, curX = detObj.dXmat, detObj.Xmat
        AmatTV = statAmat(curY, curX)
        conMatTV = obt_absFC(AmatTV)
        NWTV.Amat = AmatTV
        NWTV.stdAmat = conMatTV  # holds the abs-FC matrix, despite the name
        NWTV.ms = NetMeasureFn(conMatTV)
        ADNWsTV.append(NWTV)
    
# One entry per kept subject, in path order.
filName = f"AD_data_NWM_TV.pkl"
with open(resDir/filName, "wb") as f:
    pickle.dump(ADNWsTV, f)
filName = f"AD_data_NWM_DMD.pkl"
with open(resDir/filName, "wb") as f:
    pickle.dump(ADNWsDMD, f)

100%|██████████| 88/88 [35:07<00:00, 23.94s/it]


In [19]:
# Static (whole-recording) network estimates for the control group, two variants:
#   DMD: regress X[:, t+1] on X[:, t]
#   TV : regress detObj.dXmat on detObj.Xmat
# For each variant the fitted matrix, its absolute Fisher-z correlation matrix
# and the graph measures from NetMeasureFn are stored.
CtrlNWsDMD = []
CtrlNWsTV = []
for CtrldetP, kpIx in zip(tqdm(CtrldetsPs), kpCtrl):
    if kpIx == 1:
        # NOTE: pickle.load executes arbitrary code — only load trusted files.
        with open(CtrldetP, "rb") as f:
            detObj = pickle.load(f)
        
        NWDMD = edict()
        NWTV = edict()
        
        # --- DMD variant: one-step-ahead regression ---
        nYmat = detObj.Xmat
        curY, curX = nYmat[:, 1:], nYmat[:, :-1]
        AmatDMD = statAmat(curY, curX)
        # correlation between the rows of the fitted matrix
        conMatDMD = obt_absFC(AmatDMD)
        NWDMD.Amat = AmatDMD
        NWDMD.stdAmat = conMatDMD  # holds the abs-FC matrix, despite the name
        NWDMD.ms = NetMeasureFn(conMatDMD)
        CtrlNWsDMD.append(NWDMD)
        
        # --- TV variant: regress dXmat on Xmat ---
        curY, curX = detObj.dXmat, detObj.Xmat
        AmatTV = statAmat(curY, curX)
        conMatTV = obt_absFC(AmatTV)
        NWTV.Amat = AmatTV
        NWTV.stdAmat = conMatTV  # holds the abs-FC matrix, despite the name
        NWTV.ms = NetMeasureFn(conMatTV)
        CtrlNWsTV.append(NWTV)
    
# One entry per kept subject, in path order.
filName = f"Ctrl_data_NWM_TV.pkl"
with open(resDir/filName, "wb") as f:
    pickle.dump(CtrlNWsTV, f)
filName = f"Ctrl_data_NWM_DMD.pkl"
with open(resDir/filName, "wb") as f:
    pickle.dump(CtrlNWsDMD, f)

100%|██████████| 92/92 [35:26<00:00, 23.11s/it]


### DMD

In [25]:
# Reload the static-DMD results written by the "Static DMD and ND(TV)" cells,
# so this section can be run without repeating the slow computation.
# NOTE: pickle.load executes arbitrary code — only load trusted files.
filName = f"Ctrl_data_NWM_DMD.pkl"
with open(resDir/filName, "rb") as f:
    CtrlDMD = pickle.load(f)
filName = f"AD_data_NWM_DMD.pkl"
with open(resDir/filName, "rb") as f:
    ADDMD = pickle.load(f)

In [26]:
# Per-subject feature records for the AD group (static DMD).
fsAll = []
for v in ADDMD:
    fs = edict()
    # v["ms"] is the NetMeasureFn tuple (tmC, tmL, tQ, mC, mL, Q):
    # surrogate-normalised measures first, raw measures last.
    fs.tC = v["ms"][0]
    fs.tL = v["ms"][1]
    fs.tQ = v["ms"][2]
    fs.C = v["ms"][3]
    fs.L = v["ms"][4]
    fs.Q = v["ms"][5]
    # Eigen-mode summary of the fitted matrix: weighted mode vector, rank,
    # and the kept eigenvalues.
    fs.wU, fs.r, eigVal = wUFn(v["Amat"])
    fs.absEigValMean = np.abs(eigVal).mean()
    fsAll.append(fs)
# fsAD / fsCtrl are reused (overwritten) by the later TV and dynamic sections.
fsAD = fsAll

In [27]:
# Per-subject feature records for the control group (static DMD).
fsAll = []
for v in CtrlDMD:
    fs = edict()
    # v["ms"] is the NetMeasureFn tuple (tmC, tmL, tQ, mC, mL, Q):
    # surrogate-normalised measures first, raw measures last.
    fs.tC = v["ms"][0]
    fs.tL = v["ms"][1]
    fs.tQ = v["ms"][2]
    fs.C = v["ms"][3]
    fs.L = v["ms"][4]
    fs.Q = v["ms"][5]
    # Eigen-mode summary of the fitted matrix: weighted mode vector, rank,
    # and the kept eigenvalues.
    fs.wU, fs.r, eigVal = wUFn(v["Amat"])
    fs.absEigValMean = np.abs(eigVal).mean()
    fsAll.append(fs)
# fsAD / fsCtrl are reused (overwritten) by the later TV and dynamic sections.
fsCtrl = fsAll

In [None]:
# Group-mean wU maps (static DMD), discretised into nDis quantile bins,
# rescaled to [0, 1] and written out for brain plotting.
nDis = 10
mwUAD = np.array([fs.wU for fs in fsAD]).mean(axis=0)
mwUCtrl = np.array([fs.wU for fs in fsCtrl]).mean(axis=0)
# discretVec (as defined in this notebook) writes into its argument. The
# saved maps used to be discretised only through that side effect, while the
# returned arrays were never used. Pass a copy and use the returned array
# explicitly: the files written are the same, the raw means stay intact.
disMwUAD = discretVec(mwUAD.copy(), np.linspace(0, 1, nDis+1))
disMwUCtrl  = discretVec(mwUCtrl.copy(), np.linspace(0, 1, nDis+1))
mFAD = minmax(disMwUAD)
mFCtrl = minmax(disMwUCtrl)
# Interleave the two halves of the vector, then expand onto the template.
outAD = U2BrainVec(reorderU(mFAD))
outCtrl = U2BrainVec(reorderU(mFCtrl))
np.savetxt(f"./brainPlotFils/DMDwUmeanAD_{nDis}.txt", outAD)
np.savetxt(f"./brainPlotFils/DMDwUmeanCtrl_{nDis}.txt", outCtrl)

In [26]:
# Build the label vector and the standardised design matrix for one feature
# set. (The regression cell below rebuilds both inside its own loop.)
# Labels: 1 = AD, 0 = control; AD rows are stacked first.
Ys = np.concatenate([np.ones(len(fsAD)), np.zeros(len(fsCtrl))])
fsName = ["r", "absEigValMean", "wU"]
XsAD = []
XsCtrl = []
for fName in fsName:
    if isinstance(fsAD[0][fName], numbers.Number):
        # scalar feature -> a single column
        cfAD = np.array([fs[fName] for fs in fsAD]).reshape(-1, 1)
        cfCtrl = np.array([fs[fName] for fs in fsCtrl]).reshape(-1, 1)
    else:
        # array feature (e.g. wU) -> one column per entry
        cfAD = np.array([fs[fName] for fs in fsAD])
        cfCtrl = np.array([fs[fName] for fs in fsCtrl])
    
    XsAD.append(cfAD)
    XsCtrl.append(cfCtrl)
XsAD = np.concatenate(XsAD, axis=1)
XsCtrl = np.concatenate(XsCtrl, axis=1)
Xs = np.concatenate([XsAD, XsCtrl], axis=0)
# Column-wise z-score over all subjects; a constant column would give NaN.
stdXs = (Xs - Xs.mean(axis=0))/Xs.std(axis=0)

In [28]:
# Logistic-regression classification AD vs control on the static-DMD features,
# for two feature sets: without and with the normalised graph metrics.
penalty = "l2"
fsNameSet = [["r", "absEigValMean", "wU"], 
             ["r", "absEigValMean", "wU", "tC", "tL", "tQ"]
            ]
# Number of random train/test splits in the resampling check below. Kept in
# one place so the printed summary always reports the count actually used
# (the message used to say 1000 while the loop ran 10000 times).
nRep = 10000
for fsName in fsNameSet:
    # Labels: 1 = AD, 0 = control; AD rows are stacked first.
    Ys = np.concatenate([np.ones(len(fsAD)), np.zeros(len(fsCtrl))])
    XsAD = []
    XsCtrl = []
    for fName in fsName:
        if isinstance(fsAD[0][fName], numbers.Number):
            # scalar feature -> a single column
            cfAD = np.array([fs[fName] for fs in fsAD]).reshape(-1, 1)
            cfCtrl = np.array([fs[fName] for fs in fsCtrl]).reshape(-1, 1)
        else:
            # array feature (e.g. wU) -> one column per entry
            cfAD = np.array([fs[fName] for fs in fsAD])
            cfCtrl = np.array([fs[fName] for fs in fsCtrl])
        
        XsAD.append(cfAD)
        XsCtrl.append(cfCtrl)
    XsAD = np.concatenate(XsAD, axis=1)
    XsCtrl = np.concatenate(XsCtrl, axis=1)
    Xs = np.concatenate([XsAD, XsCtrl], axis=0)
    # NOTE(review): the z-score uses all subjects, including the ones later
    # held out — confirm this is acceptable for the reported AUCs.
    stdXs = (Xs - Xs.mean(axis=0))/Xs.std(axis=0)
    
    Cs = [0.1, 0.2, 0.4, 0.8, 1, 1.6, 3.2, 6.4]
        
    # --- Nested leave-one-out: C is tuned on the training fold only ---
    eProbs = []
    loo = LeaveOneOut()
    parass = []
    with tqdm(total=len(Ys)) as t:
        for trIdxs, testIdxs in loo.split(stdXs):
            curOptC = TuningCFn(stdXs[trIdxs, :], Ys[trIdxs], Cs, penalty)["optC"]
            clf = LogisticRegression(penalty=penalty, random_state=0, C=curOptC)
            clf.fit(stdXs[trIdxs, :], Ys[trIdxs])
            # intercept followed by the coefficients of this fold
            paras = np.concatenate([clf.intercept_, clf.coef_.reshape(-1)])
            parass.append(paras)
            eProbs.append(clf.predict_proba(stdXs[testIdxs, :]))
            t.update()
    eProbs = np.array(eProbs).squeeze()
    auc = roc_auc_score(Ys, eProbs[:, 1])
    fpr, tpr, thresholds = roc_curve(Ys, eProbs[:, 1], pos_label=1)
    parass = np.array(parass)
    sfName = "_".join(fsName)
    with open(resDir/f"DMD_ROCcurve_{sfName}.pkl", "wb") as f:
        resROC = {"fpr":fpr, "tpr":tpr, "AUC":auc}
        pickle.dump(resROC, f)
    
    
    # --- Resampling check: nRep random splits with 1/5 of the subjects held out ---
    # NOTE(review): optC is tuned once on all subjects, so the held-out
    # subjects of every split took part in the tuning — confirm.
    optC = TuningCFn(stdXs, Ys, Cs, penalty)["optC"]
    np.random.seed(0)
    nobs = stdXs.shape[0]
    Aucss = []
    for j in tqdm(range(nRep)):
        testIdx = np.random.choice(nobs, int(nobs/5), False)
        trainIdx = np.delete(np.arange(nobs), testIdx)
        clf = LogisticRegression(penalty=penalty, random_state=0, C=optC)
        clf.fit(stdXs[trainIdx], Ys[trainIdx])
        curEprobs = clf.predict_proba(stdXs[testIdx, :])
        curAuc = roc_auc_score(Ys[testIdx], curEprobs[:, 1])
        Aucss.append(curAuc)
    mAUC = np.mean(Aucss)
    stdAUC = np.std(Aucss)
    print(f"Features are {fsName}.")
    print(f"The AUC under optimal C is {auc:.3f}.")
    print(f"The mean of AUC under {nRep} repetitions is {mAUC:.3f} and the standard deviation is {stdAUC:.3f}.")
    print("-"*100)

100%|██████████| 176/176 [06:13<00:00,  2.12s/it]
100%|██████████| 10000/10000 [00:14<00:00, 682.03it/s]
  0%|          | 0/176 [00:00<?, ?it/s]

Features are ['r', 'absEigValMean', 'wU'].
The AUC under optimal C is 0.632.
The mean of AUC under 1000 repetitions is 0.643 and the standard deviation is 0.080.
----------------------------------------------------------------------------------------------------


100%|██████████| 176/176 [06:24<00:00,  2.19s/it]
100%|██████████| 10000/10000 [00:14<00:00, 667.80it/s]

Features are ['r', 'absEigValMean', 'wU', 'tC', 'tL', 'tQ'].
The AUC under optimal C is 0.640.
The mean of AUC under 1000 repetitions is 0.647 and the standard deviation is 0.081.
----------------------------------------------------------------------------------------------------





### TV

In [29]:
# Reload the static-TV results written by the "Static DMD and ND(TV)" cells,
# so this section can be run without repeating the slow computation.
# NOTE: pickle.load executes arbitrary code — only load trusted files.
filName = f"Ctrl_data_NWM_TV.pkl"
with open(resDir/filName, "rb") as f:
    CtrlTV = pickle.load(f)
filName = f"AD_data_NWM_TV.pkl"
with open(resDir/filName, "rb") as f:
    ADTV = pickle.load(f)

In [30]:
# Per-subject feature records for the AD group (TV variant).
fsAll = []
for v in ADTV:
    fs = edict()
    # v["ms"] is the NetMeasureFn tuple (tmC, tmL, tQ, mC, mL, Q):
    # surrogate-normalised measures first, raw measures last.
    fs.tC = v["ms"][0]
    fs.tL = v["ms"][1]
    fs.tQ = v["ms"][2]
    fs.C = v["ms"][3]
    fs.L = v["ms"][4]
    fs.Q = v["ms"][5]
    # Eigen-mode summary of the fitted matrix: weighted mode vector, rank,
    # and the kept eigenvalues.
    fs.wU, fs.r, eigVal = wUFn(v["Amat"])
    fs.absEigValMean = np.abs(eigVal).mean()
    fsAll.append(fs)
# Overwrites the fsAD built in the DMD section.
fsAD = fsAll

In [31]:
# Per-subject feature records for the control group (TV variant).
fsAll = []
for v in CtrlTV:
    fs = edict()
    # v["ms"] is the NetMeasureFn tuple (tmC, tmL, tQ, mC, mL, Q):
    # surrogate-normalised measures first, raw measures last.
    fs.tC = v["ms"][0]
    fs.tL = v["ms"][1]
    fs.tQ = v["ms"][2]
    fs.C = v["ms"][3]
    fs.L = v["ms"][4]
    fs.Q = v["ms"][5]
    # Eigen-mode summary of the fitted matrix: weighted mode vector, rank,
    # and the kept eigenvalues.
    fs.wU, fs.r, eigVal = wUFn(v["Amat"])
    fs.absEigValMean = np.abs(eigVal).mean()
    fsAll.append(fs)
# Overwrites the fsCtrl built in the DMD section.
fsCtrl = fsAll

In [91]:
# Group-mean wU maps (TV variant), discretised into nDis quantile bins,
# rescaled to [0, 1] and written out for brain plotting.
nDis = 8
mwUAD = np.array([fs.wU for fs in fsAD]).mean(axis=0)
mwUCtrl = np.array([fs.wU for fs in fsCtrl]).mean(axis=0)
# discretVec (as defined in this notebook) writes into its argument. The
# saved maps used to be discretised only through that side effect, while the
# returned arrays were never used. Pass a copy and use the returned array
# explicitly: the files written are the same, the raw means stay intact.
disMwUAD = discretVec(mwUAD.copy(), np.linspace(0, 1, nDis+1))
disMwUCtrl  = discretVec(mwUCtrl.copy(), np.linspace(0, 1, nDis+1))
mFAD = minmax(disMwUAD)
mFCtrl = minmax(disMwUCtrl)
# Interleave the two halves of the vector, then expand onto the template.
outAD = U2BrainVec(reorderU(mFAD))
outCtrl = U2BrainVec(reorderU(mFCtrl))
np.savetxt(f"./brainPlotFils/DMwUmeanAD_{nDis}.txt", outAD)
np.savetxt(f"./brainPlotFils/DMwUmeanCtrl_{nDis}.txt", outCtrl)

In [31]:
# Build the label vector and the standardised design matrix for one feature
# set. (The regression cell below rebuilds both inside its own loop.)
# Labels: 1 = AD, 0 = control; AD rows are stacked first.
Ys = np.concatenate([np.ones(len(fsAD)), np.zeros(len(fsCtrl))])
fsName = ["r", "absEigValMean", "wU"]
XsAD = []
XsCtrl = []
for fName in fsName:
    if isinstance(fsAD[0][fName], numbers.Number):
        # scalar feature -> a single column
        cfAD = np.array([fs[fName] for fs in fsAD]).reshape(-1, 1)
        cfCtrl = np.array([fs[fName] for fs in fsCtrl]).reshape(-1, 1)
    else:
        # array feature (e.g. wU) -> one column per entry
        cfAD = np.array([fs[fName] for fs in fsAD])
        cfCtrl = np.array([fs[fName] for fs in fsCtrl])
    
    XsAD.append(cfAD)
    XsCtrl.append(cfCtrl)
XsAD = np.concatenate(XsAD, axis=1)
XsCtrl = np.concatenate(XsCtrl, axis=1)
Xs = np.concatenate([XsAD, XsCtrl], axis=0)
# Column-wise z-score over all subjects; a constant column would give NaN.
stdXs = (Xs - Xs.mean(axis=0))/Xs.std(axis=0)

In [32]:
# Logistic-regression classification AD vs control on the TV features,
# for two feature sets: without and with the normalised graph metrics.
penalty = "l2"
fsNameSet = [["r", "absEigValMean", "wU"], 
             ["r", "absEigValMean", "wU", "tC", "tL", "tQ"]
            ]
# Number of random train/test splits in the resampling check below. Kept in
# one place so the printed summary always reports the count actually used
# (the message used to say 1000 while the loop ran 10000 times).
nRep = 10000
for fsName in fsNameSet:
    # Labels: 1 = AD, 0 = control; AD rows are stacked first.
    Ys = np.concatenate([np.ones(len(fsAD)), np.zeros(len(fsCtrl))])
    XsAD = []
    XsCtrl = []
    for fName in fsName:
        if isinstance(fsAD[0][fName], numbers.Number):
            # scalar feature -> a single column
            cfAD = np.array([fs[fName] for fs in fsAD]).reshape(-1, 1)
            cfCtrl = np.array([fs[fName] for fs in fsCtrl]).reshape(-1, 1)
        else:
            # array feature (e.g. wU) -> one column per entry
            cfAD = np.array([fs[fName] for fs in fsAD])
            cfCtrl = np.array([fs[fName] for fs in fsCtrl])
        
        XsAD.append(cfAD)
        XsCtrl.append(cfCtrl)
    XsAD = np.concatenate(XsAD, axis=1)
    XsCtrl = np.concatenate(XsCtrl, axis=1)
    Xs = np.concatenate([XsAD, XsCtrl], axis=0)
    # NOTE(review): the z-score uses all subjects, including the ones later
    # held out — confirm this is acceptable for the reported AUCs.
    stdXs = (Xs - Xs.mean(axis=0))/Xs.std(axis=0)
    
    Cs = [0.1, 0.2, 0.4, 0.8, 1, 1.6, 3.2, 6.4]
    # --- Nested leave-one-out: C is tuned on the training fold only ---
    eProbs = []
    loo = LeaveOneOut()
    parass = []
    with tqdm(total=len(Ys)) as t:
        for trIdxs, testIdxs in loo.split(stdXs):
            curOptC = TuningCFn(stdXs[trIdxs, :], Ys[trIdxs], Cs, penalty)["optC"]
            clf = LogisticRegression(penalty=penalty, random_state=0, C=curOptC)
            clf.fit(stdXs[trIdxs, :], Ys[trIdxs])
            # intercept followed by the coefficients of this fold
            paras = np.concatenate([clf.intercept_, clf.coef_.reshape(-1)])
            parass.append(paras)
            eProbs.append(clf.predict_proba(stdXs[testIdxs, :]))
            
            t.update()
            
    eProbs = np.array(eProbs).squeeze()
    auc = roc_auc_score(Ys, eProbs[:, 1])
    fpr, tpr, thresholds = roc_curve(Ys, eProbs[:, 1], pos_label=1)
    parass = np.array(parass)
    sfName = "_".join(fsName)
    with open(resDir/f"TV_ROCcurve_{sfName}.pkl", "wb") as f:
        resROC = {"fpr":fpr, "tpr":tpr, "AUC":auc}
        pickle.dump(resROC, f)
    
    
    # --- Resampling check: nRep random splits with 1/5 of the subjects held out ---
    # NOTE(review): optC is tuned once on all subjects, so the held-out
    # subjects of every split took part in the tuning — confirm.
    optC = TuningCFn(stdXs, Ys, Cs, penalty)["optC"]
    np.random.seed(0)
    nobs = stdXs.shape[0]
    Aucss = []
    for j in tqdm(range(nRep)):
        testIdx = np.random.choice(nobs, int(nobs/5), False)
        trainIdx = np.delete(np.arange(nobs), testIdx)
        clf = LogisticRegression(penalty=penalty, random_state=0, C=optC)
        clf.fit(stdXs[trainIdx], Ys[trainIdx])
        curEprobs = clf.predict_proba(stdXs[testIdx, :])
        curAuc = roc_auc_score(Ys[testIdx], curEprobs[:, 1])
        Aucss.append(curAuc)
    mAUC = np.mean(Aucss)
    stdAUC = np.std(Aucss)
    print(f"Features are {fsName}.")
    print(f"The AUC under optimal C is {auc:.3f}.")
    print(f"The mean of AUC under {nRep} repetitions is {mAUC:.3f} and the standard deviation is {stdAUC:.3f}.")
    print("-"*100)

100%|██████████| 176/176 [06:27<00:00,  2.20s/it]
100%|██████████| 10000/10000 [00:15<00:00, 657.42it/s]
  0%|          | 0/176 [00:00<?, ?it/s]

Features are ['r', 'absEigValMean', 'wU'].
The AUC under optimal C is 0.701.
The mean of AUC under 1000 repetitions is 0.702 and the standard deviation is 0.077.
----------------------------------------------------------------------------------------------------


100%|██████████| 176/176 [06:42<00:00,  2.29s/it]
100%|██████████| 10000/10000 [00:15<00:00, 651.15it/s]

Features are ['r', 'absEigValMean', 'wU', 'tC', 'tL', 'tQ'].
The AUC under optimal C is 0.694.
The mean of AUC under 1000 repetitions is 0.697 and the standard deviation is 0.078.
----------------------------------------------------------------------------------------------------





### Dynamic DMD

In [37]:
# Reload the time-varying DMD features written by the sequential TVDMD cells.
# The lists of result dicts are bound directly to fsAD / fsCtrl, replacing the
# feature records of the previous section.
# NOTE: pickle.load executes arbitrary code — only load trusted files.
filName = f"AD_data_dynDMD.pkl"
with open(resDir/filName, "rb") as f:
    fsAD = pickle.load(f)
    
filName = f"Ctrl_data_dynDMD.pkl"
with open(resDir/filName, "rb") as f:
    fsCtrl = pickle.load(f)

In [44]:
# Average number of detected change points per control subject.
np.mean([fs["ncpts"] for fs in fsCtrl])

46.85227272727273

In [22]:
# Logistic-regression classification AD vs control on the time-varying DMD
# features (single feature set, no graph metrics).
penalty = "l2"
fsNameSet = [["r", "ncpts", "absEigValMean", "wUmeanAll"]
            ]
# Number of random train/test splits in the resampling check below. Kept in
# one place so the printed summary always reports the count actually used
# (the message used to say 1000 while the loop ran 10000 times).
nRep = 10000
for fsName in fsNameSet:
    # Labels: 1 = AD, 0 = control; AD rows are stacked first.
    Ys = np.concatenate([np.ones(len(fsAD)), np.zeros(len(fsCtrl))])
    XsAD = []
    XsCtrl = []
    for fName in fsName:
        if isinstance(fsAD[0][fName], numbers.Number):
            # scalar feature -> a single column
            cfAD = np.array([fs[fName] for fs in fsAD]).reshape(-1, 1)
            cfCtrl = np.array([fs[fName] for fs in fsCtrl]).reshape(-1, 1)
        else:
            # array feature (e.g. wUmeanAll) -> one column per entry
            cfAD = np.array([fs[fName] for fs in fsAD])
            cfCtrl = np.array([fs[fName] for fs in fsCtrl])
        
        XsAD.append(cfAD)
        XsCtrl.append(cfCtrl)
    XsAD = np.concatenate(XsAD, axis=1)
    XsCtrl = np.concatenate(XsCtrl, axis=1)
    Xs = np.concatenate([XsAD, XsCtrl], axis=0)
    # NOTE(review): the z-score uses all subjects, including the ones later
    # held out — confirm this is acceptable for the reported AUCs.
    stdXs = (Xs - Xs.mean(axis=0))/Xs.std(axis=0)
    
    Cs = [0.1, 0.2, 0.4, 0.8, 1, 1.6, 3.2, 6.4]
    # --- Nested leave-one-out: C is tuned on the training fold only ---
    eProbs = []
    loo = LeaveOneOut()
    parass = []
    with tqdm(total=len(Ys)) as t:
        for trIdxs, testIdxs in loo.split(stdXs):
            curOptC = TuningCFn(stdXs[trIdxs, :], Ys[trIdxs], Cs, penalty)["optC"]
            clf = LogisticRegression(penalty=penalty, random_state=0, C=curOptC)
            clf.fit(stdXs[trIdxs, :], Ys[trIdxs])
            # intercept followed by the coefficients of this fold
            paras = np.concatenate([clf.intercept_, clf.coef_.reshape(-1)])
            parass.append(paras)
            eProbs.append(clf.predict_proba(stdXs[testIdxs, :]))
            
            t.update()
            
    eProbs = np.array(eProbs).squeeze()
    auc = roc_auc_score(Ys, eProbs[:, 1])
    fpr, tpr, thresholds = roc_curve(Ys, eProbs[:, 1], pos_label=1)
    parass = np.array(parass)
    sfName = "_".join(fsName)
    with open(resDir/f"dynDMD_ROCcurve_{sfName}.pkl", "wb") as f:
        resROC = {"fpr":fpr, "tpr":tpr, "AUC":auc}
        pickle.dump(resROC, f)
    
    # --- Resampling check: nRep random splits with 1/5 of the subjects held out ---
    # NOTE(review): optC is tuned once on all subjects, so the held-out
    # subjects of every split took part in the tuning — confirm.
    optC = TuningCFn(stdXs, Ys, Cs, penalty)["optC"]
    np.random.seed(0)
    nobs = stdXs.shape[0]
    Aucss = []
    for j in tqdm(range(nRep)):
        testIdx = np.random.choice(nobs, int(nobs/5), False)
        trainIdx = np.delete(np.arange(nobs), testIdx)
        clf = LogisticRegression(penalty=penalty, random_state=0, C=optC)
        clf.fit(stdXs[trainIdx], Ys[trainIdx])
        curEprobs = clf.predict_proba(stdXs[testIdx, :])
        curAuc = roc_auc_score(Ys[testIdx], curEprobs[:, 1])
        Aucss.append(curAuc)
    mAUC = np.mean(Aucss)
    stdAUC = np.std(Aucss)
    print(f"Features are {fsName}.")
    print(f"The AUC under optimal C is {auc:.3f}.")
    print(f"The mean of AUC under {nRep} repetitions is {mAUC:.3f} and the standard deviation is {stdAUC:.3f}.")
    print("-"*100)

100%|██████████| 176/176 [06:38<00:00,  2.27s/it]
100%|██████████| 10000/10000 [00:15<00:00, 631.79it/s]

Features are ['r', 'ncpts', 'absEigValMean', 'wUmeanAll'].
The AUC under optimal C is 0.616.
The mean of AUC under 1000 repetitions is 0.666 and the standard deviation is 0.082.
----------------------------------------------------------------------------------------------------



