In [1]:
import sys
sys.path.append("/home/huaqingj/MyResearch/AD_control_data/TVDN")

In [2]:
from pyTVDN import TVDNDetect
from pathlib import Path
from scipy.io import loadmat
import numpy as np
from easydict import EasyDict as edict
import matplotlib.pyplot as plt
import os
from scipy import signal
import pickle
import seaborn as sns
from tqdm.autonotebook import tqdm
import numbers
%matplotlib inline



In [3]:
import rpy2.robjects as robj
robj.r.source("./Rcode/utils.R")

RsegDMD = robj.r["DMD"]

In [4]:
os.chdir("/home/huaqingj/MyResearch/AD_control_data/TVDN")
resDir = Path("./results")
dataDir = Path("./data")

In [5]:
ADdets = list(resDir.glob("AD*04_decimate5.pkl"))
ADdetsPs = sorted(ADdets, key=lambda p:int(p.stem.split("_")[3]))
Ctrldets = list(resDir.glob("Ctrl*04_decimate5.pkl"))
CtrldetsPs = sorted(Ctrldets, key=lambda p:int(p.stem.split("_")[3]))

In [6]:
def statAmat(curY, curX):
    M = np.matmul(curX, curX.T)/curX.shape[-1]
    YX = np.matmul(curY, curX.T)/curX.shape[-1]
    U, S, VT = np.linalg.svd(M)
    r = np.argmax(np.cumsum(S)/np.sum(S)>=0.999) + 1
    invM = U[:, :r].dot(np.diag(1/S[:r])).dot(VT[:r, :])
    Amat = YX.dot(invM)
    return Amat

In [7]:
def minmax(mat):
    stdMat = (mat-np.min(mat))/(np.max(mat)-np.min(mat))
    return stdMat

In [8]:
def sysMatShuffle(mat):
    trlMask = np.tril(np.ones_like(mat))
    trlMask1 = np.tril(np.ones_like(mat), k=-1)
    eles = mat[trlMask==1]
    np.random.shuffle(eles)
    sMat = np.zeros_like(mat)
    sMat[trlMask==1] = eles
    sMat[trlMask1.T==1] = sMat[trlMask1==1]
    return sMat

In [9]:
def NetMeasureFn(mat, nrep=100):
    Cs = bct.clustering_coef_wu(mat)
    mC = np.mean(Cs)
    Ls = bct.distance_wei(mat)[0]
    mL = np.mean(1/Ls[np.eye(Ls.shape[0])!=1])
    _, Q = bct.modularity_und(mat)
    
    ranLs = []
    ranQs = []
    ranCs = []
    for i in range(nrep):
        sMat = sysMatShuffle(mat)
        Cs = bct.clustering_coef_wu(sMat)
        mCi = np.mean(Cs)
        ranCs.append(mCi)
        Ls = bct.distance_wei(sMat)[0]
        mLi = np.mean(1/Ls[np.eye(Ls.shape[0])!=1])
        ranLs.append(mLi)
        _, Qi = bct.modularity_und(sMat)
        ranQs.append(Qi)
    
    tQ = Q/np.mean(ranQs)
    tmL  = mL/np.mean(ranLs)
    tmC  = mC/np.mean(ranCs)
    return tmC, tmL, tQ, mC, mL, Q

In [10]:
import bct

In [89]:
ADNWsDMD = []
ADNWsTV = []
for ADdetP in tqdm(ADdetsPs):
    with open(ADdetP, "rb") as f:
        detObj = pickle.load(f)
    
    NWDMD = edict()
    NWTV = edict()
    
    nYmat = detObj.Xmat
    curY, curX = nYmat[:, 1:], nYmat[:, :-1]
    AmatDMD = statAmat(curY, curX)
    conMatDMD = minmax(AmatDMD.dot(AmatDMD.T))
    NWDMD.Amat = AmatDMD
    NWDMD.stdAmat = conMatDMD
    NWDMD.ms = NetMeasureFn(conMatDMD)
    ADNWsDMD.append(NWDMD)
    
    curY, curX = detObj.dXmat, detObj.Xmat
    AmatTV = statAmat(curY, curX)
    conMatTV = minmax(AmatTV.dot(AmatTV.T))
    NWTV.Amat = AmatTV
    NWTV.stdAmat = conMatTV
    NWTV.ms = NetMeasureFn(conMatTV)
    ADNWsTV.append(NWTV)
    
filName = f"AD_data_NWM_TV.pkl"
with open(resDir/filName, "wb") as f:
    pickle.dump(ADNWsTV, f)
filName = f"AD_data_NWM_DMD.pkl"
with open(resDir/filName, "wb") as f:
    pickle.dump(ADNWsDMD, f)

HBox(children=(IntProgress(value=0, max=88), HTML(value='')))




In [90]:
CtrlNWsDMD = []
CtrlNWsTV = []
for CtrldetP in tqdm(CtrldetsPs):
    with open(CtrldetP, "rb") as f:
        detObj = pickle.load(f)
    
    NWDMD = edict()
    NWTV = edict()
    
    nYmat = detObj.Xmat
    curY, curX = nYmat[:, 1:], nYmat[:, :-1]
    AmatDMD = statAmat(curY, curX)
    conMatDMD = minmax(AmatDMD.dot(AmatDMD.T))
    NWDMD.Amat = AmatDMD
    NWDMD.stdAmat = conMatDMD
    NWDMD.ms = NetMeasureFn(conMatDMD)
    CtrlNWsDMD.append(NWDMD)
    
    curY, curX = detObj.dXmat, detObj.Xmat
    AmatTV = statAmat(curY, curX)
    conMatTV = minmax(AmatTV.dot(AmatTV.T))
    NWTV.Amat = AmatTV
    NWTV.stdAmat = conMatTV
    NWTV.ms = NetMeasureFn(conMatTV)
    CtrlNWsTV.append(NWTV)
    
filName = f"Ctrl_data_NWM_TV.pkl"
with open(resDir/filName, "wb") as f:
    pickle.dump(CtrlNWsTV, f)
filName = f"Ctrl_data_NWM_DMD.pkl"
with open(resDir/filName, "wb") as f:
    pickle.dump(CtrlNWsDMD, f)

HBox(children=(IntProgress(value=0, max=70), HTML(value='')))




In [11]:
def wUFn(Amat):
    eigVals, eigVecs = np.linalg.eig(Amat)
    r = np.sum(np.cumsum(np.abs(eigVals))/np.sum(np.abs(eigVals)) <= 0.8) + 1
    if (eigVals[r-1].imag + eigVals[r].imag ) == 0:
        r = r + 1
    wU = np.abs(np.matmul(eigVecs[:, :r], eigVals[:r]))
    return wU, r

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import roc_auc_score, roc_curve

import warnings
warnings.filterwarnings('ignore')

In [36]:
def minmax(x):
    num = x - np.min(x)
    den = np.max(x) - np.min(x)
    return num/den

def U2BrainVec(wU):
    emVec = np.zeros_like(DKtmplate, dtype=np.float64)
    for idx in range(1, 69):
        emVec[DKtmplate==idx] = wU[idx-1]
    return emVec

def reorderU(wU):
    wUreorder = np.zeros_like(wU, dtype=np.float64)
    wUreorder[0::2] = wU[:34]
    wUreorder[1::2] = wU[34:]
    return wUreorder

In [45]:
with open("./meg_data/BNVtemplate_DK68.txt", "r") as tf:
    DKtmplateRaw = tf.readlines()
DKtmplate = np.array([int(x.strip()) for x in DKtmplateRaw])

In [85]:
def discretVec(Vec, ps=[0, 0.25, 0.5, 0.75, 1]):
    qs = np.quantile(Vec, ps)
    disVec = Vec 
    for ix in range(len(qs)-1):
        low = qs[ix]
        up = qs[ix+1]
        idxVec= np.bitwise_and(Vec >= low, Vec <up)
        disVec[idxVec] = np.mean(Vec[idxVec])
    return disVec

### DMD

In [92]:
filName = f"Ctrl_data_NWM_DMD.pkl"
with open(resDir/filName, "rb") as f:
    CtrlDMD = pickle.load(f)
filName = f"AD_data_NWM_DMD.pkl"
with open(resDir/filName, "rb") as f:
    ADDMD = pickle.load(f)

In [93]:
fsAll = []
for v in ADDMD:
    fs = edict()
    fs.tC = v["ms"][0]
    fs.tL = v["ms"][1]
    fs.tQ = v["ms"][2]
    fs.C = v["ms"][3]
    fs.L = v["ms"][4]
    fs.Q = v["ms"][5]
    fs.wU, fs.r = wUFn(v["Amat"])
    fsAll.append(fs)
fsAD = fsAll

In [94]:
fsAll = []
for v in CtrlDMD:
    fs = edict()
    fs.tC = v["ms"][0]
    fs.tL = v["ms"][1]
    fs.tQ = v["ms"][2]
    fs.C = v["ms"][3]
    fs.L = v["ms"][4]
    fs.Q = v["ms"][5]
    fs.wU, fs.r = wUFn(v["Amat"])
    fsAll.append(fs)
fsCtrl = fsAll

In [97]:
nDis = 10
mwUAD = np.array([fs.wU for fs in fsAD]).mean(axis=0)
mwUCtrl = np.array([fs.wU for fs in fsCtrl]).mean(axis=0)
disMwUAD = discretVec(mwUAD, np.linspace(0, 1, nDis+1))
disMwUCtrl  = discretVec(mwUCtrl, np.linspace(0, 1, nDis+1))
mFAD = minmax(mwUAD)
mFCtrl = minmax(mwUCtrl)
outAD = U2BrainVec(reorderU(mFAD))
outCtrl = U2BrainVec(reorderU(mFCtrl))
np.savetxt(f"./brainPlotFils/DMDwUmeanAD_{nDis}.txt", outAD)
np.savetxt(f"./brainPlotFils/DMDwUmeanCtrl_{nDis}.txt", outCtrl)

In [34]:
Ys = np.concatenate([np.ones(len(fsAD)), np.zeros(len(fsCtrl))])
fsName = ["r", "wU"]
XsAD = []
XsCtrl = []
for fName in fsName:
    if isinstance(fsAD[0][fName], numbers.Number):
        cfAD = np.array([fs[fName] for fs in fsAD]).reshape(-1, 1)
        cfCtrl = np.array([fs[fName] for fs in fsCtrl]).reshape(-1, 1)
    else:
        cfAD = np.array([fs[fName] for fs in fsAD])
        cfCtrl = np.array([fs[fName] for fs in fsCtrl])
    
    XsAD.append(cfAD)
    XsCtrl.append(cfCtrl)
XsAD = np.concatenate(XsAD, axis=1)
XsCtrl = np.concatenate(XsCtrl, axis=1)
Xs = np.concatenate([XsAD, XsCtrl], axis=0)
stdXs = (Xs - Xs.mean(axis=0))/Xs.std(axis=0)

In [35]:
penalty = "l2"
fsNameSet = [["r", "wU"], 
             ["tC", "tL", "tQ"],  
             ["r", "wU", "tC", "tL", "tQ"]
            ]
for fsName in fsNameSet:
    Ys = np.concatenate([np.ones(len(fsAD)), np.zeros(len(fsCtrl))])
    XsAD = []
    XsCtrl = []
    for fName in fsName:
        if isinstance(fsAD[0][fName], numbers.Number):
            cfAD = np.array([fs[fName] for fs in fsAD]).reshape(-1, 1)
            cfCtrl = np.array([fs[fName] for fs in fsCtrl]).reshape(-1, 1)
        else:
            cfAD = np.array([fs[fName] for fs in fsAD])
            cfCtrl = np.array([fs[fName] for fs in fsCtrl])
        
        XsAD.append(cfAD)
        XsCtrl.append(cfCtrl)
    XsAD = np.concatenate(XsAD, axis=1)
    XsCtrl = np.concatenate(XsCtrl, axis=1)
    Xs = np.concatenate([XsAD, XsCtrl], axis=0)
    stdXs = (Xs - Xs.mean(axis=0))/Xs.std(axis=0)
    
    Cs = [0.1, 0.2, 0.4, 0.8, 1, 1.6, 3.2, 6.4, 1000]
    aucCs = []
    for C in Cs:
        eProbs = []
        loo = LeaveOneOut()
        for trIdxs, testIdxs in loo.split(stdXs):
            clf = LogisticRegression(penalty=penalty, random_state=0, C=C)
            clf.fit(stdXs[trIdxs, :], Ys[trIdxs])
            eProbs.append(clf.predict_proba(stdXs[testIdxs, :]))
        eProbs = np.array(eProbs).squeeze()
        auc = roc_auc_score(Ys, eProbs[:, 1])
        fpr, tpr, thresholds = roc_curve(Ys, eProbs[:, 1], pos_label=1)
        aucCs.append(auc)
        
    optC = Cs[np.argmax(aucCs)]
    eProbs = []
    loo = LeaveOneOut()
    parass = []
    for trIdxs, testIdxs in loo.split(stdXs):
        clf = LogisticRegression(penalty=penalty, random_state=0, C=optC)
        clf.fit(stdXs[trIdxs, :], Ys[trIdxs])
        paras = np.concatenate([clf.intercept_, clf.coef_.reshape(-1)])
        parass.append(paras)
        eProbs.append(clf.predict_proba(stdXs[testIdxs, :]))
    eProbs = np.array(eProbs).squeeze()
    auc = roc_auc_score(Ys, eProbs[:, 1])
    fpr, tpr, thresholds = roc_curve(Ys, eProbs[:, 1], pos_label=1)
    parass = np.array(parass)
    
    nobs = stdXs.shape[0]
    Aucss = []
    for j in range(1000):
        testIdx = np.random.choice(nobs, int(nobs/5), False)
        trainIdx = np.delete(np.arange(nobs), testIdx)
        clf = LogisticRegression(penalty=penalty, random_state=0, C=optC)
        clf.fit(stdXs[trainIdx], Ys[trainIdx])
        curEprobs = clf.predict_proba(stdXs[testIdx, :])
        curAuc = roc_auc_score(Ys[testIdx], curEprobs[:, 1])
        Aucss.append(curAuc)
    mAUC = np.mean(Aucss)
    stdAUC = np.std(Aucss)
    print(f"Features are {fsName}.")
    print(f"The AUC under optimal C is {auc:.3f}.")
    print(f"The mean of AUC under 1000 repetitions is {mAUC:.3f} and the standard deviation is {stdAUC:.3f}.")
    print("-"*100)

Features are ['r', 'wU'].
The AUC under optimal C is 0.621.
The mean of AUC under 1000 repetitions is 0.632 and the standard deviation is 0.089.
----------------------------------------------------------------------------------------------------
Features are ['tC', 'tL', 'tQ'].
The AUC under optimal C is 0.585.
The mean of AUC under 1000 repetitions is 0.602 and the standard deviation is 0.093.
----------------------------------------------------------------------------------------------------
Features are ['r', 'wU', 'tC', 'tL', 'tQ'].
The AUC under optimal C is 0.616.
The mean of AUC under 1000 repetitions is 0.622 and the standard deviation is 0.093.
----------------------------------------------------------------------------------------------------


### TV

In [86]:
filName = f"Ctrl_data_NWM_TV.pkl"
with open(resDir/filName, "rb") as f:
    CtrlTV = pickle.load(f)
filName = f"AD_data_NWM_TV.pkl"
with open(resDir/filName, "rb") as f:
    ADTV = pickle.load(f)

In [87]:
fsAll = []
for v in ADTV:
    fs = edict()
    fs.tC = v["ms"][0]
    fs.tL = v["ms"][1]
    fs.tQ = v["ms"][2]
    fs.C = v["ms"][3]
    fs.L = v["ms"][4]
    fs.Q = v["ms"][5]
    fs.wU, fs.r = wUFn(v["Amat"])
    fsAll.append(fs)
fsAD = fsAll

In [88]:
fsAll = []
for v in CtrlTV:
    fs = edict()
    fs.tC = v["ms"][0]
    fs.tL = v["ms"][1]
    fs.tQ = v["ms"][2]
    fs.C = v["ms"][3]
    fs.L = v["ms"][4]
    fs.Q = v["ms"][5]
    fs.wU, fs.r = wUFn(v["Amat"])
    fsAll.append(fs)
fsCtrl = fsAll

In [91]:
nDis = 8
mwUAD = np.array([fs.wU for fs in fsAD]).mean(axis=0)
mwUCtrl = np.array([fs.wU for fs in fsCtrl]).mean(axis=0)
disMwUAD = discretVec(mwUAD, np.linspace(0, 1, nDis+1))
disMwUCtrl  = discretVec(mwUCtrl, np.linspace(0, 1, nDis+1))
mFAD = minmax(mwUAD)
mFCtrl = minmax(mwUCtrl)
outAD = U2BrainVec(reorderU(mFAD))
outCtrl = U2BrainVec(reorderU(mFCtrl))
np.savetxt(f"./brainPlotFils/DMwUmeanAD_{nDis}.txt", outAD)
np.savetxt(f"./brainPlotFils/DMwUmeanCtrl_{nDis}.txt", outCtrl)

In [22]:
Ys = np.concatenate([np.ones(len(fsAD)), np.zeros(len(fsCtrl))])
fsName = ["r", "wU"]
XsAD = []
XsCtrl = []
for fName in fsName:
    if isinstance(fsAD[0][fName], numbers.Number):
        cfAD = np.array([fs[fName] for fs in fsAD]).reshape(-1, 1)
        cfCtrl = np.array([fs[fName] for fs in fsCtrl]).reshape(-1, 1)
    else:
        cfAD = np.array([fs[fName] for fs in fsAD])
        cfCtrl = np.array([fs[fName] for fs in fsCtrl])
    
    XsAD.append(cfAD)
    XsCtrl.append(cfCtrl)
XsAD = np.concatenate(XsAD, axis=1)
XsCtrl = np.concatenate(XsCtrl, axis=1)
Xs = np.concatenate([XsAD, XsCtrl], axis=0)
stdXs = (Xs - Xs.mean(axis=0))/Xs.std(axis=0)

In [29]:
penalty = "l2"
fsNameSet = [["r", "wU"], 
             ["tC", "tL", "tQ"],  
             ["r", "wU", "tC", "tL", "tQ"]
            ]
for fsName in fsNameSet:
    Ys = np.concatenate([np.ones(len(fsAD)), np.zeros(len(fsCtrl))])
    XsAD = []
    XsCtrl = []
    for fName in fsName:
        if isinstance(fsAD[0][fName], numbers.Number):
            cfAD = np.array([fs[fName] for fs in fsAD]).reshape(-1, 1)
            cfCtrl = np.array([fs[fName] for fs in fsCtrl]).reshape(-1, 1)
        else:
            cfAD = np.array([fs[fName] for fs in fsAD])
            cfCtrl = np.array([fs[fName] for fs in fsCtrl])
        
        XsAD.append(cfAD)
        XsCtrl.append(cfCtrl)
    XsAD = np.concatenate(XsAD, axis=1)
    XsCtrl = np.concatenate(XsCtrl, axis=1)
    Xs = np.concatenate([XsAD, XsCtrl], axis=0)
    stdXs = (Xs - Xs.mean(axis=0))/Xs.std(axis=0)
    
    Cs = [0.1, 0.2, 0.4, 0.8, 1, 1.6, 3.2, 6.4, 1000]
    aucCs = []
    for C in Cs:
        eProbs = []
        loo = LeaveOneOut()
        for trIdxs, testIdxs in loo.split(stdXs):
            clf = LogisticRegression(penalty=penalty, random_state=0, C=C)
            clf.fit(stdXs[trIdxs, :], Ys[trIdxs])
            eProbs.append(clf.predict_proba(stdXs[testIdxs, :]))
        eProbs = np.array(eProbs).squeeze()
        auc = roc_auc_score(Ys, eProbs[:, 1])
        fpr, tpr, thresholds = roc_curve(Ys, eProbs[:, 1], pos_label=1)
        aucCs.append(auc)
        
    print(aucCs)
    optC = Cs[np.argmax(aucCs)]
    eProbs = []
    loo = LeaveOneOut()
    parass = []
    for trIdxs, testIdxs in loo.split(stdXs):
        clf = LogisticRegression(penalty=penalty, random_state=0, C=optC)
        clf.fit(stdXs[trIdxs, :], Ys[trIdxs])
        paras = np.concatenate([clf.intercept_, clf.coef_.reshape(-1)])
        parass.append(paras)
        eProbs.append(clf.predict_proba(stdXs[testIdxs, :]))
    eProbs = np.array(eProbs).squeeze()
    auc = roc_auc_score(Ys, eProbs[:, 1])
    fpr, tpr, thresholds = roc_curve(Ys, eProbs[:, 1], pos_label=1)
    parass = np.array(parass)
    
    nobs = stdXs.shape[0]
    Aucss = []
    for j in range(1000):
        testIdx = np.random.choice(nobs, int(nobs/5), False)
        trainIdx = np.delete(np.arange(nobs), testIdx)
        clf = LogisticRegression(penalty=penalty, random_state=0, C=optC)
        clf.fit(stdXs[trainIdx], Ys[trainIdx])
        curEprobs = clf.predict_proba(stdXs[testIdx, :])
        curAuc = roc_auc_score(Ys[testIdx], curEprobs[:, 1])
        Aucss.append(curAuc)
    mAUC = np.mean(Aucss)
    stdAUC = np.std(Aucss)
    print(f"Features are {fsName}.")
    print(f"The AUC under optimal C is {auc:.3f}.")
    print(f"The mean of AUC under 1000 repetitions is {mAUC:.3f} and the standard deviation is {stdAUC:.3f}.")
    print("-"*100)

[0.6400974025974026, 0.6290584415584416, 0.615909090909091, 0.5926948051948052, 0.5871753246753247, 0.5850649350649352, 0.575487012987013, 0.5691558441558442, 0.5358766233766233]
Features are ['r', 'wU'].
The AUC under optimal C is 0.640.
The mean of AUC under 1000 repetitions is 0.646 and the standard deviation is 0.086.
----------------------------------------------------------------------------------------------------
[0.4904220779220779, 0.4844155844155844, 0.4806818181818182, 0.48019480519480523, 0.4800324675324676, 0.4795454545454546, 0.47938311688311697, 0.47922077922077927, 0.4793831168831169]
Features are ['tC', 'tL', 'tQ'].
The AUC under optimal C is 0.490.
The mean of AUC under 1000 repetitions is 0.548 and the standard deviation is 0.093.
----------------------------------------------------------------------------------------------------
[0.6396103896103896, 0.629383116883117, 0.611038961038961, 0.5977272727272728, 0.5951298701298702, 0.5897727272727273, 0.5790584415584416,