In [None]:
%reset

## 0. Set Paths and Imports

In [131]:
# SET PATHS

# Root folder of the project; every other path is built on top of it
mainPath = 'C:/Users/cgiron/Documents/PROYECTO UB/'

# Folder where the raw data has been stored
rawDataPath = ''.join( [ mainPath, "Raw_Data/UB_Features_V2/" ] )

# Folder where the pair plots can be stored
pathToSavePairPlot = ''.join( [ mainPath, 'ADHD_Project_UB/ADHD_Project/images/pairplots/' ] )

# CSV file with the clustering results, and the brain picture used as plot background
pathCSV = ''.join( [ mainPath, 'ADHD_Project_UB/ADHD_Project/resultsClustering_doming10.csv' ] )
brainPath = ''.join( [ mainPath, 'ADHD_Project_UB/brain.png' ] )

In [2]:
# IMPORTS

import numpy as np
import scipy.io as sio
import pandas as pd
import os
import re
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import proj3d
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.cluster.hierarchy import cophenet
from scipy.spatial.distance import pdist
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

import mne
from mne.datasets import sample
from mne.minimum_norm import apply_inverse_epochs, read_inverse_operator
from mne.connectivity import spectral_connectivity
from mne.viz import circular_layout, plot_connectivity_circle

import matplotlib.patches as mpatches


## 1. Load Data

Load data function

In [3]:
#LOAD DATA: the folder where the patient files are stored must be passed in
def load( path ):
    """Load every patient ``.mat`` file found in ``path`` into one DataFrame.

    Each file is read with ``scipy.io.loadmat`` and must provide a ``BandPower``
    entry holding 56 values and a ``BandPowerRatio`` entry holding 8 values.
    The flattened ``BandPower`` values are labelled electrode by electrode
    (8 electrodes x 7 bands) as ``<electrode>_(<band>)``; the ratios are
    labelled ``BPR_<electrode>``.

    Only files named ``<word characters><A|B|C>.mat`` are read; the letter in
    front of the extension becomes the ``experiment`` column. Any other entry
    of the folder is ignored.

    Parameters
    ----------
    path : str
        Folder containing the files. A trailing separator is optional.

    Returns
    -------
    pandas.DataFrame
        One row per patient file, indexed by ``patientName`` (the file name
        without ``.mat``), with the 56 band-power columns, the 8 ratio columns
        and the ``experiment`` column. Rows containing any NaN are dropped and
        rows are ordered by experiment, then by file name.

    Raises
    ------
    ValueError
        If the folder holds no file matching the expected naming scheme.
    """
    #electrodes names
    electrodes = ['Fp1', 'F3', 'C3', 'Fz', 'Cz', 'Fp2', 'F4', 'C4']

    #bandPower names
    bandPower = ['Theta2+Alpha1', 'Theta', 'Alpha', 'Beta_Global', 'Beta_Alta', 'Beta_Baja', 'Gamma']

    #columns for the band powers: one per (electrode, band) pair, electrode-major
    columns_df = [ electrode + '_(' + band + ')' for electrode in electrodes for band in bandPower ]

    #columns for the band power ratios: one per electrode
    columns_df2 = [ 'BPR_' + electrode for electrode in electrodes ]

    #the captured letter right before the extension identifies the experiment
    experimentPattern = re.compile( r'\w+([ABC])\.mat$' )

    #one single-row dataframe per patient file; sorted() makes the order reproducible
    list_df = []
    for fileName in sorted( os.listdir( path ) ):
        matched = experimentPattern.match( fileName )
        if matched is None:
            #not a patient file (other extension, sub-folder, unknown experiment)
            continue

        #os.path.join works whether or not `path` ends with a separator
        patient = sio.loadmat( os.path.join( path, fileName ) )
        bandPowerDF = pd.DataFrame( patient['BandPower'].reshape(1,56), columns = columns_df )
        bandPowerRatioDF = pd.DataFrame( patient['BandPowerRatio'].reshape(1,8), columns = columns_df2 )
        patientDF = pd.concat( [bandPowerDF, bandPowerRatioDF], axis=1 )
        patientDF['patientName'] = fileName[ : -len('.mat') ]
        patientDF['experiment'] = matched.group(1)

        list_df.append( patientDF )

    if not list_df:
        raise ValueError( "No patient file named <name>[A|B|C].mat found in " + repr( path ) )

    #all the single-row dataframes are concatenated and rows with nans are dropped
    patientsDF = pd.concat( list_df ).dropna( how='any' )

    #sorted by experiment type (A, B, C); mergesort is stable, so the
    #file-name order is kept inside each experiment
    patientsDF = patientsDF.sort_values( by='experiment', ascending=True, kind='mergesort' )

    #rows are indexed by the patient file name
    patientsDF = patientsDF.set_index( 'patientName' )
    return ( patientsDF )

In [None]:
# Read all the patient files and preview the first rows
patientsDF = load( rawDataPath )
patientsDF.head()

Data normalization function

In [110]:
#Standardize the columns of a dataframe
def normalizeData ( dfValues ):
    """Return ``dfValues`` with every column rescaled by ``StandardScaler``.

    The scaler is fitted on all the rows it receives; nothing in this function
    splits the data by experiment, so callers that want per-experiment scaling
    must pass an already filtered dataframe.

    Parameters
    ----------
    dfValues : pandas.DataFrame
        Numeric columns to rescale.

    Returns
    -------
    pandas.DataFrame
        The scaled values, with the same index and column labels as the input.
    """
    scaledValues = StandardScaler().fit_transform( dfValues )
    return pd.DataFrame( scaledValues, index = dfValues.index, columns = dfValues.columns )


In [None]:
# Keep only the numeric feature columns before scaling. The axis is passed by
# keyword because recent pandas releases no longer accept it positionally.
dataFrame = patientsDF.drop( 'experiment', axis = 1 ).copy()
normsPatientsDF = normalizeData( dataFrame ).copy()
normsPatientsDF.head()

Add cluster label

In [5]:
def addLabel ( pathCSV, dataFrame ):
    """Attach the clustering results stored in a CSV file to a patients dataframe.

    The index of ``dataFrame`` is turned into a regular column and the two
    tables are inner-merged on every column name they share (pandas' default
    when no key is given), so only patients present in both are kept.

    Parameters
    ----------
    pathCSV : str
        Path of the CSV file; it must contain a ``patientName`` column.
    dataFrame : pandas.DataFrame
        Patients data whose index is named ``patientName``.

    Returns
    -------
    pandas.DataFrame
        The merged table, indexed by ``patientName``.
    """
    dfClustering = pd.read_csv( pathCSV )
    # The CSV index is deliberately left untouched: naming it 'patientName'
    # while the column of the same name still exists makes the merge key
    # ambiguous for pandas, and the merge only looks at columns anyway.
    patients = dataFrame.reset_index()
    result = patients.merge( dfClustering, how = 'inner' )
    return ( result.set_index( 'patientName' ) )

In [None]:
# Add the cluster labels to the feature table and preview the first rows
finalPatientsDF = addLabel( pathCSV, dataFrame ).copy()
finalPatientsDF.head()

## 2. PairPlots

In [6]:
# %load pairPlots.py
## Pair plots per electrode (the band power ratio of the electrode is the last variable)

def pairPlotsPerExperiment ( patientsDF, electrodes, bandPower, pathToSavePlot, title ):
    """Save one seaborn pair plot per electrode.

    For every electrode the plot shows its ``<electrode>_(<band>)`` columns, in
    the order given by ``bandPower``, followed by its own ``BPR_<electrode>``
    column.

    Parameters
    ----------
    patientsDF : pandas.DataFrame
        Table containing the columns described above.
    electrodes : list of str
        Electrodes to plot; one picture is written per electrode.
    bandPower : list of str
        Bands to plot for each electrode.
    pathToSavePlot : str
        Prefix of the output files; each picture is written to
        ``pathToSavePlot + electrode + title + ".png"``.
    title : str
        Text used in the figure title and in the file name.

    Example
    -------
    patientsDF = load( rawDataPath )
    electrodes = ['Fp1', 'F3', 'C3', 'Fz', 'Cz', 'Fp2', 'F4', 'C4']
    bandPower = ['Theta', 'Theta2+Alpha1', 'Alpha', 'Beta_Global']
    pairPlotsPerExperiment ( patientsDF, electrodes, bandPower, pathToSavePairPlot, "Experimento A" )
    """
    for electrode in electrodes:
        # The ratio column is derived from the electrode name itself, so each
        # electrode is always paired with its own BPR column.
        electrodeBand = [ electrode + '_(' + band + ')' for band in bandPower ]
        electrodeBand.append( 'BPR_' + electrode )

        plot = sns.pairplot( patientsDF[ electrodeBand ] )

        # Figure-level title (plt.title would only label the last subplot)
        plt.suptitle( title + " - " + electrode )

        picPath = pathToSavePlot + electrode + title + ".png"
        plot.savefig( picPath )
        print ("Pair-plot for the electrode " + electrode +  " has been saved")
        # Release the figure before building the next one
        plt.close('all')

In [None]:
# Show the folder where the pair plots will be written
pathToSavePairPlot

In [None]:
# Pair plots for the patients of experiment C only
patientsDF = load( rawDataPath )
dataFrame = patientsDF[ patientsDF['experiment'] == 'C' ].copy()

electrodes = ['Fp1', 'F3', 'C3', 'Fz', 'Cz', 'Fp2', 'F4', 'C4']
bandPower = ['Theta', 'Theta2+Alpha1', 'Alpha', 'Beta_Global', 'Gamma']
title = "Experimento C"

pairPlotsPerExperiment( dataFrame, electrodes, bandPower, pathToSavePairPlot, title )

## 3. Correlation graph plot

In [86]:
def corrGraphPlot( dataFrame, electrodes, bandPower, numberOfEdges, plotTitle ):
    """Draw the correlations between electrode/band columns on a circular graph.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Table containing one ``<electrode>_(<band>)`` column per requested pair.
    electrodes : list of str
        Electrodes to include.
    bandPower : list of str
        Bands to include for every electrode.
    numberOfEdges : int or None
        Forwarded as ``n_lines`` to ``plot_connectivity_circle``.
    plotTitle : str
        Title of the figure.

    Example
    -------
    patientsDF = load ( rawDataPath )
    dataFrame = addLabel ( pathCSV, patientsDF ).copy()
    dataFrame = dataFrame[ dataFrame['experiment'] == 'A' ]
    dataFrame = dataFrame[ dataFrame.cluster_Hierarchichal == 1 ]
    electrodes = ['Fp1', 'F3', 'C3', 'Fz', 'Cz', 'Fp2', 'F4', 'C4']
    bandPower = ['Theta2+Alpha1', 'Theta', 'Alpha', 'Beta_Global', 'Gamma']
    corrGraphPlot( dataFrame, electrodes, bandPower, 100, 'All-to-All Correlation - Experiment A' )
    """
    # One node per (electrode, band) pair, electrode by electrode
    label_names = [ electrode + '_(' + band + ')' for electrode in electrodes for band in bandPower ]

    # Nodes are laid out in two groups, split in the middle of the label list
    middle = len( label_names ) / 2
    node_angles = circular_layout( label_names, label_names, start_pos = 90, group_boundaries = [ 0, middle ] )

    # Correlation matrix of the selected columns
    corrValues = dataFrame[ label_names ].corr().values

    # Colours span the whole [-1, 1] correlation range
    plot_connectivity_circle( corrValues,
                              label_names,
                              n_lines = numberOfEdges,
                              node_angles = node_angles,
                              colormap = 'RdYlGn',
                              facecolor = 'White',
                              textcolor = 'Black',
                              node_width = 3,
                              title = plotTitle,
                              vmin = -1,
                              vmax = 1 )

In [87]:
# Correlation graph for the patients of experiment B labelled 1
patientsDF = load( rawDataPath )
dataFrame = addLabel( pathCSV, patientsDF ).copy()
inExperiment = dataFrame['experiment'] == 'B'
inCluster = dataFrame['cluster_Hierarchichal'] == 1
dataFrame = dataFrame[ inExperiment & inCluster ]

electrodes = [ 'F3', 'C3', 'Fz', 'Cz', 'C4', 'F4']
bandPower = ['Theta2+Alpha1', 'Theta', 'Alpha', 'Beta_Global', 'Gamma']
numberOfEdges = 20
plotTitle = 'All-to-All Correlation - Experiment B - Label Patient 1'

corrGraphPlot( dataFrame, electrodes, bandPower, numberOfEdges, plotTitle )

In [98]:
def corrGraphPlotMAxCorr( dataFrame, electrodes, bandPower, numberOfEdges, maxCorr, plotTitle ):
    """Draw only the strong correlations between electrode/band columns.

    Same circular graph as ``corrGraphPlot``, but every correlation whose
    absolute value is below ``maxCorr`` is replaced by 0 before plotting.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Table containing one ``<electrode>_(<band>)`` column per requested pair.
    electrodes : list of str
        Electrodes to include.
    bandPower : list of str
        Bands to include for every electrode.
    numberOfEdges : int or None
        Forwarded as ``n_lines`` to ``plot_connectivity_circle``.
    maxCorr : float
        Despite its name, the minimum absolute correlation that is kept.
    plotTitle : str
        Title of the figure.
    """
    # One node per (electrode, band) pair, electrode by electrode
    label_names = [ electrode + '_(' + band + ')' for electrode in electrodes for band in bandPower ]

    node_angles = circular_layout( label_names, label_names, start_pos=90, group_boundaries=[0, len(label_names) / 2 ] )

    # Correlation matrix of the selected columns
    corrDF = dataFrame[ label_names ].corr()

    # Build a new array rather than writing into corrDF.values: that array can
    # be a read-only view of the dataframe (pandas Copy-on-Write).
    rawValues = corrDF.values
    corrValues = np.where( np.absolute( rawValues ) < maxCorr, 0, rawValues )

    # Plot correlationGraph
    plot_connectivity_circle( corrValues,
                              label_names,
                              n_lines = numberOfEdges,
                              node_angles = node_angles,
                              colormap  = 'RdYlGn',
                              facecolor ='White',
                              textcolor = 'Black',
                              node_width = 3,
                              title = plotTitle,
                              vmin = -1,
                              vmax = 1 )

In [109]:
# Strong correlations (absolute value of at least 0.8): experiment C, patients labelled 0
patientsDF = load( rawDataPath )
dataFrame = addLabel( pathCSV, patientsDF ).copy()
inExperiment = dataFrame['experiment'] == 'C'
inCluster = dataFrame['cluster_Hierarchichal'] == 0
dataFrame = dataFrame[ inExperiment & inCluster ]

electrodes = [ 'F3', 'C3', 'Fz', 'Cz', 'C4', 'F4']
bandPower = ['Theta2+Alpha1', 'Theta', 'Alpha', 'Beta_Global', 'Gamma']
# None: no limit on the number of edges is passed to the plot
numberOfEdges = None
maxCorr = 0.8
plotTitle = 'All-to-All Correlation > 0.8 - Experiment C - Label Patient 0'

corrGraphPlotMAxCorr( dataFrame, electrodes, bandPower, numberOfEdges, maxCorr, plotTitle )

Graph showing how much the correlations between electrode/band pairs differ between the two patient labels

In [59]:
def diffBetweenLabelsCorrGraphPlot( dataFrame, electrodes, bandPower, numberOfEdges, plotTitle ):
    """Draw how much the correlation matrices of the two patient labels differ.

    The correlation matrix of the electrode/band columns is computed separately
    for the rows with ``cluster_Hierarchichal == 1`` and for those with
    ``cluster_Hierarchichal == 0``; the absolute value of their difference is
    drawn on a circular graph.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Table with the ``<electrode>_(<band>)`` columns and a
        ``cluster_Hierarchichal`` column.
    electrodes : list of str
        Electrodes to include.
    bandPower : list of str
        Bands to include for every electrode.
    numberOfEdges : int or None
        Forwarded as ``n_lines`` to ``plot_connectivity_circle``.
    plotTitle : str
        Title of the figure.
    """
    # One node per (electrode, band) pair, electrode by electrode
    label_names = [ electrode + '_(' + band + ')' for electrode in electrodes for band in bandPower ]

    middle = len( label_names ) / 2
    node_angles = circular_layout( label_names, label_names, start_pos = 90, group_boundaries = [ 0, middle ] )

    # Correlation matrix of each label
    labels = dataFrame.cluster_Hierarchichal
    corrDF_l1 = dataFrame[ labels == 1 ][ label_names ].corr()
    corrDF_l0 = dataFrame[ labels == 0 ][ label_names ].corr()

    # Only the size of the difference is plotted, not its sign
    diffCorrValue = np.absolute( ( corrDF_l1 - corrDF_l0 ).values )

    plot_connectivity_circle( diffCorrValue,
                              label_names,
                              n_lines = numberOfEdges,
                              node_angles = node_angles,
                              colormap = 'OrRd',
                              facecolor = 'White',
                              textcolor = 'Black',
                              node_width = 1,
                              title = plotTitle,
                              vmin = 0 )


In [61]:
# Difference between the correlations of the two labels inside one experiment.
# The same variable drives the filter and the title, so the title always names
# the experiment that is actually plotted.
experiment = 'B'

patientsDF = load( rawDataPath )
dataFrame = addLabel( pathCSV, patientsDF ).copy()
dataFrame = dataFrame[ dataFrame['experiment'] == experiment ]

electrodes = [ 'F3', 'C3', 'Fz', 'Cz', 'C4', 'F4',]
bandPower = ['Theta2+Alpha1', 'Theta', 'Alpha', 'Beta_Global', 'Gamma']
plotTitle = 'Absolute difference between correlations - Experiment ' + experiment
# 435 = 30 * 29 / 2: every pair among the 6 x 5 = 30 electrode/band nodes
numberOfEdges = 435

# Plot correlationGraph
diffBetweenLabelsCorrGraphPlot( dataFrame, electrodes, bandPower, numberOfEdges, plotTitle )

## 4. Plot absolute differences between labels

Plot the electrodes and their bands on a picture of the brain

In [164]:

def scatterPlotBrain ( resultToPlot, brainPath, plotTitle, size ):
    """Draw one marker per electrode/band value on top of a brain picture.

    Every electrode is drawn as a large light disc; its five band values are
    drawn around the disc centre as coloured markers whose area is
    ``pi * value**2 * size``.

    Parameters
    ----------
    resultToPlot : pandas.Series or array-like of 30 numbers
        Values ordered electrode by electrode (F3, C3, Fz, Cz, F4, C4) and,
        inside each electrode, band by band (Theta2+Alpha1, Theta, Alpha,
        Beta_Global, Gamma).
    brainPath : str
        Path of the background image.
    plotTitle : str
        Title of the figure.
    size : float
        Scale factor applied to every marker area.

    Raises
    ------
    ValueError
        If ``resultToPlot`` does not hold exactly one value per electrode/band pair.
    """
    electrodes = [ 'F3', 'C3', 'Fz', 'Cz', 'F4', 'C4']
    bandPower = ['Theta2+Alpha1', 'Theta', 'Alpha', 'Beta_Global', 'Gamma']
    colors = ['b', 'c', 'y', 'm', 'r']

    # Centre of each electrode disc, in the same order as `electrodes`
    # (presumably tuned by hand for the picture at brainPath - confirm if the image changes)
    electrodesX = np.array( [300, 300, 500, 500, 700, 700] )
    electrodesY = np.array( [300, 850, 450, 700, 300, 850] )
    # A single scalar area applies to every electrode disc
    electrodesArea = 15000

    # Position of each band marker relative to its electrode centre, in the
    # same order as `bandPower`. Deriving the marker coordinates from the
    # electrode centres keeps every value attached to its own electrode.
    bandOffsets = [ (-50, -50), (50, -50), (0, 0), (-50, 50), (50, 50) ]

    # Row = electrode, column = band
    values = np.asarray( resultToPlot, dtype = float )
    if values.size != len( electrodes ) * len( bandPower ):
        raise ValueError( "resultToPlot must contain %d values, got %d"
                          % ( len( electrodes ) * len( bandPower ), values.size ) )
    values = values.reshape( len( electrodes ), len( bandPower ) )

    fig = plt.figure()
    ax = fig.add_subplot(111)

    #Plot electrodes name, slightly above each disc centre
    for x, y, txt in zip( electrodesX, electrodesY, electrodes ):
        ax.text( x, y - 100, txt )

    #Legend: one coloured patch per band
    recs = [ mpatches.Circle( (0, 0), 0.1, color = color ) for color in colors ]
    ax.legend( recs, bandPower, loc = 'center right', fontsize = 15 )

    #Plot brain picture
    ax.imshow( plt.imread( brainPath ) )

    #Plot electrode area
    ax.scatter( electrodesX, electrodesY, s = electrodesArea, color = 'whitesmoke' )

    #Plot band powers per electrode
    for bandIndex, ( offsetX, offsetY ) in enumerate( bandOffsets ):
        bandArea = np.pi * values[ :, bandIndex ] ** 2 * size
        ax.scatter( electrodesX + offsetX, electrodesY + offsetY, s = bandArea, color = colors[ bandIndex ] )

    ax.axis('off')
    ax.set_title( plotTitle, fontsize = 20 )
    plt.show()


In [142]:
# Absolute difference between the mean standardized values of the two labels (experiment C)
patientsDF = load( rawDataPath )
dataFrame = addLabel( pathCSV, patientsDF ).copy()
dataFrame = dataFrame[ dataFrame['experiment'] == 'C' ]

electrodes = [ 'F3', 'C3', 'Fz', 'Cz', 'F4', 'C4']
bandPower = ['Theta2+Alpha1', 'Theta', 'Alpha', 'Beta_Global', 'Gamma']

# One column name per (electrode, band) pair, electrode by electrode
label_names = [ electrode + '_(' + band + ')' for electrode in electrodes for band in bandPower ]

# Standardize the feature columns only, then put the label column back
cluster = dataFrame['cluster_Hierarchichal']
dataFrame = normalizeData( dataFrame[ label_names ] )
dataFrame['cluster_Hierarchichal'] = cluster

# Dataframe per label
valuesDF_l1 = dataFrame[ dataFrame['cluster_Hierarchichal'] == 1 ]
valuesDF_l0 = dataFrame[ dataFrame['cluster_Hierarchichal'] == 0 ]
len_l1 = valuesDF_l1.shape[0]
len_l0 = valuesDF_l0.shape[0]
sum_l1 = valuesDF_l1[ label_names ].sum()
sum_l0 = valuesDF_l0[ label_names ].sum()

# Equals |sum_l1 / len_l1 - sum_l0 / len_l0|, written over a common denominator
resultToPlot = np.absolute( ( sum_l1*len_l0 - sum_l0*len_l1 ) / ( len_l1*len_l0 ) )

plotTitle = "Differences between labels - Experiment C"
size = 100
scatterPlotBrain( resultToPlot, brainPath, plotTitle, size )

In [176]:
# Connectivity derived from the correlation graph: experiment A, patients labelled 0
patientsDF = load( rawDataPath )
dataFrame = addLabel( pathCSV, patientsDF ).copy()
inExperiment = dataFrame['experiment'] == 'A'
inCluster = dataFrame['cluster_Hierarchichal'] == 0
dataFrame = dataFrame[ inExperiment & inCluster ]

electrodes = [ 'F3', 'C3', 'Fz', 'Cz', 'F4', 'C4']
bandPower = ['Theta2+Alpha1', 'Theta', 'Alpha', 'Beta_Global', 'Gamma']

# One column name per (electrode, band) pair, electrode by electrode
label_names = [ electrode + '_(' + band + ')' for electrode in electrodes for band in bandPower ]

valuesDF = dataFrame[ label_names ]

# Each column's share (in percent) of the total absolute correlation
corrDF = np.absolute( valuesDF.corr() )
sumcorrDF = corrDF.sum()
total = sum( sumcorrDF )
resultToPlot = ( sumcorrDF / total ) * 100

plotTitle = "Connectivity for correlation Graph - Experiment A - Label Patient 0"
size = 10
scatterPlotBrain( resultToPlot, brainPath, plotTitle, size )

