# Pearson correlation

In [0]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

!pip install pmdarima
!pip install statsmodels --upgrade

In [0]:
from google.colab import files
#uploaded = files.upload()

In [0]:
# Load the input file, transpose it, and clean it up in a single pipeline.
df = (
    pd.read_csv('fallecidos.csv', encoding="ISO-8859-1", index_col=0, header=None)
    .T                                                          # transpose the file as read
    .rename(columns={'Fecha': 'Date'})                          # normalise the column name
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))   # convert to datetime
    .set_index('Date')                                          # use the dates as the index
    .apply(pd.to_numeric)                                       # make every column numeric
    .sort_values('Date')                                        # order by date
    # Original note: set to 0 the leading values that come before the first number.
    .interpolate('zero', fill_value=0, limit_direction='backward')
    # Linearly interpolate intermediate gaps, then fill whatever is left at the edges.
    .interpolate(method='linear', axis=0)
    .ffill()
    .bfill()
)
df

In [0]:
!pip install dtw
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
sns.set_context('talk',font_scale=.8)

def pearson(df, col1,col2,do_print):
  """Return the overall Pearson correlation between two columns of ``df``.

  Parameters
  ----------
  df : pandas.DataFrame
      Frame containing both columns. When plotting, every column of ``df``
      is drawn in the smoothed-series panels.
  col1, col2 : column labels
      The two columns to correlate.
  do_print : bool
      When true, additionally draw (1) the 30-sample centred rolling median
      of ``df`` titled with the overall r, and (2) a two-panel figure with
      the same smoothed series above the 120-sample centred rolling
      correlation between ``col1`` and ``col2``.

  Returns
  -------
  float
      Pearson r between ``df[col1]`` and ``df[col2]`` (pairwise-complete
      observations, as computed by ``Series.corr``).
  """
  # Correlate only the two requested columns. Building the full df.corr()
  # matrix and reading one cell by positions taken from df.columns is
  # quadratic in the number of columns and breaks (wrong cell or an error)
  # as soon as corr() drops or rejects a non-numeric column.
  overall_pearson_r = df[col1].corr(df[col2])

  if do_print:
    # The smoothed frame is used by both figures; compute it once.
    smoothed = df.rolling(window=30,center=True).median()

    f,ax=plt.subplots(figsize=(14,3))
    smoothed.plot(ax=ax)
    ax.set(xlabel='Frame',ylabel='Smiling evidence',title=f"Overall Pearson r = {np.round(overall_pearson_r,2)}")

    # Window size used to compute the moving-window synchrony.
    r_window_size = 120
    rolling_r = df[col1].rolling(window=r_window_size, center=True).corr(df[col2])
    f,ax=plt.subplots(2,1,figsize=(14,6),sharex=True)
    smoothed.plot(ax=ax[0])
    ax[0].set(xlabel='Frame',ylabel='Smiling Evidence')
    rolling_r.plot(ax=ax[1])
    ax[1].set(xlabel='Frame',ylabel='Pearson r')
    plt.suptitle("Smiling data and rolling window correlation")
  return overall_pearson_r

# Time Lagged Cross Correlation

In [0]:
def crosscorr(datax, datay, lag=0, wrap=False):
    """ Lag-N cross correlation.

    ``datay`` is shifted by ``lag`` positions and the Pearson correlation
    of ``datax`` with the shifted series is returned.

    Parameters
    ----------
    datax, datay : pandas.Series objects of equal length
    lag : int, default 0
        Number of positions to shift ``datay`` (positive moves values to
        later positions, negative to earlier ones).
    wrap : bool, default False
        If False, positions vacated by the shift are NaN and are ignored by
        the correlation. If True, the shift is circular: values pushed off
        one end re-enter at the other.

    Returns
    ----------
    crosscorr : float
    """
    if wrap:
        # np.roll is a true circular shift for any sign of ``lag``. The
        # previous slice-assignment version failed for lag == 0 (full-length
        # array assigned into an empty slice) and left the trailing NaNs in
        # place for negative lags, so nothing was actually wrapped.
        shiftedy = pd.Series(np.roll(datay.to_numpy(), lag), index=datay.index)
    else:
        shiftedy = datay.shift(lag)
    return datax.corr(shiftedy)
def timeLagged(df,col1,col2, do_print, seconds=1, fps=1):
  """Time-lagged cross correlation between two columns of ``df``.

  ``crosscorr`` is evaluated for every integer lag in
  ``range(-(seconds*fps - 1), seconds*fps)`` and the lag with the highest
  correlation is located.

  Parameters
  ----------
  df : pandas.DataFrame
  col1, col2 : column labels in ``df``
  do_print : bool
      When true, plot correlation against lag with the zero-lag centre and
      the peak marked.
  seconds, fps : numbers, default 1
      Their product sets the half-width of the lag window. With the
      defaults only lag 0 is evaluated, so pass larger values to actually
      scan lags.

  Returns
  -------
  offset : int or float
      Zero-lag position minus peak position (i.e. minus the lag of peak
      correlation); ``nan`` when no lag yields a finite correlation.
  """
  d1 = df[col1]
  d2 = df[col2]
  lags = list(range(-int(seconds*fps-1),int(seconds*fps)))
  rs = np.array([crosscorr(d1,d2, lag) for lag in lags], dtype=float)

  # nanargmax ignores lags whose correlation is undefined; plain argmax
  # would report the first NaN as the "peak".
  has_peak = rs.size > 0 and not np.isnan(rs).all()
  peak_lag = lags[int(np.nanargmax(rs))] if has_peak else None
  # Measuring against lag 0 directly avoids the off-by-one that
  # ceil(len(rs)/2) gives for a symmetric, odd-length lag list.
  offset = -peak_lag if has_peak else np.nan

  if do_print:
    f,ax=plt.subplots(figsize=(14,3))
    # Plot against the real lag values so the x axis needs no relabelling and
    # no hard-coded limits; the marker keeps a single-lag result visible.
    ax.plot(lags, rs, marker='o')
    ax.axvline(0,color='k',linestyle='--',label='Center')
    if has_peak:
      ax.axvline(peak_lag,color='r',linestyle='--',label='Peak synchrony')
    ax.set(title=f'Offset = {offset} frames\nS1 leads <> S2 leads', xlabel='Offset',ylabel='Pearson r')
    plt.legend()
  return offset

## Windowed time lagged cross correlation

In [0]:
def windowed(df,col1,col2):
  """Draw two heatmaps of time-lagged cross correlation between two columns.

  1. Windowed: the rows of ``df`` are cut into ``no_splits`` consecutive
     chunks and ``crosscorr`` is evaluated at every lag within each chunk.
  2. Rolling: a ``window_size``-row window slides over ``df`` in steps of
     ``step_size`` rows and the same lag scan is run on each window.

  Rows of each heatmap are windows, columns are lags (labelled with the
  actual lag value). Lags that leave fewer than two overlapping samples in
  a window yield NaN cells.

  Parameters
  ----------
  df : pandas.DataFrame
  col1, col2 : column labels in ``df``

  Returns
  -------
  None
  """
  # --- Windowed time lagged cross correlation ---
  seconds = 5
  fps = 30
  no_splits = 20
  lags = list(range(-int(seconds*fps-1),int(seconds*fps)))
  # Integer row boundaries of the chunks. Slicing by position (.iloc) works
  # for any index type; label slicing with float bounds fails on a
  # DatetimeIndex.
  bounds = np.linspace(0, df.shape[0], no_splits + 1).astype(int)
  rss=[]
  for start, stop in zip(bounds[:-1], bounds[1:]):
      d1 = df[col1].iloc[start:stop]
      d2 = df[col2].iloc[start:stop]
      rss.append([crosscorr(d1,d2, lag) for lag in lags])
  # Naming the columns after the lags lets seaborn label the x axis
  # correctly instead of relabelling ticks with a hard-coded offset.
  rss = pd.DataFrame(rss, columns=lags)
  f,ax = plt.subplots(figsize=(10,5))
  sns.heatmap(rss,cmap='RdBu_r',ax=ax)
  ax.set(title='Windowed Time Lagged Cross Correlation', xlabel='Offset',ylabel='Window epochs')

  # --- Rolling window time lagged cross correlation ---
  seconds = 5
  fps = 10
  window_size = 10 #samples
  step_size = 5
  lags = list(range(-int(seconds*fps-1),int(seconds*fps)))
  rss=[]
  # The +1 keeps the last window that ends exactly on the final row.
  for t_start in range(0, len(df) - window_size + 1, step_size):
      t_end = t_start + window_size
      d1 = df[col1].iloc[t_start:t_end]
      d2 = df[col2].iloc[t_start:t_end]
      rss.append([crosscorr(d1,d2, lag, wrap=False) for lag in lags])
  rss = pd.DataFrame(rss, columns=lags)

  f,ax = plt.subplots(figsize=(10,10))
  sns.heatmap(rss,cmap='RdBu_r',ax=ax)
  ax.set(title='Rolling Windowed Time Lagged Cross Correlation', xlabel='Offset',ylabel='Epochs')

# Dynamic Time Warping

In [0]:
from dtw import dtw,accelerated_dtw
def dinamicTime(df, col1, col2, do_print):
  """Dynamic-time-warping distance between two columns of ``df``.

  Both columns are passed as raw value arrays to ``accelerated_dtw`` with
  the euclidean distance. When ``do_print`` is ``True`` the accumulated
  cost matrix is shown as a grey image with the warping path drawn on top.

  Returns the first element returned by ``accelerated_dtw`` (the distance).
  """
  series_a = df[col1].values
  series_b = df[col2].values
  distance, _local_cost, accumulated_cost, warp_path = accelerated_dtw(
      series_a, series_b, dist='euclidean')
  if do_print==True:
    path_x, path_y = warp_path[0], warp_path[1]
    plt.imshow(accumulated_cost.T, origin='lower', cmap='gray', interpolation='nearest')
    plt.plot(path_x, path_y, 'w')
    plt.xlabel('Subject1')
    plt.ylabel('Subject2')
    plt.title('DTW Minimum Path')
    plt.show()
  return distance

The fact that the minimum path is pulled towards Subject1 indicates that the signal from Subject 2 is being pulled towards Subject 1, i.e. Subject 1 is the leader.

# instantaneous phase synchrony

In [0]:
from scipy.signal import hilbert, butter, filtfilt
from scipy.fftpack import fft,fftfreq,rfft,irfft,ifft
import numpy as np
import seaborn as sns
import pandas as pd
import scipy.stats as stats
def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a Butterworth band-pass filter.

    ``lowcut`` and ``highcut`` are given in the same units as the sampling
    rate ``fs``; both are normalised by the Nyquist frequency (``fs / 2``)
    before being handed to ``scipy.signal.butter``.

    Returns the ``(b, a)`` coefficient pair produced by ``butter``.
    """
    nyquist = 0.5 * fs
    band_edges = [edge / nyquist for edge in (lowcut, highcut)]
    return butter(order, band_edges, btype='band')


def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Band-pass ``data`` with the filter designed by ``butter_bandpass``.

    The coefficients are applied with ``filtfilt`` and the filtered array
    is returned.
    """
    numerator, denominator = butter_bandpass(lowcut, highcut, fs, order=order)
    return filtfilt(numerator, denominator, data)

def instantaneous(df, col1, col2, do_print):
  """Instantaneous phase synchrony between two columns of ``df``.

  Each column is interpolated, band-pass filtered with
  ``butter_bandpass_filter``, and converted to an instantaneous phase angle
  via the Hilbert transform. Synchrony at each sample is
  ``1 - sin(|angle1 - angle2| / 2)``.

  Parameters
  ----------
  df : pandas.DataFrame
  col1, col2 : column labels in ``df``
  do_print : bool
      When true, plot the filtered series, their phase angles and the
      resulting synchrony in three stacked panels.

  Returns
  -------
  numpy.ndarray
      Phase synchrony per sample.
  """
  # NOTE(review): band limits and sampling rate are fixed here; they look
  # like values chosen for a 30-samples-per-second signal - confirm they
  # suit this dataset.
  lowcut  = .01
  highcut = .5
  fs = 30.
  order = 1
  d1 = df[col1].interpolate().values
  # Read the second series from col2; reading col1 twice compared the first
  # series with itself.
  d2 = df[col2].interpolate().values
  y1 = butter_bandpass_filter(d1,lowcut=lowcut,highcut=highcut,fs=fs,order=order)
  y2 = butter_bandpass_filter(d2,lowcut=lowcut,highcut=highcut,fs=fs,order=order)

  al1 = np.angle(hilbert(y1),deg=False)
  al2 = np.angle(hilbert(y2),deg=False)
  phase_synchrony = 1-np.sin(np.abs(al1-al2)/2)
  N = len(al1)

  # Plot results
  if do_print:
    f,ax = plt.subplots(3,1,figsize=(14,7),sharex=True)
    ax[0].plot(y1,color='r',label='y1')
    ax[0].plot(y2,color='b',label='y2')
    ax[0].legend(bbox_to_anchor=(0., 1.02, 1., .102),ncol=2)
    ax[0].set(xlim=[0,N], title='Filtered Timeseries Data')
    ax[1].plot(al1,color='r')
    ax[1].plot(al2,color='b')
    ax[1].set(ylabel='Angle',title='Angle at each Timepoint',xlim=[0,N])
    ax[2].plot(phase_synchrony)
    ax[2].set(ylim=[0,1.1],xlim=[0,N],title='Instantaneous Phase Synchrony',xlabel='Time',ylabel='Phase Synchrony')
    plt.tight_layout()
    plt.show()
  return phase_synchrony

**Pearson Coefficient**


In [0]:
# Scan every ordered pair of distinct columns and keep the pair with the
# largest absolute Pearson correlation seen so far.
pearson_dict = {'pearson_value': 0}
for i in df.columns:
  for j in df.columns:
    if i == j:
      continue
    pearson_val = np.abs(pearson(df, i, j, False))
    if pearson_val > pearson_dict['pearson_value']:
      pearson_dict = {'pearson_value': pearson_val, 'i_value': i, 'j_value': j}

print(pearson_dict)


#windowed(df,col1,col2)


**Dynamic Time Warping**

In [0]:
# Scan every ordered pair of distinct columns and keep the pair with the
# largest DTW distance seen so far.
# NOTE(review): a larger DTW distance means the two series are LESS alike,
# so this reports the most dissimilar pair - confirm that is the intent
# (the Pearson cell above looks for the most similar pair).
dtw_dict=dict(dtw_value=0)
for i in df.columns:
  for j in df.columns:
    if(i!=j):
      # Compare the pair being iterated (i, j). The previous call passed
      # col1/col2, which are not defined until later cells.
      dtw_val = dinamicTime(df,i,j,False)
      if(dtw_dict['dtw_value'] < dtw_val):
        dtw_dict= dict(dtw_value=dtw_val, i_value=i, j_value=j)

print(dtw_dict)

**Instantaneous Phase Synchrony** NO FUNCIONA

In [0]:
# Phase synchrony for one pair of columns, without plotting.
col1, col2 = 'Ciudad Real', 'Toledo'
instantaneous(df, col1, col2, do_print=False)

**Time Lagged** NO FUNCIONA

In [0]:
# Time-lagged cross correlation for one pair of columns, with the plot.
col1, col2 = 'Albacete', 'Toledo'
timeLagged(df, col1, col2, do_print=True)

**Windowed time lagged Cross Correlation** NO FUNCIONA

In [0]:
# Windowed and rolling time-lagged cross-correlation heatmaps for one pair.
col1, col2 = 'Albacete', 'Toledo'
windowed(df, col1=col1, col2=col2)