In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from numpy.fft import fft, ifft
from scipy.signal import periodogram
import ordpy 
import statsmodels.api as sm
from scipy.signal import butter,filtfilt
from matplotlib.pyplot import figure
from os import listdir
from os.path import isfile, join
from statsmodels.tsa.stattools import adfuller, pacf
from tqdm.notebook import tqdm
import math
from scipy import interpolate

In [None]:
# Locate the pickled data file. The folder is expected to hold a single file;
# if it holds several, the last entry returned by listdir() is used (unchanged
# from the previous loop, which simply overwrote `path` on every iteration).
data_dir = '../Data/data_file'
onlyfiles = [f for f in listdir(data_dir) if isfile(join(data_dir, f))]
if not onlyfiles:
    # Previously an empty folder surfaced as a confusing NameError on `path`.
    raise FileNotFoundError(f'No data files found in {data_dir!r}')
path = data_dir + '/' + onlyfiles[-1]
files = path
files


# Preprocessing Functions
For windowing: I noticed that the reaction time is always longest in the first trial. Should the first trial therefore be windowed more aggressively (e.g. starting from 6 seconds)?

In [None]:

def window_value(rating, time): #use values instead of timing to window
    """Trim every trial so that it starts at the participant's first response.

    Parameters
    ----------
    rating : sequence of sequences
        One sequence of rating samples per trial.
    time : sequence of sequences
        Time stamps matching ``rating`` trial by trial.

    Returns
    -------
    rating_windowed_list, time_windowed_list : list
        One entry per trial. Rejected trials are replaced by ``[np.nan]``.
    failed_index_list : list of int
        Indices of the rejected trials (not filled when the whole entry is
        rejected because ``rating`` holds at most one trial).

    A trial is kept only if it has more than one sample, its last time stamp is
    above 10, the windowed trial starts before 10 and it still has more than
    100 samples. Trials whose first time stamp is below 0.1 are cut at the first
    sample whose rating differs from the initial rating; trials starting after
    0.1 are kept whole because recording only started on the first click.
    """
    rating_windowed_list = []
    time_windowed_list = []
    failed_index_list = [] #capture error in data logging

    def _reject(trial_index, log_failure=True):
        # Replace a trial with a NaN placeholder, optionally logging its index.
        rating_windowed_list.append([np.nan])
        time_windowed_list.append([np.nan])
        if log_failure:
            failed_index_list.append(trial_index)

    whole_entry_valid = len(rating) > 1 #catch the whole entry being NaN
    for i in range(len(rating)):
        if not whole_entry_valid:
            _reject(i, log_failure=False)
            continue

        trial_rating = rating[i]
        if not len(trial_rating) > 1:
            _reject(i)
            continue
        trial_time = time[i]
        if not trial_time[-1] > 10: #catch trials that end too early
            _reject(i)
            continue

        if trial_time[0] < 0.1:
            # Recording started immediately: drop the leading samples during
            # which the rating has not moved yet.
            first_change = next(
                (idx for idx, val in enumerate(trial_rating) if val != trial_rating[0]),
                None,
            )
            if first_change is None:
                # The rating never changed; this used to raise StopIteration.
                _reject(i)
                continue
            rating_windowed = trial_rating[first_change:len(trial_rating)]
            time_windowed = trial_time[first_change:len(trial_rating)]
        elif trial_time[0] > 0.1:
            #don't window as it starts recording only when click the slider
            rating_windowed = trial_rating
            time_windowed = trial_time
        else:
            # A first time stamp of exactly 0.1 (or NaN) matches neither case.
            _reject(i)
            continue

        #for trials that have a very long reaction time (about 10 seconds), filter them away
        if time_windowed[0] < 10 and len(time_windowed) > 100:
            rating_windowed_list.append(rating_windowed)
            time_windowed_list.append(time_windowed)
        else:
            _reject(i)

    return rating_windowed_list, time_windowed_list, failed_index_list


        

def exponential_smoothing(rating, alpha):
    """Apply first-order exponential smoothing to every trial.

    ``rating`` is accessed positionally through ``.iloc`` (e.g. a pandas Series
    of per-trial sequences). Each output sample is
    ``alpha * x[n] + (1 - alpha) * y[n-1]`` with ``y[0] = x[0]``. Trials with at
    most one sample are replaced by ``[np.nan]``.
    """
    lowpass_list = [] #one smoothed list per trial
    for trial_idx in range(len(rating)):
        trial = rating.iloc[trial_idx]
        if not len(trial) > 1:
            lowpass_list.append([np.nan])
            continue
        smoothed = [trial[0]] # first value is same as series
        for n in range(1, len(trial)):
            smoothed.append(alpha * trial[n] + (1 - alpha) * smoothed[n-1])
        lowpass_list.append(smoothed)
    return lowpass_list


def downsample(rating, time):
    """Resample every trial onto a regular 0.05-spaced time grid.

    ``rating`` is indexed with ``[]`` and ``time`` positionally with ``.iloc``.
    The grid runs from the first time stamp rounded up to one decimal to the
    last time stamp rounded down to one decimal (end excluded), and ratings are
    linearly interpolated onto it. Trials with at most one sample become
    ``[np.nan]`` in both returned lists.
    """
    rating_downsample_list, time_downsample_list = [], []
    for idx in range(len(rating)):
        trial_rating = rating[idx]
        if not len(trial_rating) > 1:
            rating_downsample_list.append([np.nan])
            time_downsample_list.append([np.nan])
            continue
        trial_time = time.iloc[idx]
        interpolator = interpolate.interp1d(trial_time, trial_rating)
        grid_start = round_up(trial_time[0],1)
        grid_stop = round_down(trial_time[-1],1)
        grid = np.arange(grid_start,grid_stop,0.05)
        rating_downsample_list.append(interpolator(grid))
        time_downsample_list.append(grid)
    return rating_downsample_list, time_downsample_list

def round_up(n, decimals=0):
    """Round ``n`` towards +infinity, keeping ``decimals`` decimal places."""
    scale = 10 ** decimals
    scaled_ceiling = math.ceil(n * scale)
    return scaled_ceiling / scale
def round_down(n, decimals=0):
    """Round ``n`` towards -infinity, keeping ``decimals`` decimal places."""
    scale = 10 ** decimals
    scaled_floor = math.floor(n * scale)
    return scaled_floor / scale

        
    

# Analysis Functions

In [None]:
#mean, mean square, root mean square, variance, standard deviation
def mean(rating):
    """Return the arithmetic mean of every trial.

    Trials with at most one sample (the NaN placeholder used for failed
    trials) yield ``np.nan``.
    """
    # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
    return [
        np.mean(rating[i]) if len(rating[i]) > 1 else np.nan
        for i in range(len(rating))
    ]

def mean_square(rating, time): #strength/average power of the signal
    """Return, per trial, the sum of squared samples divided by the trial duration.

    The duration is ``max(time[i]) - min(time[i])``. Note that the sum is
    normalised by elapsed time, not by the number of samples. Trials with at
    most one sample, or with zero duration, yield ``np.nan``.
    """
    mean_square_list = []
    for i in range(len(rating)):
        if len(rating[i]) > 1:
            duration = max(time[i]) - min(time[i])
            if duration == 0:
                # All time stamps identical: the ratio is undefined.
                mean_square_list.append(np.nan)
                continue
            mean_square_list.append(np.sum(np.array(rating[i]) ** 2)/duration)
        else:
            # np.nan instead of np.NaN (alias removed in NumPy 2.0).
            mean_square_list.append(np.nan)
    return mean_square_list

def rms(mean_square_list):
    """Element-wise square root of the mean-square values (root mean square)."""
    root_mean_square = np.sqrt(mean_square_list)
    return root_mean_square

def variance(rating,time):
    """Return, per trial, the summed squared deviation from the mean divided by the trial duration.

    The duration is ``max(time[i]) - min(time[i])``; as in ``mean_square`` the
    sum is normalised by elapsed time rather than by the sample count. Trials
    with at most one sample, or with zero duration, yield ``np.nan``.
    """
    variance_list = []
    for i in range(len(rating)):
        if len(rating[i]) > 1:
            # asarray so that plain Python lists work too (list - float fails).
            samples = np.asarray(rating[i])
            duration = max(time[i]) - min(time[i])
            if duration == 0:
                variance_list.append(np.nan)
                continue
            diff_mean_square_sum = np.sum((samples - np.mean(samples))**2)
            variance_list.append(diff_mean_square_sum/duration)
        else:
            # np.nan instead of np.NaN (alias removed in NumPy 2.0).
            variance_list.append(np.nan)
    return variance_list

def std(variance_list):
    """Element-wise square root of the variance values (standard deviation)."""
    standard_deviation = np.sqrt(variance_list)
    return standard_deviation
  


#frequency np.fft

def fft_data(rating, time):
    """Compute the discrete Fourier transform of every trial.

    Returns
    -------
    fft_weights_list : list
        Complex FFT coefficients per trial.
    fft_freq_list : list
        Frequency of every FFT bin per trial, in cycles per time unit. Bins are
        not wrapped to negative frequencies, so values above half the sampling
        rate mirror the lower half.

    Trials with at most one sample yield ``[np.nan]`` in both lists.
    """
    fft_weights_list = []
    fft_freq_list = []
    for i in range(len(rating)):
        if len(rating[i]) > 1:
            fft_weights = fft(rating[i])
            N = len(fft_weights)
            duration = time[i][-1]-time[i][0]
            # N samples span N - 1 sampling intervals, so dt = duration / (N - 1)
            # and bin k lies at k / (N * dt). Using k / duration was slightly off.
            fft_freq = np.arange(N) * (N - 1) / (N * duration)
            fft_weights_list.append(fft_weights)
            fft_freq_list.append(fft_freq)
        else:
            # np.nan instead of np.NaN (alias removed in NumPy 2.0).
            fft_weights_list.append([np.nan])
            fft_freq_list.append([np.nan])
    return fft_weights_list, fft_freq_list


#power-spectrum
def power_spectrum(rating,time):
    """Estimate the power spectral density of every trial with a periodogram.

    Returns
    -------
    psd_freq_list : list
        Sample frequencies per trial.
    psd_list : list
        Power spectral density per trial (``scaling='density'``).

    Trials with at most one sample yield ``[np.nan]`` in both lists.
    """
    psd_freq_list = []
    psd_list = []
    for i in range(len(rating)):
        if len(rating[i]) > 1:
            N = len(rating[i])
            # Sampling rate = number of intervals / elapsed time. The previous
            # N / time[i][-1] used the absolute end time stamp, which is wrong
            # whenever the trial does not start at t = 0.
            duration = time[i][-1] - time[i][0]
            sampling_rate = (N - 1) / duration
            f, S = periodogram(rating[i], sampling_rate, scaling = 'density')
            psd_freq_list.append(f)
            psd_list.append(S)
        else:
            # np.nan instead of np.NaN (alias removed in NumPy 2.0).
            psd_freq_list.append([np.nan])
            psd_list.append([np.nan])
    return psd_freq_list, psd_list


#permutation entropy - complexity
def permutation_entropy(rating):
    """Compute permutation entropy and statistical complexity of every trial.

    Both values come from ``ordpy.complexity_entropy`` called with its default
    parameters. Trials with at most one sample yield ``np.nan`` for both.

    Returns
    -------
    entropy_list, stat_complexity_list : list of float
    """
    entropy_list = []
    stat_complexity_list = []
    for i in range(len(rating)):
        if len(rating[i]) > 1:
            entropy, stat_complexity = ordpy.complexity_entropy(rating[i])
            entropy_list.append(entropy)
            stat_complexity_list.append(stat_complexity)
        else:
            # np.nan instead of np.NaN (alias removed in NumPy 2.0).
            entropy_list.append(np.nan)
            stat_complexity_list.append(np.nan)
    return entropy_list, stat_complexity_list

#autocorrelation
def autocorrelation(rating, time):
    """Compute the autocorrelation function and matching time lags per trial.

    Returns
    -------
    acorr_list : list
        ``sm.tsa.acf`` of each trial, requested for as many lags as the trial
        has samples.
    time_lag_list : list
        Each trial's time stamps shifted so that the first one is zero.

    Trials with at most one sample yield ``[nan, nan, nan]`` in the
    corresponding list.
    """
    # np.nan instead of np.NaN (alias removed in NumPy 2.0).
    acorr_list = []
    for i in range(len(rating)):
        if len(rating[i]) > 1:
            acorr_list.append(sm.tsa.acf(rating[i], nlags = int(len(rating[i]))))
        else:
            acorr_list.append([np.nan, np.nan, np.nan])

    time_lag_list = []
    for i in range(len(time)):
        if len(time[i]) > 1:
            time_lag_list.append(np.array(time[i]) - np.array(time[i][0]))
        else:
            time_lag_list.append([np.nan, np.nan, np.nan])
    return acorr_list, time_lag_list

#check stationarity
def adfuller_test(rating):
    """Run the augmented Dickey-Fuller test on every trial.

    Returns
    -------
    adfuller_list : list
        The raw ``adfuller`` result per trial (``np.nan`` for trials with at
        most one sample).
    labels : list of str
        Names of the first four entries of an ``adfuller`` result.
    stationary_list : list
        1 when the p-value (``result[1]``) is at most 0.05, otherwise 0;
        ``np.nan`` for trials with at most one sample.
    """
    labels = ['ADF Test Statistic','p-value','#Lags Used','#Observation Used']
    adfuller_list, stationary_list = [], []
    for idx in range(len(rating)):
        trial = rating[idx]
        if not len(trial) > 1:
            adfuller_list.append(np.nan)
            stationary_list.append(np.nan)
            continue
        test_result = adfuller(trial)
        adfuller_list.append(test_result)
        stationary_list.append(1 if test_result[1] <= 0.05 else 0)
    return adfuller_list, labels, stationary_list
    


In [None]:
# Display the path of the data file selected in the file-listing cell.
files

In [None]:
# Load the pickled trial table.
# NOTE(review): unpickling can execute arbitrary code - only load files that
# come from a trusted source.
data = pd.read_pickle(files)
# Preview the first rows instead of rendering the whole frame.
data.head()

# Put continuous pain data into correct form & visualise

In [None]:
# Overlay the raw rating trace of every row in `data` on one scatter plot.
for i, (trial_time, trial_rating) in enumerate(zip(data['time'], data['rating'])):
    plt.scatter(trial_time, trial_rating, s = 0.05)

In [None]:
# Split the table into consecutive blocks of three rows (presumably the three
# trials of one participant - TODO confirm against the data layout).
# NOTE(review): later cells index a `participant_list` that is not defined
# anywhere in this notebook; it presumably has to be built alongside these lists.
block_starts = range(0, len(data), 3)
rating_all = [data['rating'].iloc[start:start+3] for start in block_starts]
timing_all = [data['time'].iloc[start:start+3] for start in block_starts]

# Pre-process the data
## exponential smoothing

In [None]:
# Low-pass filter every block of trials with exponential smoothing (alpha = 0.1).
# The loop runs over rating_all itself: the previous bound, len(participant_list),
# relied on a name that is never defined in this notebook.
lowpass_list_all = []
for i in range(len(rating_all)):
    lowpass_list_all.append(exponential_smoothing(rating_all[i], alpha = 0.1))

## Downsample the data

In [None]:
#with the timing method
# Resample every smoothed trial onto the regular grid produced by downsample().
rating_downsample_list_all, time_downsample_list_all = [], []

for j in tqdm(range(len(lowpass_list_all))):
    resampled = downsample(lowpass_list_all[j], timing_all[j])
    rating_downsample_list, time_downsample_list = resampled
    rating_downsample_list_all.append(rating_downsample_list)
    time_downsample_list_all.append(time_downsample_list)

    


    

In [None]:
# One figure per participant: raw trials on the left, downsampled trials on the right.
for i in range(len(rating_downsample_list_all)):
    plt.figure()

    ax_original = plt.subplot(1,2,1)
    for trial in range(3):
        ax_original.scatter(timing_all[i].iloc[trial],rating_all[i].iloc[trial], s = 0.05)
    ax_original.set_xlabel('time (s)')
    ax_original.set_ylabel('rating')
    ax_original.set_title('original')

    ax_downsampled = plt.subplot(1,2,2)
    for trial in range(3):
        ax_downsampled.scatter(time_downsample_list_all[i][trial],rating_downsample_list_all[i][trial], s = 0.05)
    ax_downsampled.set_xlabel('time (s)')
    ax_downsampled.set_ylabel('rating')
    ax_downsampled.set_title('Downsampled to 20 Hz')

    plt.suptitle(f'participant {participant_list[i]}')
    plt.tight_layout()

# Analysis
## common statistical parameters

In [None]:
# Per-participant descriptive statistics; every *_all list gets one entry
# (a list/array with one value per trial) per participant.
mean_list_all, mean_square_list_all, rms_list_all = [], [], []
variance_list_all, std_list_all = [], []

for j in range(len(rating_downsample_list_all)):
    trial_ratings = rating_downsample_list_all[j]
    trial_times = time_downsample_list_all[j]

    mean_list = mean(trial_ratings)
    mean_square_list = mean_square(trial_ratings, trial_times)
    variance_list = variance(trial_ratings, trial_times)
    # rms and std are the square roots of the two quantities above
    rms_list = rms(mean_square_list)
    std_list = std(variance_list)

    mean_list_all.append(mean_list)
    mean_square_list_all.append(mean_square_list)
    rms_list_all.append(rms_list)
    variance_list_all.append(variance_list)
    std_list_all.append(std_list)


### Mean

In [None]:
# One figure per participant: mean rating of each of the three trials.
x = [1,2,3]
for i, participant_means in enumerate(mean_list_all):
    plt.figure()
    ax = plt.gca()
    ax.scatter(x, participant_means)
    ax.set_ylim(0,10)
    ax.set_xlabel('trial')
    ax.set_ylabel('mean value')
    ax.set_title(f'Mean value of {participant_list[i]}')
    ax.set_xticks([1,2,3])

#### Mean distribution

In [None]:
# Overall mean per participant = mean of the per-trial means.
# np.nanmean ignores failed (NaN) trials, matching the np.nanvar used for the
# overall variance; with np.mean a single failed trial turned the participant's
# value into NaN.
mean_overall = []
for i in range(len(mean_list_all)):
    mean_overall.append(np.nanmean(mean_list_all[i]))
plt.hist(mean_overall, bins = 10)
plt.xlabel('rating')
plt.ylabel('frequency')
plt.title('Distribution of all the mean values of all participants in Day 1')
plt.show()

### Mean square

In [None]:
# One figure per participant: mean-square value of each of the three trials.
x = [1,2,3]
for i, participant_mean_squares in enumerate(mean_square_list_all):
    plt.figure()
    ax = plt.gca()
    ax.scatter(x, participant_mean_squares)
    ax.set_xlabel('trial')
    ax.set_ylabel('mean square value')
    ax.set_title(f'Mean square value of {participant_list[i]}')
    ax.set_xticks([1,2,3])

### Variance

In [None]:
# One figure per participant: variance of each of the three trials.
x = [1,2,3]
for i, participant_variances in enumerate(variance_list_all):
    plt.figure()
    ax = plt.gca()
    ax.scatter(x, participant_variances)
    ax.set_xlabel('trial')
    ax.set_ylabel('variance value')
    ax.set_title(f'variance value of {participant_list[i]}')
    ax.set_xticks([1,2,3])

#### Variance distribution

In [None]:
def var_all(rating):
    """Variance of the pooled samples of the first three trials, ignoring NaN."""
    pooled = np.hstack([rating[trial] for trial in range(3)])
    return np.nanvar(pooled)

In [None]:
# Pooled variance over all three trials, one value per participant.
var_combined_list_all = []
for j in range(len(rating_downsample_list_all)):
    var_combined_list_all.append(var_all(rating_downsample_list_all[j]))


In [None]:
# Histogram of the pooled per-participant variances.
ax = plt.gca()
ax.hist(var_combined_list_all, bins = 20)
ax.set_xlabel('variance')
ax.set_ylabel('frequency')
ax.set_title('variance distribution of participants on Day 1')
plt.show()

## Frequency
### FFT

In [None]:
# Sanity check: mean sampling interval of the last participant's first
# downsampled trial. The index is explicit now (it used to rely on the loop
# variable `j` leaking from an earlier cell), and the interval is the mean step
# between samples rather than duration / number of samples, which was off by one.
np.mean(np.diff(time_downsample_list_all[-1][0]))

In [None]:
# FFT magnitude spectrum of every trial, one figure per participant.
fft_weights_list_all, fft_freq_list_all = [], []

for j in range(len(rating_downsample_list_all)):
    plt.figure()
    ax = plt.gca()
    fft_weights_list, fft_freq_list = fft_data(rating_downsample_list_all[j], time_downsample_list_all[j])
    fft_weights_list_all.append(fft_weights_list)
    fft_freq_list_all.append(fft_freq_list)
    # fft_data returns one entry per trial, so both lists line up trial by trial
    for trial_freq, trial_weights in zip(fft_freq_list, fft_weights_list):
        ax.plot(trial_freq, np.abs(trial_weights))
    ax.set_xlim(0,0.15)
    ax.set_xlabel('Frequency (Hz)')
    ax.set_ylabel('Weight')
    ax.set_title(f'Frequency weights of the signal with FFT for \n participant {participant_list[j]}')
    ax.set_yscale('log')
    ax.set_ylim(0.1,10**4.5)


In [None]:
# All participants' FFT magnitude spectra (three trials each) on one axis.
ax = plt.gca()
for participant_freqs, participant_weights in zip(fft_freq_list_all, fft_weights_list_all):
    for trial in range(3):
        ax.plot(participant_freqs[trial], np.abs(participant_weights[trial]))
ax.set_xlim(0,0.15)
ax.set_ylim(0.1,10**4.5)
ax.set_xlabel('Frequency (Hz)')
ax.set_ylabel('Weight')
ax.set_title('FFT of all the participants')
ax.set_yscale('log')

### Power spectrum

In [None]:
# Power spectral density of every trial, one figure per participant.
psd_freq_list_all, psd_list_all = [], []

for j in range(len(rating_downsample_list_all)):
    plt.figure()
    ax = plt.gca()
    psd_freq_list, psd_list = power_spectrum(rating_downsample_list_all[j], time_downsample_list_all[j])
    psd_freq_list_all.append(psd_freq_list)
    psd_list_all.append(psd_list)
    # one curve per trial (as many trials as there are smoothed trials)
    for i in range(len(lowpass_list_all[j])):
        ax.plot(psd_freq_list[i], np.abs(psd_list[i]))
    ax.set_xlim(0,0.15)
    ax.set_xlabel('Frequency (Hz)')
    ax.set_ylabel('Power Spectral Density')
    ax.set_title(f'Power Spectral Density of the pain signal for \n participant {participant_list[j]}')


#### power spectrum all

In [None]:
# All participants' power spectra (three trials each) on one axis.
for i in range(len(psd_freq_list_all)):
    for j in range(3):
        plt.plot(psd_freq_list_all[i][j], np.abs(psd_list_all[i][j]))
plt.xlim(0,0.15)
plt.xlabel('Frequency (Hz)')
# The y axis shows spectral density, not FFT weights (label was copied from the FFT plot).
plt.ylabel('Power Spectral Density')
plt.title('Power spectrum of all the participants')

## Permutation entropy

In [None]:
# Permutation entropy / statistical complexity per trial, one figure per participant.
entropy_list_all, stat_complexity_list_all = [], []
for i in range(len(rating_downsample_list_all)):
    plt.figure()
    ax = plt.gca()
    entropy_list, stat_complexity_list = permutation_entropy(rating_downsample_list_all[i])
    entropy_list_all.append(entropy_list)
    stat_complexity_list_all.append(stat_complexity_list)
    trial_numbers = range(1,len(entropy_list)+1)
    ax.scatter(trial_numbers, entropy_list)
    ax.set_title(f'Permutation entropy of the data for \n participant {participant_list[i]}')
    ax.set_xlabel('trial number')
    ax.set_ylabel('permutation entropy')
    ax.set_ylim(-0.1,1.1)
    ax.set_xticks([1,2,3])

#### Permutation entropy all

In [None]:
# Permutation entropy of every participant against trial number on one axis.
ax = plt.gca()
for participant_entropy in entropy_list_all:
    # the x positions are always taken from the first participant's trial count
    ax.scatter(range(1,len(entropy_list_all[0])+1), participant_entropy)
ax.set_xlabel('trial number')
ax.set_ylabel('permutation entropy')
ax.set_ylim(-0.1,1.1)
ax.set_xticks([1,2,3])
ax.set_title('Permutation entropy for all participants for each trial')
plt.show()

In [None]:
# Overall permutation entropy per participant = mean over the trials.
# np.nanmean ignores failed (NaN) trials, matching the np.nanvar used for the
# overall variance; with np.mean one failed trial made the whole value NaN.
entropy_overall = []
for i in range(len(entropy_list_all)):
    entropy_overall.append(np.nanmean(entropy_list_all[i]))
plt.hist(entropy_overall)
plt.xlabel('permutation entropy')
plt.ylabel('frequency')
plt.title('Distribution of Permutation entropy for all participants')
plt.show()

## Autocorrelation

In [None]:
# Autocorrelation function of every trial, one figure per participant.
acorr_list_all, time_lag_list_all = [], []
for j in range(len(rating_downsample_list_all)):
    plt.figure()
    ax = plt.gca()
    acorr_list, time_lag = autocorrelation(rating_downsample_list_all[j], time_downsample_list_all[j])
    acorr_list_all.append(acorr_list)
    time_lag_list_all.append(time_lag)

    for i in range(3):
        ax.plot(time_lag[i], acorr_list[i])
    ax.set_xlabel('time lag (s)')
    ax.set_ylabel('autocorrelation')
    ax.set_title(f'Autocorrelation function for \n participant {participant_list[j]}')
    ax.set_ylim(-1.1,1.1)

In [None]:
# Autocorrelation functions of all participants (three trials each) on one axis.
ax = plt.gca()
for participant_lags, participant_acorr in zip(time_lag_list_all, acorr_list_all):
    for trial in range(3):
        ax.plot(participant_lags[trial], participant_acorr[trial])
ax.set_xlabel('time lag (s)')
ax.set_ylabel('autocorrelation function')
ax.set_title('Autocorrelation function of all the participants')

## Partial Autocorrelation

In [None]:
def cal_pacf(rating):
    """Compute the partial autocorrelation function of every trial.

    A trial is analysed only if it has more than one sample and its last
    sample differs from its first one (presumably a cheap guard against
    constant trials - TODO confirm); otherwise ``[nan, nan, nan]`` is stored.
    """
    pacf_list = []
    for i in range(len(rating)):
        if len(rating[i]) > 1 and rating[i][-1] != rating[i][0]:
            pacf_values = pacf(rating[i])
            pacf_list.append(pacf_values)
        else:
            # np.nan instead of np.NaN (alias removed in NumPy 2.0).
            pacf_list.append([np.nan, np.nan, np.nan])
    return pacf_list

In [None]:
# Ad-hoc inspection: show the participant id stored at position 25.
# NOTE(review): `participant_list` is not defined anywhere in this notebook, and
# the hard-coded index assumes at least 26 participants - confirm or remove.
participant_list[25]

In [None]:
# PACF of every trial, one figure per participant. Lags are converted to
# seconds with the 0.05 s step of the downsampled signal.
time_lag_list_pacf = np.arange(0,100,0.05)
pacf_list_all = []
for j in tqdm(range(len(rating_downsample_list_all))):
    plt.figure()
    ax = plt.gca()
    pacf_list = cal_pacf(rating_downsample_list_all[j])
    for i in range(3):
        ax.plot(time_lag_list_pacf[:len(pacf_list[i])],pacf_list[i])
    ax.set_ylabel('PACF value')
    ax.set_xlabel('time lag (s)')
    ax.set_title(f'PACF of {participant_list[j]}')
    pacf_list_all.append(pacf_list)


#### PACF of all

In [None]:
# PACF curves of all participants (three trials each) on one axis.
ax = plt.gca()
for participant_pacf in pacf_list_all:
    for trial in range(3):
        trial_pacf = participant_pacf[trial]
        ax.plot(time_lag_list_pacf[:len(trial_pacf)], trial_pacf)
ax.set_xlabel('time lag (s)')
ax.set_ylabel('PACF value')
ax.set_title('PACF of all the participants')

## Check stationarity

In [None]:
# Augmented Dickey-Fuller test for every trial of every participant.
adfuller_list_all, stationary_list_all = [], []
for j in range(len(rating_downsample_list_all)):
    adf_outcome = adfuller_test(rating_downsample_list_all[j])
    adfuller_list, labels, stationary_list = adf_outcome
    adfuller_list_all.append(adfuller_list)
    stationary_list_all.append(stationary_list)



## Time series model
### ARIMA model

In [None]:
from pmdarima.arima import auto_arima
def calcsmape(actual, forecast):
    """Symmetric mean absolute percentage error between two equal-length series.

    Computes ``mean(2 * |forecast - actual| / (|actual| + |forecast|))``.
    Both inputs are converted to plain float arrays first, so values are
    compared by position (pandas inputs are no longer aligned on their index,
    which could silently blank out the result). Points where both values are
    zero contribute an error of 0 instead of NaN.
    """
    actual = np.asarray(actual, dtype=float)
    forecast = np.asarray(forecast, dtype=float)
    denominator = np.abs(actual) + np.abs(forecast)
    numerator = 2 * np.abs(forecast - actual)
    # 0 / 0 (perfect forecast of a zero value) counts as zero error.
    ratio = np.divide(numerator, denominator,
                      out=np.zeros_like(denominator), where=denominator != 0)
    return 1/len(actual) * np.sum(ratio)

In [None]:
# Fit an ARIMA model to every trial, forecast the held-out tail and score it with SMAPE.
TEST_SIZE = 100  # number of trailing samples held out for forecasting
smape_all = []
model_all = []
for i in tqdm(range(len(rating_downsample_list_all))):
    smape_participant = []
    model_participant = []
    for j in range(3):
        data_trial = pd.Series(rating_downsample_list_all[i][j])
        if len(data_trial) <= TEST_SIZE or data_trial.isna().any():
            # Failed trial (NaN placeholder) or too short to leave a training
            # set: keep the per-trial lists aligned and move on instead of
            # crashing inside auto_arima.
            model_participant.append(None)
            smape_participant.append(np.nan)
            continue
        train, test = data_trial[:-TEST_SIZE], data_trial[-TEST_SIZE:]

        model = auto_arima(train, start_p=1, start_q=1,
                            test='adf',
                            max_p=5, max_q=5,
                            m=1,
                            d=1,
                            seasonal=False,
                            start_P=0,
                            D=None,
                            trace=True,
                            error_action='ignore',
                            suppress_warnings=True,
                            stepwise=True)
        model_participant.append(model)
        prediction, confint = model.predict(n_periods=TEST_SIZE, return_conf_int=True)
        # Work with plain arrays so nothing below depends on the index that the
        # forecast may or may not carry.
        prediction = np.asarray(prediction)
        cf = pd.DataFrame(np.asarray(confint))
        prediction_series = pd.Series(prediction, index = test.index)
        fig1, ax1 = plt.subplots(1, 1, figsize=(15, 5))
        ax1.plot(data_trial, label = 'original')
        ax1.plot(prediction_series, label = 'forecasting')
        # shaded band = confidence interval of the forecast
        ax1.fill_between(prediction_series.index,
                        cf[0],
                        cf[1],color='grey',alpha=.3)
        ax1.set_ylim(0,10)
        ax1.set_title(f'participant id {participant_list[i]}, day 1, trial {j+1}')
        ax1.legend()
        smape=calcsmape(test,prediction)
        smape_participant.append(smape)
    smape_all.append(smape_participant)
    model_all.append(model_participant)

### LSTM

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise one trial with StandardScaler (presumably as input for the LSTM).
# NOTE: `data_trial` is whatever trial the ARIMA loop above processed last.
data_trial = pd.DataFrame(data_trial)

# Named `scaler` rather than `object`, which shadowed the Python builtin.
scaler = StandardScaler()

# standardization
scale = scaler.fit_transform(data_trial)

# Save the descriptive analysis

In [None]:
# Collect every per-participant result in one table (one row per participant)
# and store it as a pickle.
data_dict = {
    'PID': participant_list,
    'mean': mean_list_all,
    'mean overall': mean_overall,
    'mean square': mean_square_list_all,
    'variance': variance_list_all,
    'variance overall': var_combined_list_all,
    'standard deviation': std_list_all,
    'FFT frequency': fft_freq_list_all,
    'FFT weights': fft_weights_list_all,
    'power spectrum frequency': psd_freq_list_all,
    'power spectrum weights': psd_list_all,
    'permutation entropy': entropy_list_all,
    'permutation entropy overall': entropy_overall,
    'autocorrelation': acorr_list_all,
    'PACF': pacf_list_all,
    'stationary': stationary_list_all,
    'ARIMA model': model_all,
    'SMAPE': smape_all,
}
data_descriptive = pd.DataFrame(data_dict)
data_descriptive.to_pickle('../Data/descriptive_data/DescriptiveDataARIMAmodel.pkl')

## Try analysis
### Mean pain VS PE

In [None]:
# Scatter of overall permutation entropy against overall mean rating, with a
# least-squares line.
mean_entropy = list(zip(mean_overall, entropy_overall))
# np.polyfit cannot handle NaN, so participants without a valid pair are left out.
mean_entropy = [(px, py) for px, py in mean_entropy if np.isfinite(px) and np.isfinite(py)]
x = [point[0] for point in mean_entropy]
y = [point[1] for point in mean_entropy]
a, b = np.polyfit(x, y, 1)  # a = slope, b = intercept
x_bestfit = np.arange(0,11,0.1)
y_bestfit = a*x_bestfit+b
plt.plot(x_bestfit, y_bestfit, color = 'green', linestyle = '--')
plt.scatter(x,y)
plt.xlabel('Mean')
plt.ylabel('permutation entropy')
plt.xlim(0,10)
plt.ylim(0,1)
# {:+.2f} always prints the slope's sign; the old label dropped a '+'.
plt.text(6, 0.2, 'y = ' + '{:.2f}'.format(b) + ' {:+.2f}'.format(a) + 'x', size=14)
plt.title('permutation entropy against mean')

### MSK scores and Mean

In [None]:
#list all the questionnaire files and their paths
questionnaire_dir = '../Data/questionnaire'
onlyfiles = [f for f in listdir(questionnaire_dir) if isfile(join(questionnaire_dir, f))]
questionnaire = [questionnaire_dir + '/' + f for f in onlyfiles]
if not questionnaire:
    # Previously an empty folder left df_questionnaire undefined (NameError later).
    raise FileNotFoundError(f'No questionnaire files found in {questionnaire_dir!r}')

# The old loop read every file but kept only the last one, so read just that file.
df_questionnaire = pd.read_csv(questionnaire[-1])

In [None]:
# Row position of every participant in the questionnaire table, in the order of
# participant_list.
index = []
for pid in participant_list:  # `pid`, not `id`, to avoid shadowing the builtin
    matches = np.where(df_questionnaire.PROLIFIC_PID == pid)[0]
    if len(matches) == 0:
        # Used to fail with an opaque "index 0 is out of bounds" IndexError.
        raise KeyError(f'participant {pid!r} has no row in the questionnaire data')
    index.append(matches[0])

In [None]:
# Questionnaire rows reordered to match participant_list, and their 'Score' column.
df_questionnaire_index = df_questionnaire.iloc[index]
score_column = df_questionnaire_index['Score']
msk_scores = score_column.values

In [None]:
# Scatter of MSK score against overall mean rating, with a least-squares line.
mean_mskscores = list(zip(mean_overall, msk_scores))
# np.polyfit cannot handle NaN, so participants without a valid pair are left out.
mean_mskscores = [(px, py) for px, py in mean_mskscores if np.isfinite(px) and np.isfinite(py)]
x = [point[0] for point in mean_mskscores]
y = [point[1] for point in mean_mskscores]

a, b = np.polyfit(x, y, 1)  # a = slope, b = intercept
x_bestfit = np.arange(0,11,0.1)
y_bestfit = a*x_bestfit+b
plt.plot(x_bestfit, y_bestfit, color = 'green', linestyle = '--')
plt.scatter(x,y)
plt.xlabel('Mean')
plt.ylabel('MSK scores')
plt.xlim(0,10)
plt.ylim(0,50)
# {:+.2f} always prints the slope's sign; the old label dropped a '+'.
plt.text(0.5, 10, 'y = ' + '{:.2f}'.format(b) + ' {:+.2f}'.format(a) + 'x', size=14)
plt.title('MSK scores against mean')

### MSK scores and Permutation Entropy

In [None]:
# Scatter of MSK score against overall permutation entropy, with a least-squares line.
pe_mskscores = list(zip(entropy_overall, msk_scores))
# np.polyfit cannot handle NaN, so participants without a valid pair are left out.
pe_mskscores = [(px, py) for px, py in pe_mskscores if np.isfinite(px) and np.isfinite(py)]
x = [point[0] for point in pe_mskscores]
y = [point[1] for point in pe_mskscores]
a, b = np.polyfit(x, y, 1)  # a = slope, b = intercept
x_bestfit = np.arange(0,1.2,0.1)
y_bestfit = a*x_bestfit+b
plt.plot(x_bestfit, y_bestfit, color = 'green', linestyle = '--')
plt.scatter(x,y)
plt.xlabel('Permutation Entropy')
plt.ylabel('MSK scores')
plt.xlim(0,1)
plt.ylim(0,50)
# {:+.2f} prints the slope's own sign; the old label showed "+ -0.12x" for negative slopes.
plt.text(0.5, 10, 'y = ' + '{:.2f}'.format(b) + ' {:+.2f}'.format(a) + 'x', size=14)
plt.title('MSK scores against permutation entropy')

### MSK scores and variance

In [None]:

# Scatter of MSK score against pooled variance, with a least-squares line.
var_mskscores = list(zip(var_combined_list_all, msk_scores))
# np.polyfit cannot handle NaN, so participants without a valid pair are left out.
var_mskscores = [(px, py) for px, py in var_mskscores if np.isfinite(px) and np.isfinite(py)]
x = [point[0] for point in var_mskscores]
y = [point[1] for point in var_mskscores]
a, b = np.polyfit(x, y, 1)  # a = slope, b = intercept
# nanmax: a NaN variance must not turn the fitted-line range into NaN
x_bestfit = np.arange(0,np.nanmax(var_combined_list_all)+1,0.1)
y_bestfit = a*x_bestfit+b
plt.plot(x_bestfit, y_bestfit, color = 'green', linestyle = '--')
plt.scatter(x,y)
plt.xlabel('Variance')
plt.ylabel('MSK scores')
plt.ylim(0,50)
# {:+.2f} prints the slope's own sign; the old label showed "+ -0.12x" for negative slopes.
plt.text(0.5, 10, 'y = ' + '{:.2f}'.format(b) + ' {:+.2f}'.format(a) + 'x', size=14)
plt.title('MSK scores against variance')

### variance and mean

In [None]:

# Scatter of overall mean rating against pooled variance, with a least-squares line.
var_mean = list(zip(var_combined_list_all, mean_overall))
# np.polyfit cannot handle NaN, so participants without a valid pair are left out.
var_mean = [(px, py) for px, py in var_mean if np.isfinite(px) and np.isfinite(py)]
x = [point[0] for point in var_mean]
y = [point[1] for point in var_mean]
a, b = np.polyfit(x, y, 1)  # a = slope, b = intercept
# nanmax: a NaN variance must not turn the fitted-line range into NaN
x_bestfit = np.arange(0,np.nanmax(var_combined_list_all)+1,0.1)
y_bestfit = a*x_bestfit+b
plt.plot(x_bestfit, y_bestfit, color = 'green', linestyle = '--')
plt.scatter(x,y)
plt.xlabel('Variance')
plt.ylabel('Mean')
# The mean rating is plotted on a 0-10 axis elsewhere in this notebook; the
# previous 0-50 limit was copied from the MSK-score plots.
plt.ylim(0,10)
# {:+.2f} always prints the slope's sign; the old label dropped a '+'.
plt.text(0.5, 9, 'y = ' + '{:.2f}'.format(b) + ' {:+.2f}'.format(a) + 'x', size=14)
plt.title('mean against variance')

### variance and permutation entropy

In [None]:

# Scatter of overall permutation entropy against pooled variance, with a least-squares line.
var_pe = list(zip(var_combined_list_all, entropy_overall))
# np.polyfit cannot handle NaN, so participants without a valid pair are left out.
var_pe = [(px, py) for px, py in var_pe if np.isfinite(px) and np.isfinite(py)]
x = [point[0] for point in var_pe]
y = [point[1] for point in var_pe]
a, b = np.polyfit(x, y, 1)  # a = slope, b = intercept
# nanmax: a NaN variance must not turn the fitted-line range into NaN
x_bestfit = np.arange(0,np.nanmax(var_combined_list_all)+1,0.1)
y_bestfit = a*x_bestfit+b
plt.plot(x_bestfit, y_bestfit, color = 'green', linestyle = '--')
plt.scatter(x,y)
plt.xlabel('Variance')
plt.ylabel('Permutation Entropy')
plt.ylim(0,1)
# {:+.2f} always prints the slope's sign; the old label dropped a '+'.
plt.text(6, 0.2, 'y = ' + '{:.2f}'.format(b) + ' {:+.2f}'.format(a) + 'x', size=14)
plt.title('permutation entropy against variance')

## Analysing the response of prediction and confidence

In [None]:
# Locate the pickled data file again for the prediction/confidence analysis.
# If the folder holds several files, the last entry returned by listdir() is
# used (unchanged from the previous loop, which overwrote `path` every iteration).
data_dir = '../Data/data_file'
onlyfiles = [f for f in listdir(data_dir) if isfile(join(data_dir, f))]
if not onlyfiles:
    # Previously an empty folder surfaced as a confusing NameError on `path`.
    raise FileNotFoundError(f'No data files found in {data_dir!r}')
path = data_dir + '/' + onlyfiles[-1]
files = path
files


In [None]:
# Reload the trial table and collect the distinct prediction / confidence values.
# NOTE(review): .unique() drops repeated values, so the histograms built from
# these arrays count each distinct value once - confirm this is intended.
data = pd.read_pickle(files)

prediction_all = data['prediction'].unique()
confidence_all = data['confidence'].unique()



In [None]:
# Histogram of the collected prediction values.
ax = plt.gca()
ax.hist(prediction_all, bins = 10)
ax.set_xlabel('prediction')
ax.set_ylabel('frequency')
ax.set_title('Distribution of prediction for all participants')
plt.show()

In [None]:
# Histogram of the collected confidence values.
ax = plt.gca()
ax.hist(confidence_all, bins = 10)
ax.set_xlabel('confidence')
ax.set_ylabel('frequency')
ax.set_title('Distribution of confidence for all participants')
plt.show()

- Evaluate the difference between the PE values statistically.

- Use Pearson’s correlation coefficient to test the relationship between the PE values and pain levels?
