In [122]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
import seaborn as sns
import pickle
#from scipy import stats
from scipy import spatial
from scipy.spatial import distance
# importing functions from bootstrapped
import bootstrapped.bootstrap as bs
import bootstrapped.compare_functions as bs_compare
import bootstrapped.stats_functions as bs_stats
import scipy.stats as st

In [2]:
# Directory that holds the per-condition centroid-distance arrays. Keeping it
# in one place means the location only has to be changed once.
DATA_DIR = '/Users/jenniferthem/Masterthesis_Data/January2020'

# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects. Keep it only if these .npy files are trusted and really need it.
baseline = np.load(f'{DATA_DIR}/centroid_dist_baseline.npy', allow_pickle=True)
ket = np.load(f'{DATA_DIR}/centroid_dist_ketamine.npy', allow_pickle=True)
placebo = np.load(f'{DATA_DIR}/centroid_dist_placebo.npy', allow_pickle=True)

In [3]:
# Wrap every loaded array in its own DataFrame (baseline, ketamine, placebo).
b_df, k_df, p_df = (pd.DataFrame(loaded) for loaded in (baseline, ket, placebo))

In [4]:
# Extract column 0 of each frame as a plain Python list.
b_arr, k_arr, p_arr = (frame[0].tolist() for frame in (b_df, k_df, p_df))

In [106]:
# Trimmed means (proportiontocut = 0.1) of the baseline and ketamine values;
# only the baseline result is displayed.
b_trim, k_trim = (st.trim_mean(values, 0.1) for values in (b_arr, k_arr))
b_trim

1.3864509482211596

In [19]:
# Plain (untrimmed) arithmetic mean of the baseline values.
np.asarray(b_arr).mean()

1.9338832772219734

Comparing ketamine and baseline: distances of all voxels to their respective centroids

### Bootstrapping

In [6]:
# Convert the three lists to NumPy arrays so they support element-wise arithmetic.
b, k, p = map(np.asarray, (b_arr, k_arr, p_arr))

##### Goal: Compare the differences between ketamine and baseline to the differences between placebo and baseline

**First**: calculate Ketamine-baseline and placebo-baseline

**Second**: Subtract the second difference from the first: (ketamine − baseline) − (placebo − baseline)

In [13]:
# Element-wise difference of each condition from baseline
# (both inputs are the voxel-to-centroid distance arrays built above).
dist_k_b = np.subtract(k, b)
dist_p_b = np.subtract(p, b)

In [108]:
# Largest ketamine-minus-baseline difference.
dist_k_b.max()

11.812368558517523

In [79]:
# Display the placebo-minus-baseline array (NumPy abbreviates the repr of long arrays).
dist_p_b

array([-0.25987737,  0.18475062, -0.27445611, ..., -0.14489161,
       -0.18755927, -0.1450823 ])

In [81]:
# Double difference: (ketamine - baseline) - (placebo - baseline), element-wise.
dist = np.subtract(dist_k_b, dist_p_b)
# Display its mean.
np.mean(dist)

0.5011597735001668

In [102]:
# Put each baseline-corrected difference next to the ROI labels, then join
# both tables column-wise into df_com.
# NOTE(review): roi_labels_df is not defined in any cell visible here; it has
# to come from a cell outside this view, otherwise this cell fails on a fresh
# kernel.
dist_k_b_df = pd.DataFrame(dist_k_b)
dist_p_b_df = pd.DataFrame(dist_p_b)
df_k = pd.concat([dist_k_b_df, roi_labels_df], axis=1)
# Exactly two names are assigned, so roi_labels_df must contribute one column.
df_k.columns=['Ketamine-Baseline', 'ROI']
df_p = pd.concat([dist_p_b_df, roi_labels_df], axis=1)
df_p.columns=['Placebo-Baseline', 'ROI']
# df_k and df_p both carry an 'ROI' column, so df_com contains 'ROI' twice.
df_com= pd.concat([df_k,df_p], axis=1)

In [118]:
%matplotlib notebook
keys = ['Ketamine-Baseline', 'Placebo-Baseline']
# distplot: Flexibly plot a univariate distribution of observations - Probability density function
# Iterate through the five airlines
for curr_roi in keys:
    # Subset to the airline
    subset = df_com[curr_roi]
    #plt.figure(figsize=(20,10))
    #fig = plt.subplots(figsize=(20,15))
    # Draw the density plot
    sns.distplot(subset, hist = False, kde = True,
                 kde_kws = {'linewidth': 1},
                 label = curr_roi)
    
# Plot formatting
#fig.set_size_inches(18.5, 10.5)
#fig.subplots(figsize=(20,15))
plt.legend(prop={'size': 8}, title = 'Condition')
plt.title('')
plt.xlabel('Distance')
plt.ylabel('Density')
plt.xlim(-10, 10)
plt.savefig('/Users/jenniferthem/Masterthesis_Data/January2020/Density_k_p.png', dpi=250)

<IPython.core.display.Javascript object>

In [116]:
# Kernel density estimate of the double difference
# (ketamine - baseline) - (placebo - baseline).
# A dedicated figure/axes keeps this plot from drawing onto a figure left
# active by an earlier cell.
fig, ax = plt.subplots()

# sns.kdeplot replaces the deprecated sns.distplot(hist=False, kde=True).
sns.kdeplot(dist, linewidth=1, ax=ax)

ax.set_title('(Ketamine-Baseline)-(Placebo-Baseline)')
ax.set_xlabel('Distance')
ax.set_ylabel('Probability density')  # label typo ('Probibility') corrected
ax.set_xlim(-10, 10)
# Dashed vertical reference line at x = 0 (axvline's default position)
ax.axvline(x=0, linewidth=1, linestyle='--')
fig.savefig('/Users/jenniferthem/Masterthesis_Data/January2020/Density_dist_comb.png', dpi=250)

<IPython.core.display.Javascript object>

**Bootstrapping**

In [71]:
# Bootstrap the mean of the ketamine-minus-baseline differences
# (50 000 iterations, alpha = 0.05) and request the resampled distribution.
# NOTE(review): no RNG seed is set in the visible cells, so the result
# presumably changes from run to run -- confirm and seed if reproducibility matters.
bsr_k_b = bs.bootstrap(
    dist_k_b,
    stat_func=bs_stats.mean,
    num_iterations=50000,
    alpha=0.05,
    return_distribution=True,
)

In [72]:
# Bootstrap the mean of the placebo-minus-baseline differences
# (50 000 iterations, alpha = 0.05) and request the resampled distribution.
# NOTE(review): no RNG seed is set in the visible cells, so the result
# presumably changes from run to run -- confirm and seed if reproducibility matters.
bsr_p_b = bs.bootstrap(
    dist_p_b,
    stat_func=bs_stats.mean,
    num_iterations=50000,
    alpha=0.05,
    return_distribution=True,
)

In [73]:
# Bootstrap the mean of the double difference `dist`
# (50 000 iterations, alpha = 0.05) and request the resampled distribution.
# NOTE(review): no RNG seed is set in the visible cells, so the result
# presumably changes from run to run -- confirm and seed if reproducibility matters.
bsr = bs.bootstrap(
    dist,
    stat_func=bs_stats.mean,
    num_iterations=50000,
    alpha=0.05,
    return_distribution=True,
)

In [82]:
# Mean of the bootstrapped means of the double difference.
np.mean(bsr)

0.5012097738406607

In [74]:
# One DataFrame per bootstrap result.
bsr_k_b_df, bsr_p_b_df, bsr_df = (
    pd.DataFrame(resampled) for resampled in (bsr_k_b, bsr_p_b, bsr)
)

In [77]:
# Side-by-side table of the three bootstrap results. 'Ketamine' and 'Placebo'
# come from the baseline-corrected differences; 'Ketamine-Placebo' comes from
# the double difference.
bsr_com = pd.concat((bsr_k_b_df, bsr_p_b_df, bsr_df), axis='columns')
bsr_com.columns = ['Ketamine', 'Placebo', 'Ketamine-Placebo']
bsr_com

Unnamed: 0,Ketamine,Placebo,Ketamine-Placebo
0,-0.131908,-0.628506,0.492754
1,-0.140209,-0.630957,0.501091
2,-0.136238,-0.632328,0.490577
3,-0.146805,-0.630498,0.524936
4,-0.142129,-0.639462,0.505331
...,...,...,...
49995,-0.139599,-0.645202,0.487471
49996,-0.152658,-0.637306,0.494165
49997,-0.133236,-0.650514,0.491597
49998,-0.125533,-0.646665,0.496793


 The y-axis in a density plot is the probability density function for the kernel density estimation. However, we need to be careful to specify this is a probability density and not a probability. The difference is the probability density is the probability per unit on the x-axis. To convert to an actual probability, we need to find the area under the curve for a specific interval on the x-axis. Somewhat confusingly, because this is a probability density and not a probability, the y-axis can take values greater than one. The only requirement of the density plot is that the total area under the curve integrates to one. I generally tend to think of the y-axis on a density plot as a value only for relative comparisons between different categories.

In [94]:
%matplotlib notebook
keys = ['Ketamine', 'Placebo']
# distplot: Flexibly plot a univariate distribution of observations - Probability density function
# Iterate through the five airlines
for curr_key in keys:
    # Subset to the airline
    subset = bsr_com[curr_key]
    #plt.figure(figsize=(20,10))
    #fig = plt.subplots(figsize=(20,15))
    # Draw the density plot
    sns.distplot(subset, hist = False, kde = True,
                 kde_kws = {'linewidth': 1},
                 label = curr_key)
    
# Plot formatting
#fig.set_size_inches(18.5, 10.5)
#fig.subplots(figsize=(20,15))
plt.legend(prop={'size': 8}, title = 'Condition')
plt.title('')
plt.xlabel('Bootstrapped mean')
plt.ylabel('Probibility density')
plt.xlim(-1, 1)
plt.axvline(linewidth=1, linestyle='--')
plt.savefig('/Users/jenniferthem/Masterthesis_Data/January2020/Density_bootstrapped_mean_dist_k_p.png', dpi=250)

<IPython.core.display.Javascript object>

In [112]:
%matplotlib notebook

sns.distplot(bsr_com['Ketamine-Placebo'], hist = False, kde = True,
                 kde_kws = {'linewidth': 1})

plt.title('')
plt.xlabel('Bootstrapped mean')
plt.ylabel('Probibility density')
plt.xlim(-1, 1)
plt.axvline(linewidth=1, linestyle='--')
plt.savefig('/Users/jenniferthem/Masterthesis_Data/January2020/Density_bootstrapped_mean_dist_comb.png', dpi=250)

<IPython.core.display.Javascript object>

**Calculating p-value**

In [134]:
def bootstrap_two_sided_p(boot_stats, null_value=0.0):
    """Two-sided p-value from a bootstrap distribution of a statistic.

    The p-value is twice the smaller of the two tail proportions -- the
    fraction of replicates at or above ``null_value`` and the fraction at or
    below it -- capped at 1.

    The earlier loop *summed* the positive bootstrap means instead of
    *counting* them, so its result was not a proportion. It also looked at one
    tail only, which can give values above 1.

    Parameters
    ----------
    boot_stats : array-like
        Bootstrapped values of the statistic (one per resample).
    null_value : float, default 0.0
        Value of the statistic under the null hypothesis.

    Returns
    -------
    float
        p-value in [0, 1]. A result of exactly 0.0 only means that no
        replicate reached ``null_value``; with n replicates it should be
        reported as p < 1/n, not as p = 0.

    Raises
    ------
    ValueError
        If ``boot_stats`` is empty.
    """
    replicates = np.asarray(boot_stats, dtype=float)
    if replicates.size == 0:
        raise ValueError('boot_stats must contain at least one bootstrap replicate')
    # The mean of a boolean mask is the proportion of True entries.
    frac_at_or_above = np.mean(replicates >= null_value)
    frac_at_or_below = np.mean(replicates <= null_value)
    return float(min(1.0, 2.0 * min(frac_at_or_above, frac_at_or_below)))


# p-value for the bootstrapped mean of the ketamine-minus-baseline differences
p_val = bootstrap_two_sided_p(bsr_k_b)


In [136]:
# Largest bootstrapped mean of the ketamine-minus-baseline differences.
bsr_k_b.max()

-0.09494484271762886

In [132]:
# Display the p-value computed from bsr_k_b.
p_val

0.0

In [25]:
# bootstrap of only mean - not distribution
%matplotlib notebook
# confidence intervals of the mean plotted for 10 000 iterations
plt.plot(sample_size, [x[1] for x in bootstrap_results], c='blue')
plt.plot(sample_size, [x[1] for x in normal_results], linestyle='--', c='orange')

plt.plot(sample_size, [x[0] for x in bootstrap_results], c='blue', 
         label='Bootstrap')
plt.plot(sample_size, [x[0] for x in normal_results], linestyle='--', c='orange', 
         label='t-distribution')

plt.axhline(dist.mean(), c='black', label='True Mean')
plt.legend(loc='best')
plt.title('t-distribution vs Bootstrap')

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 't-distribution vs Bootstrap')

In [32]:
%matplotlib notebook
# confidence intervals of the mean plotted for 50 000 iterations
plt.plot(sample_size, [x[1] for x in bootstrap_results], c='blue')
plt.plot(sample_size, [x[1] for x in normal_results], linestyle='--', c='orange')

plt.plot(sample_size, [x[0] for x in bootstrap_results], c='blue', 
         label='Bootstrap')
plt.plot(sample_size, [x[0] for x in normal_results], linestyle='--', c='orange', 
         label='t-distribution')

plt.axhline(dist.mean(), c='black', label='True Mean')
plt.legend(loc='best')
plt.title('t-distribution vs Bootstrap')

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 't-distribution vs Bootstrap')