In [2]:
import os
import pandas as pd
import numpy as np
import nibabel as nb
import math as m
import pickle
from kernel3d import kernel3d
from tal2icbm_spm import tal2icbm_spm

# --- Paths ---------------------------------------------------------------
# Input folders are resolved relative to the current working directory.
cwd = os.getcwd()
raw_folder = f"{cwd}/DataRaw"
mask_folder = f"{cwd}/MaskenEtc"

# --- Inputs --------------------------------------------------------------
# Spreadsheet with the reported coordinates and their metadata.
df = pd.read_excel(
    f"{raw_folder}/Stroop_Updatejune2020_CWvsother_compareCundN.xlsx",
    engine="openpyxl",
)

# Reference volume; the size of its data array bounds the voxel indices
# computed further down.
template = nb.load(f"{mask_folder}/Grey10.nii")
template_shape = template.get_fdata().shape
x_max, y_max, z_max = template_shape[0], template_shape[1], template_shape[2]

# --- Result containers (filled by the loops below) ------------------------
lines_columns = ["Author", "Subjects", "XYZmm", "Space", "Cond", "ExpIndex"]
lines = pd.DataFrame(columns=lines_columns)

exp_columns = [
    "Author", "Subjects", "Space", "Cond", "XYZmm", "UncertainTemplates",
    "UncertainSubjects", "Smoothing", "XYZ", "Kernel", "Peaks",
]
experiments = pd.DataFrame(columns=exp_columns)

# Drop spreadsheet rows without an article entry and renumber the rest from 0,
# so that `index - 1` in the next loop always refers to the previous kept row.
has_article = df["Articles"].notnull()
df = df[has_article].reset_index(drop=True)

# Running experiment counter, advanced while iterating over the rows.
cnt_exp = 0

# Build `lines`: one entry per spreadsheet row, tagged with the index of the
# experiment it belongs to. A row continues the previous row's experiment only
# when author, subject count and the set of condition labels all match.
for index, row in df.iterrows():
    lines.at[index, "Author"] = row.Articles
    lines.at[index, "Subjects"] = int(row.Subjects)
    lines.at[index, "XYZmm"] = row.x, row.y, row.z
    # These columns are addressed by position, so use .iloc explicitly: plain
    # integer keys on a label-indexed Series are deprecated as positional
    # lookups in pandas 2.x and are treated as labels in later versions.
    lines.at[index, "Space"] = row.iloc[5]
    # Every column from position 6 onwards is read as a condition label;
    # empty cells are dropped, the rest lower-cased and stripped.
    lines.at[index, "Cond"] = list(row.iloc[6:].dropna().str.lower().str.strip().values)

    if index > 0:
        same_study = (
            lines.at[index, "Author"] == lines.at[index - 1, "Author"]
            and lines.at[index, "Subjects"] == lines.at[index - 1, "Subjects"]
        )
        # Compared as sets, so the column order of the labels is irrelevant.
        same_conditions = set(lines.at[index, "Cond"]) == set(lines.at[index - 1, "Cond"])
        if not (same_study and same_conditions):
            cnt_exp += 1

    lines.at[index, "ExpIndex"] = cnt_exp

# Build `experiments`: one row per experiment index found in `lines`, holding
# the coordinates in mm and in template voxel space, the smoothing width and
# the kernel returned by `kernel3d`.

# Loop-invariant quantities, computed once.
# NOTE(review): 5.7 and 11.6 look like the between-template / between-subject
# distances (mm) from the ALE literature, with sqrt(8*ln 2) converting a sigma
# to a FWHM -- confirm against the method reference.
uncertain_templates = 5.7 / (2 * m.sqrt(2 / m.pi)) * m.sqrt(8 * m.log(2))
uncertain_subjects_unscaled = 11.6 / (2 * m.sqrt(2 / m.pi)) * m.sqrt(8 * m.log(2))
mm_to_voxel = np.linalg.inv(template.affine)

for i in range(lines.iloc[-1]["ExpIndex"] + 1):
    exp_lines = lines[lines["ExpIndex"] == i]
    # Metadata is taken from the first row of the experiment.
    start = exp_lines.index[0]
    n_subjects = lines.at[start, "Subjects"]
    space = lines.at[start, "Space"]

    # Stack the per-row coordinate tuples into a (3, n_foci) float array.
    xyz_mm = np.vstack(exp_lines.XYZmm).T.astype(float)
    if space == 'TAL':
        # Coordinates labelled 'TAL' are passed through tal2icbm_spm first.
        xyz_mm = tal2icbm_spm(xyz_mm)

    uncertain_subjects = uncertain_subjects_unscaled / m.sqrt(n_subjects)
    smoothing = m.sqrt(uncertain_templates**2 + uncertain_subjects**2)

    # mm -> voxel indices: append a row of ones (homogeneous coordinates),
    # apply the inverse template affine and round to integers.
    homogeneous = np.concatenate((xyz_mm, np.ones((1, xyz_mm.shape[1]))))
    xyz_vox = np.round(np.dot(mm_to_voxel, homogeneous)).astype(int)

    # Clamp each axis to the last valid index of *its own* dimension.
    # (Previously the y and z rows were compared against x_max, which is
    # wrong for any template whose three dimensions differ.)
    xyz_vox[0][xyz_vox[0] >= x_max] = x_max - 1
    xyz_vox[1][xyz_vox[1] >= y_max] = y_max - 1
    xyz_vox[2][xyz_vox[2] >= z_max] = z_max - 1
    # NOTE(review): the lower bound is 1 rather than 0 although indices are
    # 0-based here -- kept unchanged; confirm whether index 0 should be allowed.
    xyz_vox[xyz_vox < 1] = 1

    experiments.at[i, "Author"] = lines.at[start, "Author"]
    experiments.at[i, "Subjects"] = n_subjects
    experiments.at[i, "Space"] = space
    experiments.at[i, "Cond"] = lines.at[start, "Cond"]
    experiments.at[i, "XYZmm"] = xyz_mm
    experiments.at[i, "UncertainTemplates"] = uncertain_templates
    experiments.at[i, "UncertainSubjects"] = uncertain_subjects
    experiments.at[i, "Smoothing"] = smoothing
    experiments.at[i, "XYZ"] = xyz_vox
    experiments.at[i, "Kernel"] = kernel3d(template.affine, smoothing, 31)
    experiments.at[i, "Peaks"] = xyz_vox.shape[1]
    

# Summarise the experiments per condition label, plus one trailing "all" row
# that covers every experiment.
task_names, task_counts = np.unique(np.hstack(experiments.Cond), return_counts=True)

tasks_columns = ["Name", "Num_Exp", "Who", "TotalSubjects", "ExpIndex"]
tasks = pd.DataFrame(columns=tasks_columns)
tasks["Name"] = np.append(task_names, "all")
tasks["Num_Exp"] = np.append(task_counts, experiments.shape[0])

n_experiments = lines.iloc[-1]["ExpIndex"] + 1
for task_row, value in enumerate(list(tasks.Name)):
    # Experiments whose condition list contains this label, in index order.
    matching = [
        exp_row
        for exp_row in range(n_experiments)
        if value in experiments.at[exp_row, "Cond"]
    ]
    if not matching:
        # Nothing to record; the cells keep their initial missing values.
        continue
    tasks.at[task_row, "Who"] = [experiments.at[exp_row, "Author"] for exp_row in matching]
    tasks.at[task_row, "TotalSubjects"] = sum(
        experiments.at[exp_row, "Subjects"] for exp_row in matching
    )
    tasks.at[task_row, "ExpIndex"] = matching

# The last row is the "all" entry: fill it from the complete experiment table.
all_row = tasks.index[-1]
tasks.at[all_row, "Who"] = experiments.Author.to_list()
tasks.at[all_row, "TotalSubjects"] = sum(experiments.Subjects.to_list())
tasks.at[all_row, "ExpIndex"] = list(range(experiments.shape[0]))

# Largest tasks first, with a fresh 0..n-1 index.
tasks = tasks.sort_values(by="Num_Exp", ascending=False).reset_index(drop=True)

# Write the experiments and tasks tables to DataPickle/data.pickle as one tuple.
pickle_object = (experiments, tasks)
pickle_dir = cwd + "/DataPickle"
# exist_ok=True replaces the separate exists() check, so a directory created
# between the check and the call can no longer make this step fail.
os.makedirs(pickle_dir, exist_ok=True)
with open(pickle_dir + "/data.pickle", "wb") as f:
    pickle.dump(pickle_object, f)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
