In [18]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from typing import List
import numpy as np
import attr
from typing import Optional, List

In [19]:
%run load_data.ipynb

In [20]:
data = create_subject_experiment_data(excel_paths=['../data_social_anxiety/social_anxiety_sagol/questionnaires_byTasks_new.xlsx'], 
                                      nifty_tasks=[('../data_social_anxiety/social_anxiety_sagol/Hariri_2ndLev/FacesVsShapes', 'hariri')])

In [25]:
data.subjects_data[0]

SubjectExperimentData(subject_id=114, features_data={'ID': 9942, 'Age': 25, 'Gender': 1, 'GenderBIN': 0, 'Education': 14.0, 'Status': 5, 'Remarks': None, 'ExpDate': Timestamp('2018-03-02 00:00:00'), 'ScreeningDate': Timestamp('2018-02-27 00:00:00'), 'DatesDiff(days)': 3, 'Screening_LSAS': 54, 'Screening_LSAS_anx': 33, 'Screening_LSAS_avo': 21, 'RSE': 36, 'BFNE': 66, 'FPES': 29, 'STAI_T': 38, 'BDI': 10, 'RRQ_rumination': 43, 'RRQ_reflection': 19, 'RRQ': 62, 'STAXI': 20, 'Suicidality': 0, 'DPSOS_other': 24, 'DPSOS_self': 15, 'LSAS': 35, 'LSAS_anx': 24, 'LSAS_avo': 11, 'STAXI-TangT': 4, 'STAXI-TangR': 10, 'STAXI_A\\O': 16, 'STAXI_A\\I': 25, 'STAXI_C\\O': 29, 'STAXI_C\\I': 23, 'SPSRQ_reward': 13, 'SPSRQ_punishment': 9, 'NEO_O': 19, 'NEO_C': 39, 'NEO_E': 28, 'NEO_A': 30, 'NEO_N': 13, 'ISEL_12': 29, 'ISEL_appraisal support': 11, 'ISEL_belonging support': 8, 'ISEL_tangible support': 10, 'SPIN': 32, 'Music': None, 'hps_attrib': 62.5, 'lps_attrib': 25.0, 'has_attrib': 93.33, 'las_attrib': 0.0},

In [11]:
def get_available_rois() -> List[str]:
    return 

def get_mask_from_roi(roi_name) -> np.array:
    return

def apply_roi_masks(experiment_data: ExperimentData, rois) -> ExperimentData:
    return

In [12]:
AVAILABLE_MODELS = ['svr']

In [16]:
@attrs
class Models:
    ylabels: List[str] = attrib()
    rois: Optional[List[str]] = attrib()
    # (x, y, z)
    shape: tuple = attrib() 
    # {'svr' : <model>, 'cnn': <model>}
    models: dict = attrib()
    
    def save():
        raise NotImplementedError
    
    @classmethod
    def load(ylabels, rois, shape):
        raise NotImplementedError

In [17]:
def get_or_create_models(experiment_data: List[SubjectExperimentData], ylabels, rois) -> Models:
    if rois:
        experiment_data = apply_roi_masks(experiment_data, rois)
    
    pre_computed_models = get_pre_computed_models(ylabels, rois, shape)
    return pre_computed_models or generate_models(experiment_data, ylabels, rois)

def get_pre_computed_models(ylabels, rois) -> Optional[Models]:
    """
    Load serialized models for the given ylabel and rois if such models exist.
    """
    
def generate_models(experiment_data_roi_masked: List[SubjectExperimentData], ylabels, rois) -> Models:
    models = {}
    for model_name in AVAILABLE_MODELS:
        models[model_name] = train_model(experiment_data_roi_masked, model_name=model_name)
    
    models = Models(ylabels=ylabels, rois=rois, shape=experiment_data_roi_masked.shape, models=model)
    models.save()
    return models
        
        
def train_model(experiment_data: ExperimentData, model_name: str):
    if model_name == 'svr':
        train_svr(experiment_data)
    else:
        raise NotImplementedError(f'Model: {model_name} is not supported.')

In [12]:
data = data.subjects_data

In [13]:
x,y,z = data[0].tasks_data['hariri'].shape

In [14]:
hariri_data = [
    {
        'data': subject_data.tasks_data['hariri'].reshape(x*y*z),
        'y': subject_data.features_data['FPES']
    } for subject_data in data if 'hariri' in subject_data.tasks_data]

In [15]:
X = np.array([subject['data'] for subject in hariri_data])
y = np.array([subject['y'] for subject in hariri_data])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

In [16]:
X.shape

(55, 558025)

In [17]:
mdl = SVR(kernel='linear')
mdl = mdl.fit(X_train, y_train)

In [18]:
mdl.score(X_train, y_train)

0.9999507058306588

In [19]:
mdl.predict(X_test)

array([22.14301669, 29.17136153, 31.95671716, 29.39009307, 16.21286879,
       32.6408583 , 30.60190169, 18.09279499, 27.1604125 , 31.27011165,
        5.89881909, 16.34961145, 32.75062112, 22.1085396 , 31.14534505,
       29.2603911 , 29.2521457 , 30.55377023, 29.17452058])

In [20]:
y_test

array([29, 21, 42, 37,  0, 50, 44, 23, 35, 25, 23,  4, 48, 27, 17, 54,  1,
       40, 29])

In [None]:

# Here I try to implement the spaceNet thing on our 4 samples.
# it is not working since it is not enough samples
# thats the code they used: https://nilearn.github.io/modules/generated/nilearn.decoding.SpaceNetRegressor.html#nilearn.decoding.SpaceNetRegressor


n_subjects = 4  # increase this number if you have more RAM on your box
imgs_paths = ['sub-120con_0001.nii', 'sub-121con_0001.nii', 'sub-122con_0001.nii', 'sub-123con_0001.nii']

# Split data into training set and test set
from sklearn.utils import check_random_state
from sklearn.model_selection import train_test_split

labels = Y
rng = check_random_state(42)
gm_imgs_train, gm_imgs_test, age_train, age_test = train_test_split(
    imgs_paths, labels, train_size=.75, random_state=rng)

print(gm_imgs_train)
print(gm_imgs_test)



In [None]:
from nilearn.decoding import SpaceNetRegressor

# To save time (because these are anat images with many voxels), we include
# only the 5-percent voxels most correlated with the age variable to fit.
# Also, we set memory_level=2 so that more of the intermediate computations
# are cached. Also, you may pass and n_jobs=<some_high_value> to the
# SpaceNetRegressor class, to take advantage of a multi-core system.
#
# Also, here we use a graph-net penalty but more beautiful results can be
# obtained using the TV-l1 penalty, at the expense of longer runtimes.
decoder = SpaceNetRegressor(memory="nilearn_cache", penalty="graph-net",
                            screening_percentile=5., memory_level=2)
decoder.fit(gm_imgs_train, age_train)  # fit
coef_img = decoder.coef_img_
y_pred = decoder.predict(gm_imgs_test).ravel()  # predict
mse = np.mean(np.abs(age_test - y_pred))
print('Mean square error (MSE) on the predicted age: %.2f' % mse)