# VAMPIRE WORKFLOW

## Purpose: To split images into quadrants, pick training and testing image sets, and in the future run the full VAMPIRE workflow

Edited: October 28th, 2021 to specifically refer to Phuong's BEV Treatment data and split them for VAMPIRE

*Step 1: Import necessary packages*

In [None]:
import shutil, os

from glob import glob

import numpy as np
import pandas as pd
from skimage import io
import matplotlib.pyplot as plt
from PIL import Image
from numpy.linalg import inv
from sklearn.model_selection import train_test_split
from skimage.segmentation import clear_border
import skimage
import tifffile as tiff
import vampire
from os.path import isfile, join

%matplotlib inline

*Step 2: User Inputs*

Manual Step:
This notebook begins with already-segmented images saved as .npy arrays by the "2_Phuong_collab_segmentation.ipynb" Jupyter notebook, which is also in this folder.

Not a blind study. 

Images already exist in a folder tree organized first by the overall slice treatment time and then by the group subset, for example:

48_hr_exposure_time > (1) BEV_treatment (2) healthy_control (3) OGD_control

In [None]:
# Absolute path of the root folder that holds the caffeine data set.
folder_location = '/Users/nelsschimek/Documents/nancelab/Data/caffeine'

# Extension of the segmented input arrays.
file_type_init = '.npy'

# slice_number is not referenced by any cell shown in this notebook;
# random_state_num is the seed used by the (commented-out) train/test split.
slice_number, random_state_num = 4, 3

*Step 3: Getting the List of Images to Split*

In [None]:
# Collect the names of the regular files in the cortex folder whose name
# contains '.tif', then display the list.
# NOTE(review): the conversion loop in the next cell calls np.load() on
# '<cortex>/test/' + name for every entry of file_list, which looks like it
# expects .npy names from the 'test' sub-folder instead - confirm.
my_path = '/Users/nelsschimek/Documents/nancelab/Data/caffeine/cortex/'
file_list = []
for entry in os.listdir(my_path):
    if '.tif' in entry and isfile(join(my_path, entry)):
        file_list.append(entry)
file_list

In [None]:
# Convert every array named in file_list into an 8-bit TIFF written to the
# same 'test' folder it was loaded from.
# NOTE(review): np.load() expects .npy input, while file_list is built from
# names containing '.tif' in the parent cortex folder - confirm the listing
# cell matches what this loop is meant to read.
test_dir = '/Users/nelsschimek/Documents/nancelab/Data/caffeine/cortex/test/'

for file in file_list:

    new_array = np.load(join(test_dir, file))

    # Cast to uint8 and scale by 255 (so a boolean or 0/1 mask becomes 0/255).
    colored_array = new_array.astype(np.uint8) * 255

    # splitext() removes whatever extension the name carries instead of
    # assuming it is exactly four characters long.
    stem = os.path.splitext(file)[0]
    output_path = join(test_dir, stem + '.tif')

    tiff.imwrite(output_path, colored_array)


*Step 4: Choose training and testing data sets*

In [None]:
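# NOTE: every line of this cell is commented out, so no train/test split is performed when the notebook runs.
# ttsplit_list_files = []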
# for folders in folder_list:
#     arr = os.listdir(str(folder_location + '/' + folders))
#     subfolder_list = np.asarray(arr)
#     subfolder_list = [ x for x in subfolder_list if "DS" not in x]
#     subfolder_list = [ x for x in subfolder_list if ".npy" not in x]
#     for subfolders in subfolder_list:
#         print(subfolders)
#         arr = os.listdir(str(folder_location + '/' + folders + '/' + subfolders))
#         files_list = np.asarray(arr)
#         files_list = [ x for x in files_list if "DS" not in x]
#         files_list = [ x for x in files_list if "quad" in x]
#         X_train, X_test= train_test_split(files_list, test_size=0.20, random_state=random_state_num)
#         for files in files_list:
#             if files in X_train[:]:
#                 shutil.move(str(folder_location + '/' + folders + '/' + subfolders + '/' + files), '/Users/nelsschimek/Documents/nancelab/vampire_work/caffeine/train')
#             else:
#                 shutil.move(str(folder_location + '/' + folders + '/' + subfolders + '/' + files), '/Users/nelsschimek/Documents/nancelab/vampire_work/caffeine/test')

*Step Y: Renaming the data sets according to VAMPIRE naming mechanism*

In [None]:
# List the segmented arrays in the training folder and display them.
#
# Only '.npy' files are kept: the renaming loop that consumes this list
# np.load()s every entry and writes its PNG output into this same folder, so
# an unfiltered listing would pick up those PNGs (and hidden files such as
# '.DS_Store') on a re-run. Filtering on the extension also avoids dropping
# legitimate files that merely contain "DS" somewhere in their name.
arr_train1 = os.listdir('/Users/nelsschimek/Documents/nancelab/Data/caffeine/cortex/train')

# sorted() makes the image numbering reproducible; os.listdir() returns
# entries in arbitrary order.
file_list_train1 = sorted(
    name for name in arr_train1
    if name.endswith('.npy') and not name.startswith('.')
)
file_list_train1

In [None]:
# Save every training array as a PNG named '<stem>xy<NN>c1.png' in the
# training folder, where NN is a running image number (starting at 1)
# zero-padded to two digits.
train_prefix = '/Users/nelsschimek/Documents/nancelab/Data/caffeine/cortex/train/'
im_number = 1
for names in file_list_train1:
    print(names)

    file_location = str(train_prefix + names)
    array = np.load(file_location)
    im = Image.fromarray(array)

    # names[:-4] drops the last four characters (the extension) of the name;
    # the ':02d' format pads single-digit numbers with a leading zero.
    im.save(f'{train_prefix}{names[:-4]}xy{im_number:02d}c1.png')

    im_number += 1

*Splitting the test group into the appropriate conditions*

In [None]:
# Save every test array as a PNG named '<stem>xy<NN>c1.png' in the test
# folder, where NN is a running image number (starting at 1) zero-padded to
# two digits.
test_dir = '/Users/nelsschimek/Documents/nancelab/vampire_work/caffeine/test/'

# file_list_test is not defined by any earlier cell shown in this notebook,
# so it is built here from the test folder. Only '.npy' files are listed
# because each entry is passed to np.load() and the PNG output lands in the
# same folder; sorted() keeps the numbering reproducible across runs.
file_list_test = sorted(
    name for name in os.listdir(test_dir)
    if name.endswith('.npy') and not name.startswith('.')
)

for im_number, names in enumerate(file_list_test, start=1):
    print(names)

    file_location = join(test_dir, names)
    array = np.load(file_location)
    im = Image.fromarray(array)

    # splitext() strips the extension without assuming its length.
    stem = os.path.splitext(names)[0]
    im.save(join(test_dir, f'{stem}xy{im_number:02d}c1.png'))

# Creating the information necessary for VAMPIRE Analysis

In [None]:
# Treatment labels 'treatment_A' through 'treatment_E', and the list of
# region groups. Neither name is referenced by the other cells shown here.
treatments = ['treatment_' + letter for letter in 'ABCDE']
groups = ['cortex']

In [None]:
# Folder used as the image set for model building (also used as the output
# folder in build_info_df).
image_set_path = '/Users/nelsschimek/Documents/nancelab/Data/caffeine/training/converted_tiffs'

# Run VAMPIRE's property extraction on that folder.
# NOTE(review): presumably this measures the segmented objects in each image
# and caches the result in the folder - confirm against the vampire docs.
vampire.extraction.extract_properties(image_set_path)

In [None]:
# Single-row table of model-building settings, handed to
# vampire.quickstart.fit_models in the following cell. Input and output
# folders are both image_set_path; num_pc is left as NaN.
build_settings = {
    'img_set_path': image_set_path,
    'output_path': image_set_path,
    'model_name': 'li',
    'num_points': 50,
    'num_clusters': 5,
    'num_pc': np.nan,
}
build_info_df = pd.DataFrame([build_settings])

In [None]:
# Build the model(s) described by the rows of build_info_df.
# NOTE(review): presumably writes one pickled model per row into that row's
# output_path - confirm against the vampire docs.
vampire.quickstart.fit_models(build_info_df)

In [None]:
# Load the pickled model from the training folder.
# NOTE(review): the file name is hard-coded; '(50_5_38)' presumably encodes
# num_points, num_clusters and the number of principal components - confirm
# it matches the file the fitting step actually wrote.
model_path = os.path.join('/Users/nelsschimek/Documents/nancelab/Data/caffeine/training/converted_tiffs', 'model_li_(50_5_38)__.pickle')
vampire_model = vampire.util.read_pickle(model_path)

In [None]:
# Region whose test images are described by the table below.
region = 'hippocampus'

# Shared pieces of the per-treatment paths.
testing_root = f'/Users/nelsschimek/Documents/nancelab/Data/caffeine/testing/{region}'
trained_model = '/Users/nelsschimek/Documents/nancelab/Data/caffeine/training/converted_tiffs/model_li_(50_5_38)__.pickle'
treatment_dirs = [f'treatment_{letter}' for letter in 'ABCDE']

# One row per treatment (A-E): the folder of converted TIFFs, the model file
# (the same for every row), the output folder, and the image-set name.
apply_info_df = pd.DataFrame({
    'img_set_path': [f'{testing_root}/{name}/converted_tiffs/' for name in treatment_dirs],
    'model_path': [trained_model] * len(treatment_dirs),
    'output_path': [f'{testing_root}/{name}/' for name in treatment_dirs],
    'img_set_name': [f'li_{region}_{name}' for name in treatment_dirs],
})

In [None]:
# Display the first image-set path as a quick check of the table.
apply_info_df['img_set_path'][0]

In [None]:
# Apply the model to each image set listed in apply_info_df.
# NOTE(review): presumably writes an 'apply-properties_...' pickle into each
# row's output_path - confirm against the vampire docs.
vampire.quickstart.transform_datasets(apply_info_df)

In [None]:
# Load and display the properties table for treatment_A of the current region.
#
# The folder and file name are derived from `region` rather than a fixed
# 'cortex', so this cell reads the output of the transform step configured
# above instead of a result left over from a different run.
# NOTE(review): '(50_5_38)' must match the model file that was applied -
# confirm if the model settings change.
properties_path = os.path.join(
    f'/Users/nelsschimek/Documents/nancelab/Data/caffeine/testing/{region}/treatment_A',
    f'apply-properties_li_on_li_{region}_treatment_A_(50_5_38)__.pickle',
)
properties_df = vampire.util.read_pickle(properties_path)
properties_df