# VAMPIRE WORKFLOW

## Purpose: To split images into quadrants, pick training and testing image sets, and in the future run the full VAMPIRE workflow

Edited: October 28th, 2021 to specifically refer to Phuong's BEV Treatment data and split them for VAMPIRE

*Step 1: Import necessary packages*

In [1]:
import shutil, os

from glob import glob

import numpy as np
import pandas as pd
from skimage import io
import matplotlib.pyplot as plt
from PIL import Image
from numpy.linalg import inv
from sklearn.model_selection import train_test_split
from skimage.segmentation import clear_border


%matplotlib inline

*Step 2: User Inputs*

Manual Step:
Beginning with already segmented images saved as .npy arrays from the "2_Phuong_collab_segmentation.ipynb" Jupyter Notebook also within this folder. 

Not a blind study. 

Images already exist in a folder tree based on the overall slice treatment time and then the group subset for example:

48_hr_exposure_time > (1) BEV_treatment (2) healthy_control (3) OGD_control

In [39]:
#file names should be in the current working directory
folder_location = '/Users/hhelmbre/Desktop/Phuong_Bev/li_thresh'

file_type_init = '.npy'

slice_number = 4
random_state_num = 3

*Step 5: Getting the List of Images to Split*

In [40]:
arr = os.listdir(folder_location)
folder_list = np.asarray(arr)
folder_list = [ x for x in folder_list if "DS" not in x ]
folder_list

['48_hour_exposure_0_hr_application_time',
 '24_hr_exposure_0_hr_application_time',
 '4_hr_exposure_0_hr_application_time',
 'controls']

In [41]:
files_to_split_list = []
for folders in folder_list:
    arr = os.listdir(str(folder_location + '/' + folders))
    subfolder_list = np.asarray(arr)
    subfolder_list = [ x for x in subfolder_list if "DS" not in x]
    for subfolders in subfolder_list:
        arr = os.listdir(str(folder_location + '/' + folders + '/' + subfolders))
        files_list = np.asarray(arr)
        files_list = [ x for x in files_list if "DS" not in x]
        for files in files_list:
            name = str(folder_location + '/' + folders + '/' + subfolders + '/' + files)
            files_to_split_list.append(name)


*Step 6: Split the Images*

In [42]:
    name = files_to_split_list[0]
    file = np.load(name)
    quada, quadb = np.array_split(file, 2)
    quad1, quad2 = np.hsplit(quada, 2)
    quad3, quad4 = np.hsplit(quadb, 2)

In [43]:
for files in files_to_split_list:
    file = np.load(files)
    quada, quadb = np.array_split(file, 2)
    quad1, quad2 = np.array_split(quada, 2, axis=1)
    quad3, quad4 = np.array_split(quadb, 2, axis=1)
    
    quad1 = clear_border(quad1)
    quad2 = clear_border(quad2)
    quad3 = clear_border(quad3)
    quad4 = clear_border(quad4)

    
    np.save(str(files[:-4] + '_quad1.npy'), quad1)
    np.save(str(files[:-4] + '_quad2.npy'), quad2)
    np.save(str(files[:-4] + '_quad3.npy'), quad3)
    np.save(str(files[:-4] + '_quad4.npy'), quad4)

*Step 4: Choose training and testing data sets*

In [44]:
ttsplit_list_files = []
for folders in folder_list:
    arr = os.listdir(str(folder_location + '/' + folders))
    subfolder_list = np.asarray(arr)
    subfolder_list = [ x for x in subfolder_list if "DS" not in x]
    for subfolders in subfolder_list:
        print(subfolders)
        arr = os.listdir(str(folder_location + '/' + folders + '/' + subfolders))
        files_list = np.asarray(arr)
        files_list = [ x for x in files_list if "DS" not in x]
        files_list = [ x for x in files_list if "quad" in x]
        X_train, X_test= train_test_split(files_list, test_size=0.20, random_state=random_state_num)
        for files in files_list:
            if files in X_train[:]:
                shutil.move(str(folder_location + '/' + folders + '/' + subfolders + '/' + files), '/Users/hhelmbre/Desktop/Phuong_Bev/vampire/train')
            else:
                shutil.move(str(folder_location + '/' + folders + '/' + subfolders + '/' + files), '/Users/hhelmbre/Desktop/Phuong_Bev/vampire/test')

48_hr_exposure_healthy_control
48_hr_exposure_BEV_treatment
48_hr_exposure_OGD_control
24_hr_exposure_BEV_treatment
24_hr_exposure_OGD_control
24_hr_exposure_healthy_control
4_hr_exposure_OGD_control
4_hr_exposure_BEV_treatment
4_hr_exposure_healthy_control
healthy_control
ogd_control


*Step Y: Renaming the data sets according to VAMPIRE naming mechanism*

In [48]:
arr_train1 = os.listdir('/Users/hhelmbre/Desktop/Phuong_Bev/vampire/train')
file_list_train1 = np.asarray(arr_train1)
file_list_train1 = [ x for x in file_list_train1 if "DS" not in x]

In [51]:
im_number= 1
for names in file_list_train1:
    print(names)
    
    file_location = str('/Users/hhelmbre/Desktop/Phuong_Bev/vampire/train/' + names)
    array = np.load(file_location)
    im = Image.fromarray(array)
    
    if im_number < 10:
        im.save(str('/Users/hhelmbre/Desktop/Phuong_Bev/vampire/images/train/' + names[:-4] + 'xy' + '0' + str(im_number) + 'c1.png'))
        
    else:
        im.save(str('/Users/hhelmbre/Desktop/Phuong_Bev/vampire/images/train/' + names[:-4] + 'xy' + str(im_number) + 'c1.png'))
        
    
    im_number +=1

EV_MICROGLIA_STUDY_24HR_EXPOSURE_1-3-2-6_li_thresh_quad3.npy
EV_MICROGLIA_STUDY_HEALTHY_CONTROL_1-1-1-2_li_thresh_quad4.npy
EV_MICROGLIA_STUDY_4HR_EXPOSURE_1-2-3-9_li_thresh_quad4.npy
EV_MICROGLIA_STUDY_HEALTHY_CONTROL_1-1-1-5_li_thresh_quad1.npy
EV_MICROGLIA_STUDY_24HR_EXPOSURE_1-3-2-2_li_thresh_quad4.npy
EV_MICROGLIA_STUDY_OGD_CONTROL_1-1-2-3_li_thresh_quad1.npy
EV_MICROGLIA_STUDY_24HR_EXPOSURE_1-3-2-5_li_thresh_quad1.npy
EV_MICROGLIA_STUDY_HEALTHY_CONTROL_1-1-3-3_li_thresh_quad2.npy
EV_MICROGLIA_STUDY_OGD_CONTROL_1-1-2-1_li_thresh_quad2.npy
EV_MICROGLIA_STUDY_48_HR_EXPOSURE_1-4-2-3_li_thresh_quad1.npy
EV_MICROGLIA_STUDY_48_HR_EXPOSURE_1-4-2-4_li_thresh_quad4.npy
EV_MICROGLIA_STUDY_HEALTHY_CONTROL_1-1-3-1_li_thresh_quad1.npy
EV_MICROGLIA_STUDY_HEALTHY_CONTROL_1-1-3-2_li_thresh_quad2.npy
EV_MICROGLIA_STUDY_24HR_EXPOSURE_1-3-2-3_li_thresh_quad4.npy
EV_MICROGLIA_STUDY_24HR_EXPOSURE_1-3-2-4_li_thresh_quad1.npy
EV_MICROGLIA_STUDY_OGD_CONTROL_1-1-2-2_li_thresh_quad1.npy
EV_MICROGLIA_STUDY_

*Splitting the test group into the appropriate conditions*

In [53]:
arr_test = os.listdir('/Users/hhelmbre/Desktop/Phuong_Bev/vampire/test')
file_list_test = np.asarray(arr_test)
file_list_test = [ x for x in file_list_test if "DS" not in x]

In [54]:
im_number= 1
for names in file_list_test:
    print(names)
    
    file_location = str('/Users/hhelmbre/Desktop/Phuong_Bev/vampire/test/' + names)
    array = np.load(file_location)
    im = Image.fromarray(array)
    
    if im_number < 10:
        im.save(str('/Users/hhelmbre/Desktop/Phuong_Bev/vampire/images/test/' + names[:-4] + 'xy' + '0' + str(im_number) + 'c1.png'))
        
    else:
        im.save(str('/Users/hhelmbre/Desktop/Phuong_Bev/vampire/images/test/' + names[:-4] + 'xy' + str(im_number) + 'c1.png'))
        
    
    im_number +=1

EV_MICROGLIA_STUDY_HEALTHY_CONTROL_1-1-3-2_li_thresh_quad3.npy
EV_MICROGLIA_STUDY_HEALTHY_CONTROL_1-1-1-3_li_thresh_quad4.npy
EV_MICROGLIA_STUDY_HEALTHY_CONTROL_1-1-3-3_li_thresh_quad3.npy
EV_MICROGLIA_STUDY_HEALTHY_CONTROL_1-1-1-5_li_thresh_quad2.npy
EV_MICROGLIA_STUDY_24HR_EXPOSURE_1-3-2-5_li_thresh_quad2.npy
EV_MICROGLIA_STUDY_HEALTHY_CONTROL_1-1-1-4_li_thresh_quad3.npy
EV_MICROGLIA_STUDY_OGD_CONTROL_1-1-2-2_li_thresh_quad3.npy
EV_MICROGLIA_STUDY_24HR_EXPOSURE_1-3-2-4_li_thresh_quad2.npy
EV_MICROGLIA_STUDY_48_HR_EXPOSURE_1-4-2-3_li_thresh_quad3.npy
EV_MICROGLIA_STUDY_24HR_EXPOSURE_1-3-2-2_li_thresh_quad2.npy
EV_MICROGLIA_STUDY_48_HR_EXPOSURE_1-4-2-4_li_thresh_quad2.npy
EV_MICROGLIA_STUDY_OGD_CONTROL_1-1-2-1_li_thresh_quad4.npy
EV_MICROGLIA_STUDY_24HR_EXPOSURE_1-3-2-3_li_thresh_quad3.npy
EV_MICROGLIA_STUDY_HEALTHY_CONTROL_1-1-3-2_li_thresh_quad4.npy
EV_MICROGLIA_STUDY_24HR_EXPOSURE_1-3-2-1_li_thresh_quad1.npy
EV_MICROGLIA_STUDY_4HR_EXPOSURE_1-2-3-9_li_thresh_quad1.npy
EV_MICROGLIA_ST

# Creating the information necessary for VAMPIRE Analysis

In [3]:
data_folder = '/Users/hhelmbre/Desktop/Phuong_Bev/vampire/images'

In [2]:
#creates the directory in your data folder to put all information related to the model
os.mkdir(str(data_folder + '/' + '10_29_21_model'))

FileExistsError: [Errno 17] File exists: '/Users/hhelmbre/Desktop/Phuong_Bev/vampire/images/10_29_21_model'

In [59]:
#creates the csv for building a model
data = [['all_training_images', '1', str(data_folder + '/' + 'train'), 'please work', 'c1']]
build_model_csv = pd.DataFrame(data, columns = ['condition', 'set number', 'set location', 'note', 'ch1']) 

#saves csv to newly created model directory
build_model_csv.to_csv(data_folder + '/' + '10_29_21_model/' + 'images_to_build_model.csv', index=False)

In [5]:
treatments = ['48_hr_exposure_0_hr_application_pngs', '24_hr_exposure_0_hr_application_pngs', '4_hr_exposure_0_hr_application_pngs', 'controls']
groups = ['bev_treatment', 'healthy_control', 'ogd_control']

In [6]:
apply_model_paths = []
apply_model_csv = pd.DataFrame(columns = ['condition', 'set number', 'set location', 'note', 'ch1'])
set_number = 1
for treatment in treatments:
    for group in groups:
        path = data_folder + '/test/' + treatment + '/' + group
        df2 = pd.DataFrame({'condition': [treatment], 'set number': [set_number], 'set location': [path], 'note': ['Phuong BeV analysis'], 'tag': ['c1']})
        apply_model_csv = apply_model_csv.append(df2)

        set_number += 1

apply_model_csv.to_csv(data_folder + '/' + '10_29_21_model/' + 'images_to_apply_model.csv', index=False)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  return concat(


In [7]:
print('Build Model CSV Path:', str(data_folder + '/' + 'model/' + 'images_to_build_model.csv'))

print('Conda Environment: tiredvampires)
print('Number of Shape Models (Recommended):', '5')
print('Number of Shape Coordinates (Recommended):', '50')
print('Model Name:', '10_29_21_model')

print('Apply Model CSV Path:', str(data_folder + '/' + 'model/' + 'images_to_apply_model.csv'))
print('Model to Apply:', str(data_folder + '/' + 'model/' + '10_20_21_model'))

Build Model CSV Path: /Users/hhelmbre/Desktop/Phuong_Bev/vampire/images/model/images_to_build_model.csv
Number of Shape Models (Recommended): 5
Number of Shape Coordinates (Recommended): 50
Model Name: 10_29_21_model
Apply Model CSV Path: /Users/hhelmbre/Desktop/Phuong_Bev/vampire/images/model/images_to_apply_model.csv
Model to Apply: /Users/hhelmbre/Desktop/Phuong_Bev/vampire/images/model/10_20_21_model


In [1]:
import vampireanalysis
from vampireanalysis import vampire

In [2]:
vampire()

## getboundary.py
registry or boundary already exist
## main.py
## bdreg.py
applying model
available cpu cores :  8
Sample size N =  176
For loop A of bdreg, elapsed time is 0.11744499206542969seconds...
For parallel of bdreg, elapsed time is 2.7254350185394287seconds...
## pca_bdreg.py
For PCA bdreg, elapsed time is 0.0015270709991455078seconds...
# clusterSM
For cluster, elapsed time is 0.3896908760070801seconds...
## update_csv.py
