In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from tqdm import trange
import re
from lmfit.models import GaussianModel, StepModel, Model
from cn_tools.cns_fit import cns_pc_from_file
from cn_tools.cns_fit import cns_cut
from cn_tools.cns_fit import mu_from_gauss
from cn_tools.cns_fit import sigma_from_cns
from cn_tools.cns_fit import Z_from_cns
from cn_tools.cns_fit import compute_local_contacts
from cn_tools.cns_fit import compute_nearest_neighbours
from cn_tools.cns_fit import compute_mean_sigma
from cn_tools.cns_fit import approximate_diameter
from cn_tools.data_processing import prepare_df

In [2]:
# Preprocessed measurement folders (runs VF_005 .. VF_008) that the pipeline iterates over.
directories = [f'../Data/preprocessed/VF_{run:03d}_analysis/' for run in range(5, 9)]

In [7]:
def compute_mu(directory, upper_range = 2, namespace = ''):
    '''Compute mu for every 'tomo' file in ``directory`` and store the values in mu.csv.

    Each file in ``directory`` whose name contains 'tomo' is passed to
    ``cns_pc_from_file``; the returned interval/pc pair is passed to
    ``mu_from_gauss``. The result is written to row ``index`` (column 'mu') of
    ``../Data/preprocessed{namespace}/mu.csv``. An existing mu.csv is loaded
    first, so rows from earlier calls are kept and rows with the same index are
    overwritten.

    Parameters
    ----------
    directory : str
        Directory holding the files to evaluate (a trailing separator is optional).
    upper_range : float, optional
        Forwarded unchanged to ``cns_pc_from_file``.
    namespace : str, optional
        Suffix appended to the ``preprocessed`` output directory name.

    Returns
    -------
    None
        The result is only written to disk.
    '''

    # output location; created on demand (including missing parents)
    out_dir = f'../Data/preprocessed{namespace}'
    save_file = f'{out_dir}/mu.csv'
    os.makedirs(out_dir, exist_ok=True)

    # continue from a previous run if a result table already exists
    if os.path.isfile(save_file):
        df_mu = pd.read_csv(save_file, index_col=0)
    else:
        df_mu = pd.DataFrame()

    # os.path.join keeps the paths valid whether or not `directory` ends with a separator
    files = [os.path.join(directory, file) for file in os.listdir(directory) if 'tomo' in file]

    for i in trange(len(files)):
        file_name = files[i]
        # index has the form 'XXX_YYY': the 7 characters before a 4-character extension
        index = file_name[-11:-4]
        interval, cns, pc = cns_pc_from_file(file_name, upper_range = upper_range)
        mu = mu_from_gauss(interval, pc)
        df_mu.loc[index, 'mu'] = mu

    df_mu.to_csv(save_file)
    
def compute_sigmas(directory, upper_range = 2, namespace = ''):
    '''Compute sigma for every 'tomo' file in ``directory`` and store the values in sigma.csv.

    For each file in ``directory`` whose name contains 'tomo':

    * mu is looked up in ``../Data/preprocessed{namespace}/mu.csv`` (must already exist),
    * the file is loaded and a constant 'diameter' column equal to mu is added,
    * the cut data frame is loaded from the matching path with 'preprocessed'
      replaced by 'final', or created with ``prepare_df`` and saved there,
    * ``cns_cut`` and ``sigma_from_cns`` are evaluated on the x/y/z coordinates,
    * sigma is written to row ``index`` of sigma.csv and the interval/cns pair
      is saved as ``cns_{index}.csv`` next to it.

    Parameters
    ----------
    directory : str
        Directory holding the files to evaluate (a trailing separator is optional).
    upper_range : float, optional
        Forwarded unchanged to ``cns_cut``.
    namespace : str, optional
        Suffix appended to the ``preprocessed`` directory that holds mu.csv,
        sigma.csv and the cns files.

    Returns
    -------
    None
        All results are only written to disk.
    '''

    out_dir = f'../Data/preprocessed{namespace}'
    save_file = f'{out_dir}/sigma.csv'

    # continue from a previous run if a result table already exists
    if os.path.isfile(save_file):
        df_sigma = pd.read_csv(save_file, index_col=0)
    else:
        df_sigma = pd.DataFrame()

    # the cut data frames live in a mirror of `directory` below 'final'
    final_dir = re.sub('preprocessed', 'final', directory)
    os.makedirs(final_dir, exist_ok=True)

    df_mu = pd.read_csv(f'{out_dir}/mu.csv', index_col=0)

    # os.path.join keeps the paths valid whether or not `directory` ends with a separator
    files = [os.path.join(directory, file) for file in os.listdir(directory) if 'tomo' in file]

    for i in trange(len(files)):

        file_name = files[i]
        # index has the form 'XXX_YYY': the 7 characters before a 4-character extension
        index = file_name[-11:-4]
        mu = df_mu.loc[index, 'mu']
        df = pd.read_csv(file_name, index_col=0)
        df['diameter'] = mu

        final_file = re.sub('preprocessed', 'final', file_name)
        if os.path.isfile(final_file):
            # the file was written with its index, so read it back the same way
            # (matches how contacts_and_neighbours loads it)
            df_cut = pd.read_csv(final_file, index_col=0)
        else:
            df_cut = prepare_df(df, index)
            df_cut.to_csv(final_file)
        xyz = df[['x','y','z']].to_numpy()
        xyz_cut = df_cut[['x','y','z']].to_numpy()

        interval, cns = cns_cut(xyz, xyz_cut, mu, upper_range = upper_range)

        sigma = sigma_from_cns(interval, cns, mu)
        df_sigma.loc[index, 'sigma'] = sigma

        # keep interval/cns on disk so later steps can reuse them without recomputing
        df_cns = pd.DataFrame(data = {'interval':interval, 'cns':cns})
        df_cns.to_csv(f'{out_dir}/cns_{index}.csv')

    df_sigma.to_csv(save_file)
    
def contacts_and_neighbours(directory, namespace = ''):
    '''Add contact numbers and nearest-neighbour data to the final csv files.

    A mean sigma is computed from ``../Data/preprocessed{namespace}/sigma.csv``
    for the resolutions 20 and 30; the one used is selected by
    ``approximate_diameter(directory)``. For each file in ``directory`` whose
    name contains 'tomo', the stored interval/cns pair (``cns_{index}.csv``) and
    mu (``mu.csv``) are loaded, ``Z_from_cns`` is evaluated, and the matching
    file below 'final' gets a ``contact_number{namespace}`` column from
    ``compute_local_contacts`` before being passed through
    ``compute_nearest_neighbours`` and written back in place.

    Parameters
    ----------
    directory : str
        Directory holding the preprocessed files (a trailing separator is optional).
    namespace : str, optional
        Suffix of the ``preprocessed`` directory holding mu.csv, sigma.csv and
        the cns files; also used as suffix of the new column name.

    Returns
    -------
    None
        The final csv files are overwritten on disk.
    '''

    out_dir = f'../Data/preprocessed{namespace}'

    #compute mean sigmas
    mean_sigmas = {res:compute_mean_sigma(sigma_file_path = f'{out_dir}/sigma.csv',resolution = res) for res in [20,30]}
    #pick relevant sigma according to directory name
    sigma = mean_sigmas[approximate_diameter(directory)]
    df_mu = pd.read_csv(f'{out_dir}/mu.csv', index_col=0)
    # os.path.join keeps the paths valid whether or not `directory` ends with a separator
    files = [os.path.join(directory, file) for file in os.listdir(directory) if 'tomo' in file]

    for i in trange(len(files)):
        #filename from preprocessed directory and other relevant files
        file_name = files[i]
        index = file_name[-11:-4]
        poly_file = re.sub('preprocessed', 'raw', file_name)
        # escape the dot and anchor at the end so only the real extension is swapped;
        # the bare pattern '.csv' would also rewrite e.g. '_csv' inside the name
        poly_file = re.sub(r'\.csv$', '.poly', poly_file)
        final_file = re.sub('preprocessed', 'final', file_name)

        #get presaved data to perform cns fit with constant mu and sigma
        df_pcns = pd.read_csv(f'{out_dir}/cns_{index}.csv')
        interval, cns = df_pcns.interval.values, df_pcns.cns.values
        mu = df_mu.at[index, 'mu']

        Z = Z_from_cns(interval, cns, mu, sigma)
        df = pd.read_csv(file_name, index_col = 0)
        df_cut = pd.read_csv(final_file, index_col = 0)

        xyz_cut = df_cut[['x','y','z']].to_numpy()
        xyz = df[['x','y','z']].to_numpy()
        df_cut[f'contact_number{namespace}'] = compute_local_contacts(xyz, xyz_cut, Z, interval, cns, namespace)
        df_cut = compute_nearest_neighbours(df_cut, poly_file)
        df_cut.to_csv(final_file)


In [9]:
directories = [
    '../Data/preprocessed/VF_005_analysis/',
    '../Data/preprocessed/VF_006_analysis/',
    '../Data/preprocessed/VF_007_analysis/',
    '../Data/preprocessed/VF_008_analysis/',
]

# (upper_range, namespace) pairs; the namespace selects the preprocessed{namespace} output folder
range_settings = [(1.0, '_10'), (1.5, '_15'), (2.0, '')]

for u_r, n_s in range_settings:
    # Each stage is finished for ALL directories before the next one starts:
    # compute_sigmas reads the mu.csv written by compute_mu, and
    # contacts_and_neighbours reads the sigma.csv written by compute_sigmas.
    for stage in (compute_mu, compute_sigmas):
        for directory in directories:
            stage(directory, upper_range=u_r, namespace=n_s)
    for directory in directories:
        contacts_and_neighbours(directory, namespace=n_s)

100%|█████████████████████████████████████████| 265/265 [01:59<00:00,  2.23it/s]
100%|███████████████████████████████████████████| 78/78 [00:36<00:00,  2.16it/s]
100%|███████████████████████████████████████████| 78/78 [00:35<00:00,  2.21it/s]
100%|█████████████████████████████████████████| 308/308 [01:55<00:00,  2.68it/s]
100%|█████████████████████████████████████████| 265/265 [01:40<00:00,  2.63it/s]
100%|███████████████████████████████████████████| 78/78 [00:30<00:00,  2.56it/s]
100%|███████████████████████████████████████████| 78/78 [00:29<00:00,  2.60it/s]
100%|█████████████████████████████████████████| 308/308 [01:49<00:00,  2.81it/s]
100%|█████████████████████████████████████████| 265/265 [03:40<00:00,  1.20it/s]
100%|███████████████████████████████████████████| 78/78 [01:04<00:00,  1.20it/s]
100%|███████████████████████████████████████████| 78/78 [01:04<00:00,  1.20it/s]
100%|█████████████████████████████████████████| 308/308 [04:25<00:00,  1.16it/s]
100%|███████████████████████

In [None]:
path = '../Data/final/'
measurements = [
    '../Data/final/VF_005_analysis',
    '../Data/final/VF_006_analysis',
    '../Data/final/VF_007_analysis',
    '../Data/final/VF_008_analysis',
]

# NOTE(review): merge_measurements is neither defined nor imported in the visible
# cells — confirm where it comes from before running this cell on a fresh kernel.
for measurement in measurements:
    # characters 14..19 are the 'VF_XXX' tag that follows the '../Data/final/' prefix
    tag = measurement[14:20]
    df = merge_measurements(f'{measurement}/')
    df.to_csv(f'../Data/{tag}_og.csv')