# Reading input data and building the SFS

The first step when running delimitpy is to read the input alignments and use them to construct a site frequency spectrum (SFS).

In [1]:
from delimitpy import parse_input
from delimitpy import process_empirical
import numpy as np
import os
import pickle

# Read in the intermediate files from the previous part of the tutorial.


In [2]:
# Parse the example configuration; `config_values` is indexed by option name below.
config_parser = parse_input.ModelConfigParser("../../examples/test1/config.txt")
config_values = config_parser.parse_config()

# Both intermediate files are read from the configured output directory.
output_dir = config_values["output directory"]
labels_path = os.path.join(output_dir, "labels.npy")
models_path = os.path.join(output_dir, "parameterized_models.pickle")

# NOTE: both loads below unpickle data (np.load with allow_pickle=True and
# pickle.load), which can execute arbitrary code. Only point this at files
# you produced yourself in the previous part of the tutorial.
labels = np.load(labels_path, allow_pickle=True)
with open(models_path, "rb") as models_file:
    parameterized_models = pickle.load(models_file)


In [3]:
# Build the processor from the models and configuration loaded in the previous cell.
data_processor = process_empirical.DataProcessor(
    parameterized_models,
    config=config_values,
)

# Convert the input alignments to an array and report its dimensions.
# In the recorded output the array is 2-D.
empirical_array = data_processor.fasta_to_numpy()
print(empirical_array.shape)

# In the recorded output this is a mapping from 3-tuples of integers to
# integer counts - presumably the amount of data retained for each
# combination of per-population sample sizes; TODO confirm against the
# delimitpy documentation.
empirical_2d_sfs_sampling = data_processor.find_downsampling(empirical_array)
print(empirical_2d_sfs_sampling)



(30, 21548)
(30, 17167)
finding projection values
{(1, 1, 1): 17167, (1, 1, 2): 17167, (1, 1, 3): 17167, (1, 1, 4): 17167, (1, 1, 5): 17167, (1, 1, 6): 17165, (1, 1, 7): 17141, (1, 1, 8): 16886, (1, 1, 9): 15285, (1, 1, 10): 8538, (1, 2, 1): 17167, (1, 2, 2): 17167, (1, 2, 3): 17167, (1, 2, 4): 17167, (1, 2, 5): 17167, (1, 2, 6): 17165, (1, 2, 7): 17141, (1, 2, 8): 16886, (1, 2, 9): 15285, (1, 2, 10): 8538, (1, 3, 1): 17167, (1, 3, 2): 17167, (1, 3, 3): 17167, (1, 3, 4): 17167, (1, 3, 5): 17167, (1, 3, 6): 17165, (1, 3, 7): 17141, (1, 3, 8): 16886, (1, 3, 9): 15285, (1, 3, 10): 8538, (1, 4, 1): 17167, (1, 4, 2): 17167, (1, 4, 3): 17167, (1, 4, 4): 17167, (1, 4, 5): 17167, (1, 4, 6): 17165, (1, 4, 7): 17141, (1, 4, 8): 16886, (1, 4, 9): 15285, (1, 4, 10): 8538, (1, 5, 1): 17155, (1, 5, 2): 17155, (1, 5, 3): 17155, (1, 5, 4): 17155, (1, 5, 5): 17155, (1, 5, 6): 17153, (1, 5, 7): 17129, (1, 5, 8): 16874, (1, 5, 9): 15273, (1, 5, 10): 8532, (1, 6, 1): 17047, (1, 6, 2): 17047, (1, 6, 3): 17

In [None]:
# Settings shared by both SFS builders, named once so the two calls cannot drift apart.
# Presumably these are the per-population downsampling targets chosen from the
# find_downsampling output above - TODO confirm.
downsampling_targets = {"A": 12, "B": 8, "C": 10}
n_replicates = 10

# Each call receives its own copy of the dict, matching the original code,
# which passed a separate dict literal to each method.
empirical_2d_sfs = data_processor.numpy_to_2d_sfs(
    empirical_array,
    downsampling=dict(downsampling_targets),
    replicates=n_replicates,
)
empirical_msfs = data_processor.numpy_to_msfs(
    empirical_array,
    downsampling=dict(downsampling_targets),
    replicates=n_replicates,
)

# Called without arguments, so it presumably works from state held on
# data_processor - verify against the DataProcessor implementation.
empirical_stats = data_processor.calc_sumstats()
