# Simulation Vs Inference

We simulate alignment data under a mutation-selection model (SimuEvol), using amino-acid preferences and a GTR matrix as parameters.

Then we infer the GTR matrix using $\omega$-based models (Hyphy and BayesCode) and compare the inferred matrix with the one used for the simulation.

### Requirements to display amino-acid preferences in this notebook:

DMS_tools: https://jbloomlab.github.io/dms_tools/index.html

### Requirements for the whole experiment: 

SimuEvol: https://github.com/ThibaultLatrille/SimuEvol

Hyphy: https://github.com/veg/hyphy

BayesCode: https://github.com/bayesiancook/bayescode

Python scripts: https://github.com/ThibaultLatrille/SimuEvol/tree/master/scripts

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from IPython.display import display
from wand.image import Image as WImage
from collections import defaultdict
import os

def plot(plot_name, size='900x'):
    """Show the image stored at ``plot_name`` inline in the notebook.

    The file is loaded with Wand at a resolution of 480, resized according
    to ``size`` and handed to IPython's ``display``. A one-line message
    naming the file is printed first.

    :param plot_name: path of the image file to load (the notebook passes
        a PDF path).
    :param size: resize specification forwarded unchanged to
        ``Image.transform(resize=...)``; defaults to ``'900x'``.
    """
    message = "Here is the selection plot {0}".format(plot_name)
    print(message)

    rendered = WImage(filename=plot_name, resolution=480)
    rendered.transform(resize=size)
    display(rendered)


current_dir = "/home/thibault/SimuEvol/"
for file in sorted(os.listdir(current_dir + "/data_prefs")):
    prefix = file.strip().replace(".txt", "")
    hyphy_path = "{0}/data_hyphy/{1}".format(current_dir, prefix)
    experiment_path = "{0}/data_figures/{1}.tsv".format(current_dir, prefix)
    if os.path.isdir(hyphy_path) and os.path.isfile(experiment_path):

        prefs_path = "{0}/data_prefs/{1}".format(current_dir, file)
        pdf_path = "{0}/data_figures/{1}.pdf".format(current_dir, prefix)

        nperline = 100
        log = !dms_logoplot {prefs_path} {pdf_path} --nperline {nperline}

        plot(pdf_path)
        
        experiment_file = open(experiment_path, "r")
        header = experiment_file.readline().strip().split("\t")
        master_param = header[1]
        nested_dict = defaultdict(dict)
        for line in experiment_file:
            line_split = line.strip().split("\t")
            experiment = line_split[0]

            for index in range(1, len(header)):
                param = header[index]
                if experiment not in nested_dict[param]:
                    nested_dict[param][experiment] = []
                nested_dict[param][experiment].append(float(line_split[index]))
        
        for param, nested_dict_l1 in nested_dict.items():
            if param != master_param and param[0] != "-":
                my_dpi = 96
                fig = plt.figure(figsize=(1096 / my_dpi, 768 / my_dpi), dpi=my_dpi)
                plt.subplot(211)
                for experiment in sorted(nested_dict_l1.keys(), key=lambda x: x[::-1]):
                    value_list = nested_dict_l1[experiment]
                    mut_bias_range = nested_dict[master_param][experiment]
                    plt.plot(mut_bias_range, value_list, '--', label=experiment)
                plt.xscale('log')
                plt.xlabel('$\lambda$')
                if param[0] == "%":
                    plt.plot(mut_bias_range, mut_bias_range, color="black", label="y=x")
                    plt.yscale('log')
                plt.ylabel(param)
                plt.legend()
                plt.title(prefix)
                plt.tight_layout()
                plt.show()  