In [None]:
import itertools

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.stats
from scipy.optimize import minimize
from tqdm.notebook import tqdm

In [None]:
# Genome arrangement of each variant in the dataset, one row per variant
# (row 0 is the wild type). Downstream cells use column g as the 1-based
# genome slot occupied by gene g, with genes listed as N, P, M, G, L.
ng_combos = np.array(
    [
        [1, 2, 3, 4, 5],
        [2, 3, 4, 1, 5],
        [4, 1, 2, 3, 5],
        [4, 2, 3, 1, 5],
    ]
)




In [None]:
# set genome positions for each variant
# Length of each gene, indexed by gene (N, P, M, G, L) -- presumably nucleotides.
gene_lengths = np.array([1325, 813, 830, 1664, 6372])
# Spacer added once per preceding gene (presumably the intergenic region).
intergenic_len = 10

# ng_lenpos[v][g]: end coordinate of gene g in variant v, measured from the
# end of the first gene in that variant's genome (so the first gene sits at 0).
ng_lenpos = []
for pos in ng_combos:
    # pos[g] is the 1-based slot of gene g. argsort inverts that mapping to
    # slot -> gene index, so the lengths are laid out in true genome order.
    # (Indexing gene_lengths with pos directly is only right when the
    # arrangement is its own inverse.)
    genes_in_order = np.argsort(pos)
    lengths_in_order = gene_lengths[genes_in_order]

    # End coordinate of every slot: cumulative gene length plus one spacer
    # for each slot that precedes it.
    slot_end = np.cumsum(lengths_in_order) + np.arange(len(pos)) * intergenic_len

    # Look up each gene's slot, then shift so the first gene is at zero.
    genome_pos = slot_end[pos - 1]
    genome_pos = genome_pos - np.min(genome_pos)
    ng_lenpos.append(genome_pos)

    

In [None]:
# VSV protein masses: 10.1128/JVI.76.14.6865-6872.2002
# Typical amount of protein in a western blot:
# https://www.abcam.com/en-us/technical-resources/protocols/western-blot
# Assuming that there is 100 ng of viral protein present.

# Per-protein weights (they sum to 1; presumably molar fractions -- confirm
# against the cited paper) and the matching masses in kDa.
protein_weights = np.array([0.39215686, 0.26666667, 0.18823529, 0.1254902, 0.02745098])
protein_masses_kda = np.array([47, 30, 27, 63, 241])

# Weighted mean protein mass in kDa.
Virus_Prot_kDa = np.sum(protein_weights * protein_masses_kda)

g_vir_prot = 1.0e-7  # 100 ng, expressed in grams

# kDa -> Da (numerically g/mol), then grams -> moles -> number of molecules.
da_per_kda = 1e3
avogadro = 6.022e23
mol_prot = g_vir_prot / (Virus_Prot_kDa * da_per_kda)
prot_tot = mol_prot * avogadro



In [None]:
# Observed relative expression, one row per variant and one column per gene,
# scaled by the estimated total number of protein molecules to give counts.
ng_data = np.array([[0.282, 0.195, 0.252, 0.251, 0.02 ],
                    [0.162, 0.118, 0.135, 0.578, 0.007],
                    [0.151, 0.271, 0.354, 0.21 , 0.014],
                    [0.108, 0.182, 0.284, 0.415, 0.011]]) * prot_tot
# Truncate to whole molecules. The counts are ~1e11, which does not fit in a
# 32-bit integer, so request int64 explicitly instead of the platform default.
ng_data = ng_data.astype(np.int64)

In [None]:
# objective function for fitting Start-Stop model
def obj_fun(x):
    """Negative Poisson log-likelihood of the wild-type counts (Start-Stop model).

    NOTE: a later cell defines another ``obj_fun`` for the RAM model, so the
    cells must be run in order for the right objective to be in scope.

    Parameters
    ----------
    x : sequence of 4 floats
        Drop in expression level between consecutive genome slots.

    Returns
    -------
    float
        ``-LL`` for ``ng_data[0]``; ``np.inf`` when ``sum(x) > 1`` or the
        likelihood evaluates to NaN.
    """
    # Drops that add up to more than the starting level are infeasible.
    if np.sum(x) > 1:
        return np.inf

    # Expression level per slot: start at 1 and subtract one drop per step.
    levels = [1]
    for step in range(4):
        levels.append(levels[-1] - x[step])

    # NOTE(review): the Poisson mean is a normalised fraction while the data
    # are absolute counts; this adds a parameter-independent constant to LL.
    expected = levels / np.sum(levels)
    log_lik = np.sum(scipy.stats.poisson.logpmf(ng_data[0], expected))

    # Invalid means (e.g. negative levels) give NaN; report them as infeasible.
    return np.inf if np.isnan(log_lik) else log_lik * -1

In [None]:
# fit model
# Starting guess for the four Start-Stop parameters.
ss_x0 = [0, 0, 0, .9]
# Give one (lower, upper) pair per parameter. A single pair for four
# parameters only works on SciPy versions that broadcast bounds; older
# versions reject it as a length mismatch.
res = minimize(obj_fun, ss_x0, bounds=[(0, 1)] * len(ss_x0), tol=1e-20)
res

In [None]:
# get prediction for wildtype
# Rebuild the per-slot levels from the fitted drops: start at 1 and subtract
# each of the four fitted values in turn.
pred = [1]
for step in range(4):
    pred.append(pred[-1] - res.x[step])

# Normalise so the predicted shares sum to one.
lambda_pred = pred / np.sum(pred)
lambda_pred

In [None]:
# calculate LL for Stop-Start model
# Re-normalise the wild-type profile, then pick each gene's predicted share
# by the genome slot it occupies in each variant (1-based -> 0-based).
ss_profile = np.array(lambda_pred / np.sum(lambda_pred))
ng_ss_preds = ss_profile[ng_combos - 1]

# Poisson log-likelihood of every variant's counts under that prediction.
SS_list = [
    np.sum(scipy.stats.poisson.logpmf(ng_data[row], ng_ss_preds[row]))
    for row in range(0, ng_data.shape[0])
]

# Total over the rearranged variants only; row 0 (used for fitting) is skipped.
LL_ng_ss = sum(SS_list[1:])


In [None]:
# objective function for fitting ram model
def obj_fun(x):
    """Negative Poisson log-likelihood of the wild-type counts (RAM model).

    NOTE: this replaces the Start-Stop ``obj_fun`` defined in an earlier cell.

    Parameters
    ----------
    x : sequence of float
        Only ``x[0]`` is read: the base ``p`` raised to each gene's genome
        position.

    Returns
    -------
    float
        ``-LL`` for ``ng_data[0]``, or ``np.inf`` if the likelihood is NaN.
    """
    base = x[0]

    # Relative expression of each gene: base ** (position of the gene).
    weights = np.array([base**position for position in ng_lenpos[0]])
    expected = weights / np.sum(weights)

    log_lik = np.sum(scipy.stats.poisson.logpmf(ng_data[0], expected))

    # NaN likelihoods are reported as infeasible.
    return np.inf if np.isnan(log_lik) else log_lik * -1

In [None]:
# fit ram model parameter starting at fit from previous dataset
previous_fits = pd.read_csv("Folder1-Parameter_Fits/all_viruses_best_fits.csv")
# Starting value: first row, third column from the right of the saved fits.
p = previous_fits.iloc[0, -3]

# Note: this overwrites `res` from the Start-Stop fit.
res = minimize(obj_fun, p, bounds=[[0.5, 1]], tol=1e-20)
p1 = res.x[0]
p1


In [None]:
# calculate log likelihood of RAM model
RAM_list = []
for row, counts in enumerate(ng_data):
    # Predicted share of each gene: fitted base raised to its genome position,
    # normalised to sum to one.
    ram_weights = np.array([p1**position for position in ng_lenpos[row]])
    ram_expected = ram_weights / np.sum(ram_weights)
    RAM_list.append(np.sum(scipy.stats.poisson.logpmf(counts, ram_expected)))

# Total over the rearranged variants only; row 0 (used for fitting) is skipped.
LL_ng_RAM = sum(RAM_list[1:])

In [None]:
# Generate plots for each variant: observed shares next to the Start-Stop and
# RAM predictions, with both log-likelihoods in a text box.

variants = ["NPMGL", "GNPML", "PMGNL", "GPMNL"]

# Loop-invariant plot settings.
categories = ["N", "P", "M", "G", "L"]
indices = np.arange(len(categories))
bar_width = 0.25

for i, vr in enumerate(variants):
    # Observed counts converted to shares, plus the two model predictions.
    vr_data = ng_data[i] / np.sum(ng_data[i])
    ss_pred = ng_ss_preds[i]
    RAM_pred = np.array([p1**position for position in ng_lenpos[i]])
    RAM_pred = RAM_pred / np.sum(RAM_pred)

    fig, ax = plt.subplots(figsize=(10, 6))

    # Call order fixes the legend order: observed, Start-Stop, RAM.
    ax.bar(indices, vr_data, width=bar_width, color='#0072B2',
           edgecolor='black', label='Observed Data')
    ax.bar(indices - bar_width, ss_pred, width=bar_width, color='yellow',
           edgecolor='black', label='Start-Stop Prediction')
    ax.bar(indices + bar_width, RAM_pred, width=bar_width, color='red',
           edgecolor='black', label='RAM Prediction')

    ax.tick_params(axis='y', labelsize=18)

    title = f"Variant 3'-{vr}-5'"
    if i == 0:
        title = title + " (WT)"
    ax.set_title(title, fontsize=28)
    ax.set_xticks(indices)
    ax.set_xticklabels(categories, fontsize=20)

    # Same y-range on every figure so the variants are directly comparable.
    ax.set_ylim(0, .6)

    ax.legend(loc='upper left', fontsize=17, frameon=True, shadow=True)
    ax.set_ylabel('Relative Expression', fontsize=22)
    ax.set_xlabel('Gene', fontsize=22)
    ax.text(.465, 0.675,
            'RAM Log Likelihood: {:.3E}'.format(RAM_list[i])
            + '\nStop-Start Log Likelihood: {:.3E}'.format(SS_list[i]),
            transform=ax.transAxes, fontsize=15,
            verticalalignment='top', horizontalalignment='right',
            bbox=dict(facecolor='white', alpha=0.8))

    fig.tight_layout()
    fig.savefig(f"Figures/Fig4/{vr}_predictions.svg")
    plt.show()
    # Release the figure so repeated runs do not accumulate open figures.
    plt.close(fig)
        
    





In [None]:
# Print LL in scientific notation (RAM first, then Start-Stop)
for total_ll in (LL_ng_RAM, LL_ng_ss):
    print(f"{total_ll:.4e}")


In [None]:
# Calculate BIC = k * ln(n) - 2 * LL, with n taken as the total protein count.
# k is the number of fitted parameters: the RAM fit optimises a single value
# (p), while the Start-Stop fit optimises four.
n_params_RAM = 1
n_params_ss = 4

BIC_RAM = (n_params_RAM * np.log(prot_tot)) - (2 * LL_ng_RAM)
BIC_ss = (n_params_ss * np.log(prot_tot)) - (2 * LL_ng_ss)
print(f"{BIC_RAM:.4e}")
print(f"{BIC_ss:.4e}")

