In [1]:
import numpy as np
import pandas as pd
import csv
import matplotlib.pyplot as plt
import random
import time
from scipy import stats

In [2]:
def read_rama(path):
    with open(path, 'r') as file:
        phi = []
        psi = []
        for row in file:
            if row.split()[0] == '#':
                continue
            if row.split()[0] == '@': 
                continue
            if row.split()[0] == '@TYPE': 
                continue
            phi.append(float(row.split()[0])) 
            psi.append(float(row.split()[1]))

        df = pd.DataFrame(columns=['phi', 'psi'])
        df['phi'] = phi
        df['psi'] = psi
        df['weight'] = 1 / len(df.index)
        #df['weight_psi'] = 1 / len(df.index)

    return df

In [30]:
def randomizer(mult):

    dih = random_dihedral()
    const = random_constant(3)
    
    mult[random.randrange(0,len(mult))] = random.randrange(1,6)
    
    return dih, const, mult


def random_dihedral():

    return [360 * random.random() - 180 for i in range(4)]


def random_constant(limit):

    return [limit * random.random() for i in range(4)]

####################################################################################################

def make_weights(rama_df, dih, const, mult, beta):
        
    rama_df = rama_df.assign(weight=lambda x: weight(x.phi, x.psi, dih, const, mult, beta))
    #rama_df = rama_df.assign(weight_psi=lambda x: weight_psi(x.psi, dih, const, mult, beta))


    rama_df['weight'] = rama_df['weight'] / np.sum(rama_df['weight'])
    #rama_df['weight_psi'] = rama_df['weight_psi'] / np.sum(rama_df['weight_psi'])
    return rama_df

####################################################################################################

def weight(phi, psi, dih, const, mult, beta):

    d2r = np.pi

    gd_42 = (const[0] * ( 1 + np.cos(mult[0] * (psi * d2r) - (dih[0] * d2r))))
    gd_43 = (const[1] * ( 1 + np.cos(mult[1] * (phi * d2r) - (dih[1] * d2r))))        
    gd_44 = (const[2] * ( 1 + np.cos(mult[2] * (phi * d2r) - (dih[2] * d2r))))
    gd_45 = (const[3] * ( 1 + np.cos(mult[3] * (psi * d2r) - (dih[3] * d2r))))

    return np.exp(-1 * beta * (gd_42 + gd_43 + gd_44 + gd_45))

In [50]:
def make_matrix(df, positions, dimension):

    values = np.vstack([df['phi'], df['psi']])

    kernel = stats.gaussian_kde(values,bw_method = 0.5, weights=df['weight'])

    mat = np.reshape(kernel(positions).T, dimension)
    #print(mat)  
    return mat

###################################################################################################

def make_grid():
    X, Y = np.mgrid[-180:180:72j, -180:180:72j]

    positions = np.vstack([X.ravel(), Y.ravel()])

    dimension = X.shape

    return positions, dimension

###################################################################################################

def efficiency_analysis(greta_df):
       
    s_weight = np.power(np.sum(greta_df['weight']), 2)
    s2_weight = np.sum(np.power(greta_df['weight'], 2))
    #print(s_weight/(s2_weight*len(greta_df.index)))
    return s_weight/(s2_weight*len(greta_df.index))

###################################################################################################

def score_comp(amber_mat, greta_mat):
    #we will use entropy as the sum over bins of REF * log(REF/TEST)

    score = -1 * np.sum(amber_mat * np.log(amber_mat / greta_mat))
    #print('the score is: %s' % score)
    return score

In [58]:
def writer(df, score_ref, ratio, i, temp, acceptance, score, perc, dih, const, mult):

    if score > score_ref:
        ratio = ratio + 1

        df2 = pd.DataFrame([[score, ratio/(i+1), perc, dih[0], dih[1], dih[2], dih[3], const[0], const[1], const[2], const[3], mult[0], mult[1], mult[2], mult[3]]], columns=['score','ratio','perc','gd42','gd43','gd44','gd45','k42','k43','k44','k45','m42','m43','m44','m45'])
        
        df = df.append(df2)

        score_ref = score

    else:
        if np.exp(-(score  - score_ref)/temp) < acceptance:

            ratio = np.int(ratio) + 1

            df2 = pd.DataFrame([[score, ratio/(i+1), perc, dih[0], dih[1], dih[2], dih[3], const[0], const[1], const[2], const[3], mult[0], mult[1], mult[2], mult[3]]], columns=['score','ratio','perc','gd42','gd43','gd44','gd45','k42','k43','k44','k45','m42','m43','m44','m45'])
            
            df = df.append(df2)

            score_ref = score

    return score_ref, ratio, df

In [65]:
def main(amber_df, greta_df):
    
    score_ref = -1
    ratio = 0
    
    mult = [1,3,1,3]

    #Monte Carlo temperature
    temp = 0.085

    start_time = time.time()

    df = pd.DataFrame(columns=['score','ratio','perc','gd42','gd43','gd44','gd45','k42','k43','k44','k45','m42','m43','m44','m45'])

    positions, dimension = make_grid()
    
    amber_mat = make_matrix(amber_df, positions, dimension)

    for i in range(5000):
        
        acceptance = random.random()
        
        dih, const, mult = randomizer(mult)
        
        score, perc = metropolis(amber_mat, greta_df, positions, dimension, dih, const, mult, beta=1/2.4943389)
        
        score_ref, ratio, df = writer(df, score_ref, ratio, i, temp, acceptance, score, perc, dih, const, mult)

    print('Metropolis done in %s seconds!' %(time.time() - start_time))
    df.sort_values(by=['score'], ascending=False).to_csv('output/ALA_run2.csv')
    return df.sort_values(by=['score'], ascending=False)

####################################################################################################

def metropolis(amber_mat, greta_df, positions, dimension, dih, const, mult, beta):
    
    greta_df = make_weights(greta_df, dih, const, mult, beta)
    greta_mat = make_matrix(greta_df, positions, dimension)
    perc = efficiency_analysis(greta_df)

    if  np.float(perc) >= 0.40:
        score = score_comp(amber_mat, greta_mat)
    else:
        score = -10

    return score, perc

In [66]:
amber_df = read_rama('amber_rama/rama_aladp_amber.xvg')
greta_df = read_rama('greta_rama/rama_aladp_greta_mod1.xvg')

In [67]:
ala = main(amber_df, greta_df)

Metropolis done in 1294.2230365276337 seconds!


In [68]:
ala

Unnamed: 0,score,ratio,perc,gd42,gd43,gd44,gd45,k42,k43,k44,k45,m42,m43,m44,m45
0,-0.035503,0.006274,0.411934,94.624298,173.689353,170.735025,-149.0127,2.107833,0.143527,2.985263,1.403613,5,5,3,3
0,-0.03564,0.007092,0.453531,99.112733,-130.017856,-51.402873,15.02285,0.829624,0.313388,2.251547,2.835332,1,4,3,3
0,-0.035659,0.007005,0.403538,40.663785,-143.360356,12.752022,-34.584355,1.862443,1.720556,1.145213,2.770712,4,5,3,3
0,-0.035761,0.012915,0.570619,88.604001,0.455973,76.576686,-110.086995,1.032097,2.444917,1.771592,0.052851,1,1,3,2
0,-0.035811,0.019048,0.43796,133.805482,154.018893,116.096578,-76.84368,0.052356,2.583104,0.778394,2.827126,1,5,1,3
0,-0.035993,0.054348,0.502136,73.052993,50.259263,36.259732,-165.346067,2.436178,1.093367,1.644887,1.116846,2,1,3,5
0,-0.036047,0.363636,0.444956,-109.084321,-41.337599,176.597491,167.92155,0.763325,2.705677,0.674407,2.449619,3,3,4,2
0,-0.036531,0.333333,0.475639,-143.138389,104.90858,-121.643287,-124.7914,2.928211,0.712034,1.959908,0.411291,3,3,4,5
0,-0.036706,0.666667,0.443975,119.156398,-53.057801,-119.467477,-79.262159,2.678229,0.801527,1.542947,1.949589,3,5,1,3
0,-0.036815,1.0,0.686287,46.524342,167.103425,-29.350363,13.263472,1.072151,0.934875,1.956125,0.439037,1,3,1,3
