In [1]:
import csv
import os
import random
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
def make_database(path='database_csv/GLY.csv'):
    """Load a reference (phi, psi) table and attach uniform, normalised weights.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file with exactly three columns, which are relabelled ``aa``,
        ``phi`` and ``psi`` in that order. The first line of the file is
        consumed as the header by ``pd.read_csv``. Defaults to
        ``'database_csv/GLY.csv'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``phi``, ``psi`` and ``weight``; every row carries the weight
        ``1 / n_rows`` so that the weights sum to one.

    Raises
    ------
    ZeroDivisionError
        If the file holds no data rows.
    """
    df = pd.read_csv(path)
    df.columns = ['aa', 'phi', 'psi']

    # The residue label is not needed once the table is loaded.
    df = df.drop('aa', axis=1)
    df['weight'] = 1 / len(df.index)  # uniform weights, normalised to sum to 1

    return df

####################################################################################################

def read_rama(path):
    """Read a whitespace-separated Ramachandran table and return its glycine rows.

    The three columns of the file are relabelled ``phi``, ``psi`` and ``aa``;
    the first line of the file is consumed as the header by ``pd.read_csv``.
    The ``aa`` field has the form ``<type>-<num>`` and is split at the first
    dash only.

    Parameters
    ----------
    path : str or path-like
        Location of the table to read.

    Returns
    -------
    pandas.DataFrame
        The rows whose residue type is ``'GLY'``, with columns ``index``
        (row position in the type-sorted table), ``phi`` and ``psi``.

    Raises
    ------
    KeyError
        If the table contains no ``'GLY'`` residue.
    """
    # Handing the path to pandas lets it open *and close* the file; the
    # previous bare open() left the handle dangling.
    rama = pd.read_csv(path, sep=r'\s+')
    rama.columns = ['phi', 'psi', 'aa']

    # Split "TYPE-NUM" into two columns. `n` must be passed by keyword:
    # pandas >= 2.0 rejects the positional form.
    rama[['type', 'num']] = rama['aa'].str.split('-', n=1, expand=True)

    # Sort by residue type so that rows of one residue are contiguous.
    rama = rama.drop(['aa', 'num'], axis=1).sort_values('type').reset_index(drop=True)

    is_gly = rama['type'] == 'GLY'
    if not is_gly.any():
        # Same failure mode as looking 'GLY' up in a per-residue dict.
        raise KeyError('GLY')

    # reset_index() (without drop) keeps the sorted-table position as 'index'.
    return rama[is_gly].drop('type', axis=1).reset_index()

In [3]:
def randomizer(mult):
    """Draw a fresh random parameter set and perturb one multiplicity.

    Parameters
    ----------
    mult : list of int
        Current multiplicities. One randomly chosen entry is overwritten
        **in place** with an integer from 1 to 8 inclusive.

    Returns
    -------
    tuple
        ``(dih, const, mult)``: four random phase angles, four random force
        constants drawn with limit 10, and the (mutated) ``mult`` list itself.
    """
    new_dihedrals = random_dihedral()
    new_constants = random_constant(10)

    # The replacement value is drawn before the slot index; this matches the
    # evaluation order of a one-line subscript assignment (right-hand side
    # first), so a seeded run consumes random numbers in the same sequence.
    replacement = random.randrange(1, 9)
    slot = random.randrange(0, len(mult))
    mult[slot] = replacement

    return new_dihedrals, new_constants, mult


def random_dihedral():
    """Return a list of four angles drawn uniformly from [-180, 180)."""
    angles = []
    for _ in range(4):
        # random.random() lies in [0, 1), so the result lies in [-180, 180).
        angles.append(360 * random.random() - 180)
    return angles


def random_constant(limit):
    """Return a list of four values drawn uniformly from [0, limit).

    Parameters
    ----------
    limit : float
        Scale applied to each ``random.random()`` draw.
    """
    constants = []
    for _ in range(4):
        constants.append(limit * random.random())
    return constants

####################################################################################################

def make_weights(rama_df, dih, const, mult, beta):
    """Return a copy of ``rama_df`` with a normalised ``weight`` column.

    The raw weight of every row is obtained from ``weight()`` evaluated on the
    ``phi`` and ``psi`` columns, then divided by the sum of all raw weights so
    that the column sums to one. The input frame is not modified.

    Parameters
    ----------
    rama_df : pandas.DataFrame
        Must provide ``phi`` and ``psi`` columns.
    dih, const, mult : sequence
        Phase angles, force constants and multiplicities passed to ``weight()``.
    beta : float
        Inverse-temperature factor passed to ``weight()``.

    Returns
    -------
    pandas.DataFrame
    """
    raw_weights = weight(rama_df.phi, rama_df.psi, dih, const, mult, beta)

    # assign() builds a new frame, so the caller's data stays untouched.
    weighted = rama_df.assign(weight=raw_weights)
    weighted['weight'] = weighted['weight'] / np.sum(weighted['weight'])

    return weighted


def weight(phi, psi, dih, const, mult, beta):
    """Boltzmann-like factor of a four-term periodic dihedral energy.

    Each term has the form ``const[k] * (1 + cos(mult[k] * angle - dih[k]))``
    with all angles given in degrees and converted to radians internally.
    Terms 0 and 3 are evaluated on ``psi``; terms 1 and 2 on ``phi``.

    Parameters
    ----------
    phi, psi : float or array-like
        Backbone angles in degrees.
    dih : sequence of 4 floats
        Phase offsets in degrees.
    const : sequence of 4 floats
        Force constants.
    mult : sequence of 4 numbers
        Multiplicities.
    beta : float
        Factor multiplying the energy in the exponent.

    Returns
    -------
    float or array-like
        ``exp(-beta * energy)``, with the same shape as the angle inputs.
    """
    deg_to_rad = np.pi / 180

    # Which backbone angle each of the four terms acts on.
    angle_per_term = (psi, phi, phi, psi)

    terms = [
        (const[k] * ( 1 + np.cos(mult[k] * (angle * deg_to_rad) - (dih[k] * deg_to_rad))))
        for k, angle in enumerate(angle_per_term)
    ]

    # Summed left to right in term order.
    energy = terms[0] + terms[1] + terms[2] + terms[3]

    return np.exp(-1 * beta * energy)

In [4]:
def make_matrix(rama_df, bins=180, angle_range=None):
    """Build a weighted, density-normalised 2-D histogram of (psi, phi).

    Parameters
    ----------
    rama_df : pandas.DataFrame
        Must provide ``psi``, ``phi`` and ``weight`` columns.
    bins : int or sequence, optional
        Bin specification forwarded to ``np.histogram2d`` (default 180 per axis).
    angle_range : sequence ``[[psi_min, psi_max], [phi_min, phi_max]]``, optional
        Explicit histogram limits. With the default ``None`` the limits are
        taken from the min/max of the data, so two matrices built from
        different data sets do not necessarily share bin edges; pass the same
        explicit range (e.g. ``[[-180, 180], [-180, 180]]``) when the matrices
        are to be compared bin by bin.

    Returns
    -------
    numpy.ndarray
        Histogram with ``psi`` along the first axis and ``phi`` along the
        second, normalised as a probability density.
    """
    # Only the counts are needed; the bin edges are discarded.
    mat, _, _ = np.histogram2d(
        rama_df['psi'],
        rama_df['phi'],
        bins=bins,
        range=angle_range,
        weights=rama_df['weight'],
        density=True,
    )

    return mat

####################################################################################################

def efficiency_analysis(rama_dict):
    """Return ``(sum w)^2 / (n * sum w^2)`` for the ``weight`` column.

    The value is 1 when all weights are equal and ``1 / n`` when a single row
    carries all the weight.

    Parameters
    ----------
    rama_dict : pandas.DataFrame
        Frame with a ``weight`` column (a DataFrame, despite the name).

    Returns
    -------
    float
    """
    weights = rama_dict['weight']
    n_rows = len(rama_dict.index)

    square_of_sum = np.square(np.sum(weights))
    sum_of_squares = np.sum(np.square(weights))

    return square_of_sum / (sum_of_squares * n_rows)

####################################################################################################

def score_comp(mat_db, df_rama):
    """Relative squared deviation between a reference matrix and a sample.

    The sample frame is converted to a matrix with ``make_matrix``; the score
    is ``sum((mat_db - mat_rama)^2) / sum(mat_db^2)``, so 0 means identical
    matrices.

    Parameters
    ----------
    mat_db : numpy.ndarray
        Reference matrix; must have the same shape as ``make_matrix(df_rama)``.
    df_rama : pandas.DataFrame
        Weighted sample passed to ``make_matrix``.

    Returns
    -------
    float
    """
    mat_rama = make_matrix(df_rama)

    squared_error = np.sum(np.square(mat_db - mat_rama))
    reference_norm = np.sum(np.square(mat_db))

    return squared_error / reference_norm

In [5]:
def writer(df, score_ref, ratio, i, temp, acceptance, score, perc, dih, const, mult):
    """Apply the Metropolis acceptance test and log the move if accepted.

    A move is accepted when ``score`` is lower than ``score_ref``, or
    otherwise when ``exp(-(score - score_ref) / temp) > acceptance``.

    Parameters
    ----------
    df : pandas.DataFrame
        Log of accepted moves so far.
    score_ref : float
        Score of the last accepted move.
    ratio : int
        Number of moves accepted so far.
    i : int
        Zero-based index of the current iteration.
    temp : float
        Monte Carlo temperature.
    acceptance : float
        Random threshold the Metropolis factor is compared against.
    score, perc : float
        Score and efficiency of the proposed move.
    dih, const, mult : sequence of 4
        Proposed phases, force constants and multiplicities.

    Returns
    -------
    tuple
        ``(score_ref, ratio, df)``. On rejection the inputs are returned
        unchanged; on acceptance ``score_ref`` becomes ``score``, ``ratio`` is
        incremented and ``df`` gains one row (with index label 0) whose
        ``ratio`` column holds ``ratio / (i + 1)``.
    """
    # `or` short-circuits: improvements are accepted outright, so the
    # exponential is only evaluated for non-improving moves.
    accepted = score < score_ref or np.exp(-(score - score_ref) / temp) > acceptance
    if not accepted:
        return score_ref, ratio, df

    # Built-in int replaces the np.int alias, which NumPy 1.24 removed.
    ratio = int(ratio) + 1

    columns = ['score', 'ratio', 'perc',
               'gd46', 'gd47', 'gd48', 'gd49',
               'k46', 'k47', 'k48', 'k49',
               'm46', 'm47', 'm48', 'm49']
    values = [score, ratio / (i + 1), perc]
    values += [dih[k] for k in range(4)]
    values += [const[k] for k in range(4)]
    values += [mult[k] for k in range(4)]
    row = pd.DataFrame([values], columns=columns)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    # An empty log is replaced by the row directly, which avoids concatenating
    # with an all-empty frame (deprecated dtype handling in recent pandas).
    if len(df.index) == 0:
        df = row
    else:
        df = pd.concat([df, row])

    return score, ratio, df

In [13]:
def main(mat_db, rama_dict, n_iter=100000, temp=0.03, score_ref=1.052705,
         beta=1/2.4943389, out_path='output/GLY.csv'):
    """Run the Monte Carlo parameter search and save the accepted moves.

    Every iteration draws a random parameter set with ``randomizer``, scores
    it with ``metropolis`` and passes it to ``writer`` for the accept/reject
    decision and logging.

    Parameters
    ----------
    mat_db : numpy.ndarray
        Reference matrix the samples are scored against.
    rama_dict : pandas.DataFrame
        Sample frame forwarded to ``metropolis``.
    n_iter : int, optional
        Number of Monte Carlo iterations (default 100000).
    temp : float, optional
        Monte Carlo temperature used in the acceptance test (default 0.03).
    score_ref : float, optional
        Score the first proposals are compared against (default 1.052705).
    beta : float, optional
        Factor forwarded to ``metropolis`` (default ``1/2.4943389``;
        presumably 1/kT in kJ/mol near 300 K -- TODO confirm).
    out_path : str, optional
        CSV file the sorted log is written to (default ``'output/GLY.csv'``).

    Returns
    -------
    pandas.DataFrame
        Accepted moves sorted by ascending ``score``.
    """
    ratio = 0
    mult = [3, 5, 1, 1]

    # Create the output folder up front so a missing directory fails before
    # the long loop instead of discarding its results at the very end.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    start_time = time.time()

    # NOTE: writer() grows this frame one row per accepted move.
    df = pd.DataFrame(columns=['score','ratio','perc','gd46','gd47','gd48','gd49','k46','k47','k48','k49','m46','m47','m48','m49'])

    for i in range(n_iter):

        # Threshold for the Metropolis test; drawn before the proposal so the
        # sequence of random numbers matches the original ordering.
        acceptance = random.random()

        dih, const, mult = randomizer(mult)

        score, perc = metropolis(mat_db, rama_dict, dih, const, mult, beta=beta)

        score_ref, ratio, df = writer(df, score_ref, ratio, i, temp, acceptance, score, perc, dih, const, mult)

    print('Metropolis done in %s seconds!' %(time.time() - start_time))

    # Sort once and reuse the result for both the file and the return value.
    ranked = df.sort_values(by=['score'])
    ranked.to_csv(out_path)
    return ranked

####################################################################################################

def metropolis(mat_db, rama_df, dih, const, mult, beta, min_efficiency=0.45, penalty=1000):
    """Score one parameter set against the reference matrix.

    The sample is reweighted with ``make_weights`` and its efficiency is
    computed with ``efficiency_analysis``. Only when the efficiency reaches
    ``min_efficiency`` is the real score computed with ``score_comp``;
    otherwise the fixed ``penalty`` is returned as the score.

    Parameters
    ----------
    mat_db : numpy.ndarray
        Reference matrix.
    rama_df : pandas.DataFrame
        Sample frame with ``phi`` and ``psi`` columns.
    dih, const, mult : sequence of 4
        Phases, force constants and multiplicities of the proposal.
    beta : float
        Factor forwarded to ``make_weights``.
    min_efficiency : float, optional
        Minimum efficiency required to compute a real score (default 0.45).
    penalty : float, optional
        Score assigned to proposals below the threshold (default 1000).

    Returns
    -------
    tuple
        ``(score, perc)`` where ``perc`` is the efficiency.
    """
    weighted = make_weights(rama_df, dih, const, mult, beta)

    perc = efficiency_analysis(weighted)

    # Built-in float replaces the np.float alias, which NumPy 1.24 removed.
    # A NaN efficiency compares False and therefore receives the penalty.
    if float(perc) >= min_efficiency:
        score = score_comp(mat_db, weighted)
    else:
        score = penalty

    return score, perc


In [14]:
# Reference matrix: 2-D histogram of the database loaded by make_database().
mat_db = make_matrix(make_database())

# NOTE: despite the name, read_rama() returns a single DataFrame
# (its 'GLY' entry), not a dict.
rama_dict = read_rama('input/rama_diff_4.xvg')

In [None]:
# Run the Monte Carlo search; main() also writes the score-sorted log to CSV.
gly = main(mat_db, rama_dict)

In [None]:
# Display the logged moves returned by main(), sorted by ascending score.
gly