# TP RNA
For a given ribonucleotide chain, the RNA folding problem consists of finding the native fold
among the astronomically large number of possible conformations. Because the native fold is the
one with the lowest Gibbs free energy, the objective function should be an estimator of this
energy.


In [None]:
# install packages
! pip install -r requirements.txt

# Part 1
Train the objective function using interatomic distance distributions computed from a dataset of known (i.e., experimentally determined) 3D structures.

In [2]:
! pip install pandas

Defaulting to user installation because normal site-packages is not writeable
Collecting pandas
  Downloading pandas-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting pytz>=2020.1
  Downloading pytz-2023.3-py2.py3-none-any.whl (502 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m502.3/502.3 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting tzdata>=2022.1
  Downloading tzdata-2023.3-py2.py3-none-any.whl (341 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m341.8/341.8 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: pytz, tzdata, pandas
Successfully installed pandas-2.0.2 pytz-2023.3 tzdata-2023.3


In [3]:
# Standard-library modules
import argparse
import importlib
import math

# Third-party numerical / tabular libraries
import numpy as np
import pandas as pd

# Project-local module holding the shared Part 1 functions.
# load() is invoked once right after the import; in the recorded run this cell
# printed "src file loaded !".
import part1
part1.load()

src file loaded !


In [4]:
# Re-import part1 so that any edits made to the module since it was first
# imported are picked up without restarting the kernel.
importlib.reload(part1)
# Read the 4gxy structure file and hand the result to the distance helper.
# `atoms` and `distances` are kept as notebook-level variables for later cells.
# NOTE(review): the helper is named "ca" distances; which atoms are actually
# paired is decided inside part1 — confirm there.
atoms = part1.read_pdb_file('./data/pdb/4gxy.pdb')
distances = part1.calculate_ca_distances(atoms)


In [6]:
# Reload part1 again so the current version of calculate_frequencies is used.
importlib.reload(part1)
# Compute the reference frequencies from `distances` (defined in an earlier cell).
reference_frequencies = part1.calculate_frequencies(distances)
# Bare last expression: the notebook renders the returned object as the cell output.
# NOTE(review): the recorded output looks like a tuple whose first element is a
# table with one column per residue pair (A-A, A-U, ...) — confirm the return
# type in part1.
reference_frequencies

(         A-A       A-U       A-C       A-G       U-U       U-G       U-C  \
 0   0.168750  0.000000  0.000000  0.000000  0.125000  0.000000  0.000000   
 1        NaN       NaN       NaN       NaN       NaN       NaN       NaN   
 2        NaN       NaN       NaN       NaN       NaN       NaN       NaN   
 3        NaN       NaN       NaN       NaN       NaN       NaN       NaN   
 4   0.000000  0.250000  0.000000  0.250000  0.000000  0.000000  0.000000   
 5   0.066667  0.013333  0.066667  0.086667  0.013333  0.106667  0.066667   
 6   0.091954  0.022989  0.068966  0.195402  0.022989  0.068966  0.045977   
 7   0.000000  0.133333  0.133333  0.466667  0.000000  0.000000  0.200000   
 8   0.095238  0.285714  0.095238  0.190476  0.000000  0.095238  0.047619   
 9   0.000000  0.162162  0.162162  0.216216  0.000000  0.108108  0.027027   
 10  0.058252  0.077670  0.038835  0.135922  0.058252  0.048544  0.077670   
 11  0.033473  0.033473  0.079498  0.121339  0.033473  0.092050  0.066946   

# Part 2

In [8]:
# Optional one-off setup: uncomment the line below to install seaborn if it is
# missing from the environment. The stored output of this cell comes from a run
# in which the line was active.
# NOTE(review): presumably required by the Part 2 plotting code — confirm in part2.
# ! pip install seaborn

Defaulting to user installation because normal site-packages is not writeable
Collecting seaborn
  Downloading seaborn-0.12.2-py3-none-any.whl (293 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m293.3/293.3 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: seaborn
Successfully installed seaborn-0.12.2


In [9]:
# Project-local module with the Part 2 functions.
import part2

# Directories handed to the Part 2 plotting call in the next cell.
# NOTE(review): presumably scores are read from the first and figures written
# to the second — confirm against part2.
score_path, output_path = './data/scores/', './data/figs/'


In [None]:
# Run the Part 2 plotting helper with the two directories defined in the
# previous cell.
# NOTE(review): presumably reads from score_path and saves figures under
# output_path — confirm in part2.
part2.plot_distributions(score_path, output_path)

# Part 3

In [11]:
# Project-local modules used in Part 3: part3 for the energy computation and
# part1 for reading the structure and computing distances.
import part3
import part1

In [12]:
# Structure file to evaluate.
pdb_file = './data/pdb/4gxy.pdb'

# Read the structure, derive the distances, then feed them to the Part 3
# energy function. `atoms` and `distances` are reassigned at notebook level.
atoms = part1.read_pdb_file(pdb_file)
distances = part1.calculate_ca_distances(atoms)
gibbs_energy = part3.calculate_gibbs_free_energy(distances)

# Report the value between two horizontal rules; print's default single-space
# separator between arguments is relied upon to keep the output unchanged.
banner = "===" * 10
print(banner, "\nEstimated Gibbs Free Energy:", gibbs_energy, '\n', banner)

Estimated Gibbs Free Energy: 25388.97307537536 
