In [89]:
import MDAnalysis as mda
from MDAnalysis.analysis import align
import warnings
import numpy as np
import matplotlib.pyplot as plt
import os

warnings.filterwarnings('ignore') # suppress some MDAnalysis warnings about PSF files
print("Using MDAnalysis version", mda.__version__)


aminoacids = [
    "ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY", "HIS", 
    "ILE", "LEU", "LYS", "MET", "PHE", "PRO", "SER", "THR", "TRP", "TYR", "VAL"
]
results_folder = "resultats"
data_folder = "dades"
molecule_file = input("quin fitxer de molecules vols analitzar? ")
molecule_name = molecule_file.split(".")[0]

def folder_check(folder_name):
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)

folder_check(results_folder)
folder_check(results_folder + "/" + molecule_name)

#funcions definides

def histogram(hist_data, hist_name):
    """
    prints and saves a histogram with the given distribution
    """
    hist_data = np.array(hist_data)
    
    fig, ax = plt.subplots()
    ax.hist(hist_data, bins=20 )
    # plot the xdata locations on the x axis:
    ax.plot(hist_data, 0*hist_data, 'd' )
    ax.set_ylabel('Nombre de residuus amb aquesta RMSD')
    ax.set_xlabel('RMSD '+ hist_name)


def average_structure(av_residu, av_univers):
    """
    For a given aminoacid and a universe, returns the average shape of that aminoacid in the universe
    """
    av_univers = av_univers.select_atoms("protein and resname "+av_residu)
    n_frames = len(av_univers.residues)
    if n_frames == 0: 
        return
    n_atoms = len(av_univers.residues[0].atoms)
    with mda.Writer(results_folder + "/" + molecule_name+ "/" +av_residu +'.pdb', n_atoms) as w:
        for ts in range(n_frames):
            if len(av_univers.residues[ts].atoms)== n_atoms:
                w.write(av_univers.residues[ts].atoms)
            else: print("La " + str(ts) +" "+av_residu + " té algun problema, té " + str(len(av_univers.residues[ts].atoms))+ " àtoms en comptes de "+str(n_atoms))
    av_univers=mda.Universe(results_folder + "/" + molecule_name+ "/"+av_residu +'.xtc')
    print(av_univers.residues[0].atoms[0])
    average= align.AverageStructure(av_univers,
                                     ref_frame=0).run()
    distr = average.results.items

    ref = average.results.universe

    ref.atoms.write(results_folder + "/" + molecule_name+ "/" + av_residu +"average.pdb")
    return av_residu


###Main

u = mda.Universe(data_folder+"/"+molecule_file)
#Es determinen les estructures mitjanes
aminoacids_presents = []
average_structure("ALA", u)


"""
for aminoacid in aminoacids:
    a= average_structure(aminoacid, u)
"""
#Es determinen les RMSD respecte les estructures mitjanes

Using MDAnalysis version 2.8.0


quin fitxer de molecules vols analitzar?  1ubq.pdb


OSError: Failed to load from the topology file resultats/1ubq/ALA.xtc with parser <class 'MDAnalysis.topology.MinimalParser.MinimalParser'>.
Error: File does not exist: b'resultats/1ubq/ALA.xtc'

In [5]:
distancies = n_alpha_dist(u)
residues = u.residues
print(distancies[0])

In [None]:
#Aquesta funció l'he definida però al final no ha resultat necessaria
def distortion_distribution(dist_univers, dist_residu, dist_reference):
    """
    for a given aminoacid it calclates the RMSD 
    """
    if len(dist_univers.residues)==0:
        return
    dist_univers = dist_univers.select_atoms("resname "+dist_residu)
    dist_n_atoms = len(dist_reference)
    distribution= []
    
    for res_i in dist_univers.residues:
        if len(res_i.atoms)== dist_n_atoms:
            mda.analysis.align.alignto(res_i.atoms, dist_reference)
            distribution.append( mda.analysis.rms.rmsd(dist_reference.positions, res_i.atoms.positions, weights=None))
    
    histogram(distribution, dist_residu)
    print(len(distribution), distribution)
    

In [52]:
align.AverageStructure??

[0;31mInit signature:[0m
[0malign[0m[0;34m.[0m[0mAverageStructure[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mmobile[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mreference[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mselect[0m[0;34m=[0m[0;34m'all'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mfilename[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mweights[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtol_mass[0m[0;34m=[0m[0;36m0.1[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmatch_atoms[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mstrict[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mforce[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0min_memory[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mref_frame[0m[0;34m=[0m[0;36m0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m**[0m[

In [None]:
#Funcions definides per realitzar d'altres càlculs
def max_list(m_list, m_select):
    """
    Given a list of tuples it returns the one that has the max value at a given position
    """
    m_max= m_list[0]
    for element in m_list:
        if element[m_select]>m_max[m_select]:
            m_max = element
    return m_max

def n_alpha_dist(protein):
    """
    For a given protein, it returns the distance between the c_alpha atom and its -COO atached carbon for each residue
    """
    if not isinstance(protein, mda.core.universe.Universe):
        raise TypeError(f"Expected input_value to be of type mda.core.universe.Universe, but got {type(protein).__name__}")
    distances = []
    for res in protein.residues:
        nitrogen = res.atoms.select_atoms("name N")
        carbon_a = res.atoms.select_atoms("name CA")
        if len(nitrogen) == 1 and len(carbon_a) == 1:
            distance = np.linalg.norm(nitrogen.positions[0] - carbon_a.positions[0])
            distances.append((res.resid, res.resname, distance))
        else:
            print(f"Skipping residue {res.resid} ({res.resname}) due to lacking infotmation")
    return distances


(np.int64(1), 'MET', np.float32(1.4755622))
u 2
