In [None]:
# this is the main class
# facilitating property extraction and plotting
from MDPropTrack.analysis import PropertyAnalyser

# these are additional classes 
# implementing calculation of some key properties for lipids and proteins
from MDPropTrack.analysis import LipidPropertyCalculator, ProteinPropertyCalculator

`MDPropTrack` builds on the [`MDAnalysis`](https://www.mdanalysis.org/) library for MD data analysis. It's highly recommended to familiarise yourself with its Universe class structure, trajectory processing and transformations, and selection language.

`PropertyAnalyser` is the main class of the package. Apart from property extraction, it provides flexible plotting of time series. You can assess the convergence of the properties by plotting autocorrelation time vs simulation time.

An indicator of time series convergence is that the autocorrelation time plot has reached a plateau.

The methods for autocorrelation analysis in `PropertyAnalyser` were adapted from the [emcee tutorial](https://emcee.readthedocs.io/en/stable/tutorials/autocorr/). I also suggest reading the following paper on [convergence in Molecular Dynamics](https://www.nature.com/articles/s42004-024-01114-5#Sec2) to get an idea of when autocorrelation analysis is applicable.

**Let's overview what you can do with `PropertyAnalyser`**

In [None]:
# quick start
# create an analyser for your system and read properties from an edr file
pa = PropertyAnalyser(edr='path/to/simulation.edr')

# call the following method to extract properties
pa.extract_properties()

# look at the extracted data (a pandas DataFrame)
# NOTE: Jupyter only renders a bare expression when it is the last line of a
# cell - run this line in its own cell or wrap it in print() to see the table
pa.data

# plot extracted properties as time series
pa.plot()

# or plot autocorrelation time to assess convergence
pa.plot(plot_convergence=True)

Now, let's get into details:

In [None]:
# here are all the options for PropertyAnalyser

pa = PropertyAnalyser(

    # you can read properties from multiple edr files and/or trajectories
    edr = ['path/to/equilibration.edr', 'path/to/production.edr'],

    # you can provide edr without trj and vice versa
    # trajectories and topologies can be in any format compatible with MDAnalysis
    # note that trajectories and edrs are matched by file name
    # the number of trajectories and edrs doesn't have to be the same
    trj = ['path/to/equilibration.xtc', 'path/to/production.xtc'],

    # for trajectories it's highly recommended to provide a topology file
    # tpr is the best option for GROMACS trajectories
    topol = 'path/to/equilibration.tpr',

    # NOTE! there is no default set of properties to calculate from trajectories
    # you need to provide a list of functions
    # ready-to-use options are implemented in the package
    # NOTE(review): the list is left empty in this example - presumably it should
    # hold methods of LipidPropertyCalculator / ProteinPropertyCalculator
    # instances (imported at the top of the notebook); confirm and fill in
    funcs = [

        # lipid properties

        # protein properties
    ],

    # you can pass a list of names for properties calculated by each function
    # otherwise they will be named Prop1, Prop2 etc.
    func_names = [''],

    # by default trajectories are transformed by unwrapping atom groups in a simulation box
    # you can also specify groups for centering and rot-trans fit
    # by default 'protein' (the values below simply spell that default out)
    center_group='protein',
    rot_trans_group='protein' 
)

In [None]:
# you can also tune property extraction

pa.extract_properties(
    
    # time in edrs and trajectories is usually reported in ps
    # and by default PropertyAnalyser converts it to ns
    # pass 'ps' to keep the time in ps
    tu='ns',

    # a step for trajectory analysis, default 1
    # only relevant for trajectories
    step=1,
    
    # if you pass a list of edrs/trajectories they will be treated as sequential steps
    # but maybe you did several equilibrations with different conditions and want to compare them
    # pass False to treat input steps as independent runs, each starting at 0 ps
    sequential=True,
    
    # reading edr is very fast, so verbose only reports progress for trajectory analysis
    # default False
    verbose=False
)

`pa.data` is a pandas DataFrame so you can manipulate it as such (but keep in mind that plotting functions rely on a certain df structure).

For example:

In [None]:
# pandas is not imported anywhere else in this notebook,
# but it is needed below to read the saved table back in
import pandas as pd

# see a list of properties in the DataFrame
print(pa.data.columns)

# save data as csv
pa.data.to_csv('path/to/file.csv')

# reload data into a new class instance
# index_col=0 restores the index that to_csv wrote as the first column
pa_new = PropertyAnalyser()
pa_new.data = pd.read_csv('path/to/file.csv', index_col=0)

There are many ways to tune plotting too:

In [None]:
# every plotting option spelled out explicitly
# NOTE(review): the descriptions below are inferred from the argument names and
# values only - confirm them against the PropertyAnalyser.plot documentation
pa.plot(
    # names of the properties to draw, presumably columns of pa.data
    properties_to_plot=['Potential', 'Temperature', 'Pressure', 'Volume'],
    # True switches to the autocorrelation (convergence) plot, as in the quick start
    plot_convergence=False,
    # presumably custom labels for the plotted properties; None here
    labels=None,
    # presumably the x-axis label
    x_lab='Time, ns',
    # presumably a colormap / palette name
    cmap='Set1',
    # presumably extra keyword arguments for figure creation; None here
    figure_kwargs=None, 
    # looks like seaborn style settings - confirm
    style_kwargs={"style": "darkgrid", "rc": {"grid.color": ".6", "grid.linestyle": ":"}},
    # looks like keyword arguments forwarded to the seaborn plotting call - confirm
    sns_kwargs={'alpha': 0.7}
)

You can also create your custom functions to extract some values from trajectories.

Here is a template for you to work with:

In [None]:
class CustomPropertyCalculator:
	"""
	Class template with methods to calculate
	properties from a trajectory
	"""

	def __init__(self, lipid_sel=None, tail_sel=None, leaflet=0):
		"""
		Class atributes hold parameters to be used in methods

		lipid_sel - str, MDAnalysis selection for lipid group for analysis
		
		tail_sel - str, MDAnalysis selection for lipid tails
		that will be used by CalcOrderParameter

		leaflet - int, leaflets to use for property averaging 
		Valied for CalcAreaPerLipid
		-1 - lower
		1  - upper
		0  - both
		"""
		self.lipid_sel = lipid_sel
		self.tail_sel = tail_sel
		self.leaflet = leaflet

	def CalcAreaPerLipid(self, system, step=1, verbose=False):
		"""
		Calculate the average area per lipid for every analysed frame

		system - MDAnalysis Universe, trajectory for analysis
		step - int, step for trajectory analysis
		verbose - bool, report trajectory analysis progress

		Also requires:
		self.lipid_sel - atom selection for lipids in the bilayer.
		Atoms used to identify leaflets.
		These atoms will also be used to perform the Voronoi tessellation.
		self.leaflet - leaflet(s) to perform averaging for
		(-1 lower, 1 upper, 0 both)

		Returns
		list(floats), one mean area per analysed frame
		(NaN areas are ignored by the mean)
		"""

		# NOTE(review): `lpp` is not imported anywhere in the visible notebook -
		# presumably `import lipyphilic as lpp`; confirm and add it to the imports cell
		# numpy is imported locally so the template also works when pasted on its own
		import numpy as np

		# number of bins for leaflet assignment: one per 10 length units of the
		# box x-dimension (presumably Angstrom - confirm), but never fewer than 1
		# so that a box shorter than 10 units does not end up with zero bins
		n_bins = max(1, int(system.dimensions[0] // 10))

		# assign leaflets
		leaflets = lpp.leaflets.assign_leaflets.AssignLeaflets(
			universe=system,
			lipid_sel=self.lipid_sel,
			n_bins=n_bins
		)
		leaflets.run(step=step, verbose=verbose)

		# compute area per lipid, reusing the leaflet assignment from above
		apl = lpp.analysis.area_per_lipid.AreaPerLipid(
			universe=system,
			lipid_sel=self.lipid_sel,
			leaflets=leaflets.leaflets
		)
		apl.run(step=step, verbose=verbose)

		# choose the leaflet(s) to compute metrics for: 0 means both leaflets
		leaflet_vals = [-1, 1] if self.leaflet == 0 else [self.leaflet]

		# True where the leaflet id is one of the selected values
		mask = np.isin(leaflets.leaflets, leaflet_vals)

		# mean over the selected entries, column by column (one column per frame)
		return [
			np.nanmean(apl.areas[mask[:, frame], frame])
			for frame in range(apl.areas.shape[1])
		]