In [4]:
import numpy as np
import numpy.lib.recfunctions as rfn
import matplotlib
import matplotlib.pyplot as plt
import starfile
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
from scipy.ndimage import binary_dilation, binary_erosion

In [5]:
# seaborn supplies the plotting theme that is applied to the figures in this notebook
import seaborn as sns
sns.set_theme(
    context='poster',
    style='ticks',
    font='Helvetica',
)

In [33]:
# Read the particle list; the result is indexed by data-block name below
# ('optics' and 'particles').
star_path = '../data/particles/HEKFWT_RiboStates_New.star'
df = starfile.read(star_path)
optics, df_wt = df['optics'], df['particles']

In [11]:
# Are the origin shifts zero?
# The rest of the notebook uses the rlnCoordinate* columns directly as particle
# positions, so stop here if any particle carries a non-zero origin shift.
# (Reads from df_wt, the table loaded above; the old name `data_wt` is not
# defined anywhere in this notebook and fails on a fresh kernel.)
xshift = df_wt['rlnOriginXAngst']
yshift = df_wt['rlnOriginYAngst']
zshift = df_wt['rlnOriginZAngst']
assert all(
    extreme == 0.
    for shift in (xshift, yshift, zshift)
    for extreme in (shift.min(), shift.max())
), 'particle list contains non-zero origin shifts'

In [14]:
# Get the pixel size and check whether it is identical for every particle.
# (Reads from df_wt, the table loaded above; the old name `data_wt` is not
# defined anywhere in this notebook and fails on a fresh kernel.)
pixel_size = 1  # in A; fallback that is kept when the dataset has mixed pixel sizes
pixel_sizes = df_wt['rlnPixelSize']
# .iloc[0] takes the first row by position, independent of the index labels
if np.all(pixel_sizes == pixel_sizes.iloc[0]):
    pixel_size = pixel_sizes.iloc[0]
else:
    print('pixel size not identical over dataset')

In [63]:
# Scale the picked coordinates by the pixel size and store them as new columns (in A).
for angst_column, coord_column in (('x_angst', 'rlnCoordinateX'),
                                   ('y_angst', 'rlnCoordinateY'),
                                   ('z_angst', 'rlnCoordinateZ')):
    df_wt[angst_column] = df_wt[coord_column] * pixel_size

In this dataset, classification information was annotated in the rlnMicrographName column (see below).

The first part of the name is the tomogram identifier, which can also contain date information (the tomograms without a date were also collected on the same date). The last parts of the name indicate active/hibernating, membrane/soluble/uncertain (Mem/Sol/Unk), and, if membrane bound, also the translocon state (NCLN/OST/TRAP).

In [24]:
# show every 10000th micrograph name to illustrate the naming scheme
df_wt['rlnMicrographName'].iloc[::10000]

0                 tomo1.mrc.tomostar.active.Mem.NCLN
10000      tomo210602_11.mrc.tomostar.active.Mem.OST
20000     tomo210821_294.mrc.tomostar.active.Mem.OST
30000      tomo210813_11.mrc.tomostar.active.Mem.OST
40000      tomo200528_72.mrc.tomostar.active.Mem.OST
50000             tomo66.mrc.tomostar.active.Mem.OST
60000     tomo210821_247.mrc.tomostar.active.Mem.OST
70000          tomo210602_29.mrc.tomostar.active.Sol
80000         tomo210624_120.mrc.tomostar.active.Sol
90000          tomo210813_78.mrc.tomostar.active.Sol
100000         tomo210813_21.mrc.tomostar.active.Unk
110000        tomo210813_125.mrc.tomostar.active.Unk
120000        tomo210730_140.mrc.tomostar.active.Unk
130000         tomo210624_89.mrc.tomostar.active.Unk
Name: rlnMicrographName, dtype: object

We need to split it to extract the information. After splitting, I added these classes as columns to the pandas dataframe.

In [35]:
# Ribosome states, parsed from the dot-separated rlnMicrographName.
# (Reads from df_wt, the table loaded above; the old name `data_wt` is not
# defined anywhere in this notebook and fails on a fresh kernel.)
# Each name is split once and the parts are reused for every derived column.
name_parts = [name.split('.') for name in df_wt['rlnMicrographName']]

df_wt['tomogram'] = [parts[0] for parts in name_parts]  # first get the tomogram name
# tomogram names of the form '<prefix>_<number>' keep the prefix as date label,
# all other names get the generic label 'tomo'
df_wt['date'] = [n.split('_')[0] if len(n.split('_')) == 2 else 'tomo' for n in df_wt['tomogram']]
df_wt['activity'] = [parts[3] for parts in name_parts]
df_wt['membrane'] = [parts[4] for parts in name_parts]
# the translocon field is only present when the name has six parts
df_wt['translocon'] = [parts[5] if len(parts) == 6 else 'Unk' for parts in name_parts]
# membrane-bound particles are labelled by their translocon, all others by their membrane class
df_wt['state'] = [tl if ms == 'Mem' else ms for ms, tl in zip(df_wt['membrane'], df_wt['translocon'])]

Get some additional annotations from other particle lists. Particle identity between lists is matched based on subtomogram name or rotation angle similarity (neither is ideal, but we have to deal with it). This part of the code is a bit custom, but it will be very situational, so I decided not to wrap it in functions.

This will open lists that were classified based on elongation state and append the information as a new column to the dataframe.

In [58]:
base = '../data/particles/HEKFWT_'
names = ['Dec', 'Post', 'Pre', 'Pre+', 'Rot1', 'Rot1+',
         'Rot2', 'RotIdle', 'Translocation', 'UnRotIdle']
df_wt['elongation'] = 'Unk'

# File name (last path component) of every subtomogram in the main list, computed once.
subtomo_names = df_wt['rlnImageName'].str.split('/').str[-1]

for n in names:
    data_temp = starfile.read(base + n + '.star')['particles']
    subt_temp = [name.split('/')[-1] for name in data_temp['rlnImageName']]
    # Set membership plus a boolean row mask replaces the per-particle list search;
    # the mask also avoids using an enumerate position as an index label in .loc.
    # A particle present in several lists keeps the label of the last list in `names`.
    df_wt.loc[subtomo_names.isin(set(subt_temp)), 'elongation'] = n
    # reported value is the size of the class list relative to the full particle list
    print('freq ' + n + ' : ', len(subt_temp) / df_wt.shape[0])


freq Dec :  0.20081875697804244
freq Post :  0.10056568663937476
freq Pre :  0.028909564570152587
freq Pre+ :  0.2979903237811686
freq Rot1 :  0.0394491998511351
freq Rot1+ :  0.031231857089691107
freq Rot2 :  0.15303312244138445
freq RotIdle :  0.03387420915519166
freq Translocation :  0.04308894678079643
freq UnRotIdle :  0.05630815035355415


Here we open annotations of the presence of the TRAP and CCDC47 complexes in multipass (NCLN) translocons. At the end, I combine the data in a column that gives an overview of all the different types of translocons, as well as the soluble and unassigned states. This is the classification we used for the main figures of the manuscript.

In [65]:
base = '../data/particles/MP_TRAP{trap}CCDC47{ccd}.star'
# a = absent, p = present
names = [('a', 'a'), ('a', 'p'), ('p', 'a'), ('p', 'p')]
df_wt['trapccdc'] = 'Unk'

# Orientation triplet (rot, tilt, psi) of every particle in the main list, computed once.
wt_angles = list(zip(df_wt.rlnAngleRot, df_wt.rlnAngleTilt, df_wt.rlnAnglePsi))

for n in names:
    print(n)
    data_temp = starfile.read(base.format(trap=n[0], ccd=n[1]))['particles']
    rot, tilt, psi = data_temp['rlnAngleRot'], data_temp['rlnAngleTilt'], data_temp['rlnAnglePsi']

    # try to find matches of the rotations (exact equality of all three angles)
    # might be better to do it on positions
    # Count how often each triplet occurs in the annotation list, so that every
    # particle needs a single dictionary lookup instead of a scan over the list.
    angle_counts = {}
    for key in zip(rot, tilt, psi):
        angle_counts[key] = angle_counts.get(key, 0) + 1
    n_matches = np.array([angle_counts.get(key, 0) for key in wt_angles], dtype=int)

    # particles whose triplet occurs more than once in the list are reported and left unchanged
    for _ in range(int(np.sum(n_matches > 1))):
        print('not unique')
    # Boolean row mask: does not rely on the index labels being 0..N-1.
    df_wt.loc[n_matches == 1, 'trapccdc'] = n[0] + n[1]

# Combined label: NCLN particles with a TRAP/CCDC47 annotation get a suffix,
# every other particle keeps its 'state' label.
trapccdc_dict = {'aa': '', 'ap': 'CCDC47', 'pa': 'TRAP', 'pp': 'TRAPCCDC47'}
comb_state = np.array(['NCLN' + trapccdc_dict[trapccdc] if (st == 'NCLN' and trapccdc != 'Unk')
                       else st for st, trapccdc in zip(df_wt.state, df_wt.trapccdc)])
df_wt['state_full'] = comb_state

('a', 'a')
('a', 'p')
not unique
not unique
not unique
not unique
('p', 'a')
not unique
not unique
('p', 'p')


## Neighbour density

In [71]:
from neighbours import neighbour_position_3d

In [73]:
# Neighbour positions around each centre particle, restricted to particles annotated
# as 'active'; classes are taken from the 'state' column.
# NOTE(review): the saved run of this cell ended in
#   AttributeError: 'DataFrame' object has no attribute 'x'
# Presumably the helper reads coordinate columns named 'x'/'y'/'z', while this notebook
# only creates 'x_angst'/'y_angst'/'z_angst' -- confirm against the neighbours module.
# NOTE(review): 'NCLN+TRAP' is not among the translocon labels described above
# (NCLN/OST/TRAP), and the combined labels are written to 'state_full' as e.g. 'NCLNTRAP'
# -- confirm the intended column and class names.
relative_coords_mem, plane_norm = neighbour_position_3d(df_wt[df_wt.activity=='active'], class_column_name='state',
                                                    center_classes=['NCLN', 'NCLN+TRAP', 'OST', 'TRAP'],
                                                    neighbour_classes=['NCLN', 'NCLN+TRAP', 'OST', 'TRAP'], #, 'Unk'],
                                                    plane_norm=[0.25, 0.40, 0.88])

# Optional interactive 3D view of the relative coordinates (disabled).
# %matplotlib qt
# fig, ax = plot_3d(relative_coords_mem, plane_norm)
# plt.show()

# NOTE(review): density_plot is not imported in any cell visible here (only
# neighbour_position_3d is imported from neighbours) -- confirm where it is defined.
%matplotlib inline
fig, ax, hist, edges = density_plot(relative_coords_mem, (-550, 550), 15, (6, 6), probability=True, 
                                    tick_labels=[-500, -250, 0, 250, 500], vrange=(0, 0.008))

AttributeError: 'DataFrame' object has no attribute 'x'