ribosome number analysis
01.25

@author: jennifer love

This script calculates the number of ribosomes present on an mRNA using the SunTag method; colocalised mRNA and neongreen (protein) signals represent translating mRNAs. The intensities of each translation site and its mRNA are normalised to the intensities of a single protein and a single mRNA molecule, and the number of ribosomes per mRNA is calculated from these normalised values.

The first portion of this script is adapted from DaisyVinter/thesis/ribosome_number.ipynb, but the ribosome number calculation differs.

In [12]:
# load relevant packages
import pandas as pd
import scipy.spatial.distance
import scipy.optimize
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# global plotting parameters: Arial, size 14, for every figure
mpl.rc('font', **{'family': 'Arial', 'size': 14})
csfont = dict(fontname = 'Arial')

# directory the results are written to
folder = ''

# input spot files (fill in before running)
# NOTE(review): the previous comments on bright_file and single_file looked swapped;
# the loading cell treats `singles` as the full neongreen data and `brights` as the
# translation sites to be removed from it - confirm the paths match this.
mrna_file = '' # mRNA data
bright_file = '' # bright neongreen foci (translation sites)
single_file = '' # full neongreen data (single proteins and translation sites)

# voxel dimensions of the input images, used to convert pixel coordinates to um
xy_pixel = 0.014
z_pixel = 0.2

Read in and prepare data

In [2]:
# read in data and name the columns (the fifth column is left unnamed and is not used below)
column_names = ['x', 'y', 'z', 'intensity', '']
mrna = pd.read_csv(mrna_file, sep = '  ', engine = 'python', names = column_names)
brights = pd.read_csv(bright_file, sep = '  ', engine = 'python', names = column_names)
singles = pd.read_csv(single_file, sep = '  ', engine = 'python', names = column_names)

# remove -8 intensity values.
# The index is reset afterwards so that row labels equal row positions: the
# colocalisation cells obtain row positions from the position arrays built at
# the end of this cell and use them to address rows of these data frames.
mrna = mrna.loc[mrna['intensity'] != -8].reset_index(drop = True)
brights = brights.loc[brights['intensity'] != -8].reset_index(drop = True)
singles = singles.loc[singles['intensity'] != -8].reset_index(drop = True)

# convert to um using pixel dimensions
for spots in (mrna, brights, singles):
    spots['x'] = spots['x']*xy_pixel
    spots['y'] = spots['y']*xy_pixel
    spots['z'] = spots['z']*z_pixel

# remove bright translation sites from the full data to give just single proteins.
# DataFrame.isin(DataFrame) only matches cells that share the same index AND
# column label, so it cannot be used to find rows of `singles` that occur
# anywhere in `brights`; an anti-join on position and intensity is used instead.
# NOTE(review): assumes a bright focus is listed in the full data with exactly
# the same coordinates and intensity - confirm against the spot detection output.
match_cols = ['x', 'y', 'z', 'intensity']
bright_keys = brights[match_cols].drop_duplicates()
membership = singles[match_cols].merge(bright_keys, on = match_cols, how = 'left', indicator = True)
is_bright = (membership['_merge'] == 'both').to_numpy()
just_singles = singles.loc[~is_bright].dropna(subset = match_cols)

# create data just based on position, without intensity etc

mrna_pos = mrna[['x', 'y', 'z']].to_numpy()
singles_pos = singles[['x', 'y', 'z']].to_numpy()
brights_pos = brights[['x', 'y', 'z']].to_numpy()

Assess colocalisation of mRNAs and single protein signals

In [None]:
# assess colocalisation of singles and mRNAs

coloc_threshold = 0.4 # largest spot-to-spot distance (um) that counts as colocalised

# distance from every neongreen spot (rows) to every mRNA (columns), followed by
# the one-to-one pairing of spots and mRNAs with the smallest summed distance
distances = scipy.spatial.distance.cdist(singles_pos, mrna_pos, metric = 'euclidean')
opt_index = scipy.optimize.linear_sum_assignment(distances)
ng_rows, mrna_rows = opt_index

# distance of every assigned pair
min_dist = list(distances[ng_rows, mrna_rows])

# opt_index contains row POSITIONS within singles_pos / mrna_pos, not index
# labels, so the results are written by position; spots left without a partner
# (when the two data sets differ in size) keep NaN
n_ng, n_mrna = distances.shape

closest_mrna = np.full(n_ng, np.nan)
ng_distance = np.full(n_ng, np.nan)
closest_mrna[ng_rows] = mrna_rows
ng_distance[ng_rows] = distances[ng_rows, mrna_rows]
singles['closest_mrna'] = closest_mrna
singles['distance'] = ng_distance

closest_ng = np.full(n_mrna, np.nan)
mrna_distance = np.full(n_mrna, np.nan)
closest_ng[mrna_rows] = ng_rows
mrna_distance[mrna_rows] = distances[ng_rows, mrna_rows]
mrna['closest_ng'] = closest_ng
mrna['distance'] = mrna_distance

# split the neongreen spots by whether their assigned mRNA is within the threshold
is_coloc = singles['distance'] <= coloc_threshold
coloc = singles.loc[is_coloc]
non_coloc = singles.loc[~is_coloc].dropna(subset = ['x'])

print(coloc)

Assess colocalisation of mRNAs and bright neongreen foci

In [4]:
# assess colocalisation of translation sites and mRNAs

coloc_threshold = 0.4 # largest spot-to-spot distance (um) that counts as colocalised

# distance from every bright focus (rows) to every mRNA (columns), followed by
# the one-to-one pairing of foci and mRNAs with the smallest summed distance
distances_bright = scipy.spatial.distance.cdist(brights_pos, mrna_pos, metric = 'euclidean')
opt_index_bright = scipy.optimize.linear_sum_assignment(distances_bright)
bright_rows, mrna_rows = opt_index_bright

# distance of every assigned pair
min_dist = list(distances_bright[bright_rows, mrna_rows])

# opt_index_bright contains row POSITIONS within brights_pos / mrna_pos, not
# index labels, so the results are written by position; spots left without a
# partner (when the two data sets differ in size) keep NaN
n_bright, n_mrna = distances_bright.shape

closest_mrna = np.full(n_bright, np.nan)
bright_distance = np.full(n_bright, np.nan)
closest_mrna[bright_rows] = mrna_rows
bright_distance[bright_rows] = distances_bright[bright_rows, mrna_rows]
brights['closest_mrna'] = closest_mrna
brights['distance'] = bright_distance

closest_bright = np.full(n_mrna, np.nan)
distance_to_bright = np.full(n_mrna, np.nan)
closest_bright[mrna_rows] = bright_rows
distance_to_bright[mrna_rows] = distances_bright[bright_rows, mrna_rows]
mrna['closest_bright'] = closest_bright
mrna['distance_to_bright'] = distance_to_bright

# split the bright foci by whether their assigned mRNA is within the threshold
is_bright_coloc = brights['distance'] <= coloc_threshold
bright_coloc = brights.loc[is_bright_coloc]
bright_no_coloc = brights.loc[~is_bright_coloc].dropna(subset = ['x'])

Calculate ribosome number, taking into account number of mRNAs in each translation site (important for aggregates of mRNAs)

In [None]:
# give every mRNA an ID equal to its row position (0, 1, 2, ...); the
# 'closest_mrna' columns filled in by the colocalisation cells hold these positions

mrna['IDs'] = list(map(str, range(len(mrna))))
print(mrna)
mrna['IDs'] = mrna['IDs'].astype(float)
mrna = mrna.rename(columns = {'intensity': 'mrna_intensity'})

# pair each colocalised translation site with its mRNA so both intensities sit in one row

coloc_brights_mRNAs = pd.merge(bright_coloc, mrna, left_on = 'closest_mrna', right_on = 'IDs')
print(coloc_brights_mRNAs)

# pair each colocalised single protein with its mRNA in the same way

coloc_singles_mRNAs = pd.merge(coloc, mrna, left_on = 'closest_mrna', right_on = 'IDs')
print(coloc_singles_mRNAs)

In [None]:
# reference intensities used for normalisation: the median single protein
# intensity and the median mRNA intensity

single_protein = just_singles.loc[:, 'intensity'].median()
single_mrna = mrna.loc[:, 'mrna_intensity'].median()

print(single_protein)
print(single_mrna)

Plot the distributions of mRNAs and single proteins to ensure the median is representative for normalisation

In [None]:
# histogram of mRNA intensities with the median marked in red (to check it's at the peak)
plt.hist(x = mrna['mrna_intensity'], bins = 100)
plt.axvline(single_mrna, color = 'red')
plt.show()

In [None]:
# histogram of single protein intensities with the median marked in red (to check it's at the peak)
plt.hist(x = just_singles['intensity'], bins = 70)
plt.axvline(single_protein, color = 'red')
plt.show()

In [9]:
# express the bright translation site data in units of single molecules; the
# mRNA value is rounded to the nearest whole number
coloc_brights_mRNAs['TS_intensity_norm'] = coloc_brights_mRNAs['intensity'].div(single_protein)
coloc_brights_mRNAs['mrna_intensity_norm'] = coloc_brights_mRNAs['mrna_intensity'].div(single_mrna).round()

# same normalisation for the full protein data (mRNA value left unrounded)
coloc_singles_mRNAs['TS_intensity_norm'] = coloc_singles_mRNAs['intensity'].div(single_protein)
coloc_singles_mRNAs['mrna_intensity_norm'] = coloc_singles_mRNAs['mrna_intensity'].div(single_mrna)

Plot the normalised intensities of bright translation sites against mRNAs to visualise the relationship and inspect the data

In [None]:
# Plot the normalised intensities of bright translation sites against mRNAs

# scipy.stats is not among the imports at the top of the script, so load it here
import scipy.stats

plt.figure(figsize=(6,4))
sns.regplot(data=coloc_brights_mRNAs, x="mrna_intensity_norm", y="TS_intensity_norm")
plt.ylabel('Translation site intensity (normalised)')
plt.xlabel('mRNA intensity (normalised)')
plt.show()

# get the parameters of the regression.
# The fit is made on the data themselves rather than on the points of the line
# drawn by regplot: fitting the drawn line returns the slope and intercept but
# r, p and the standard error then describe a perfect line, not the data.
fit_data = coloc_brights_mRNAs[['mrna_intensity_norm', 'TS_intensity_norm']].dropna()
slope, intercept, r, p, sterr = scipy.stats.linregress(x=fit_data['mrna_intensity_norm'], y=fit_data['TS_intensity_norm'])
print(intercept, slope)

Calculate ribosome number:

In [None]:
# Calculate ribosome number (new method - taking into account there could be more than one mRNA per translation site)

import os

# NOTE(review): the meaning of 575 and 759 is not stated in this script; the
# expression gives the first length a weight of 0.5 and the second a weight of 1.
# Presumably these are the lengths of the SunTag array and of the downstream
# coding sequence - TODO confirm and record the units.
correction_factor = (575*0.5 + 759)/(575 + 759)

# number of mRNAs in each translation site; values below 1 (a rounded copy number
# of 0) are raised to 1 because dividing by 0 would give an infinite ribosome number
mrna_copies = coloc_brights_mRNAs['mrna_intensity_norm'].clip(lower = 1)

# ribosomes per mRNA = translation site intensity (in single proteins) per mRNA copy,
# divided by the correction factor
ribosome_number = (coloc_brights_mRNAs['TS_intensity_norm']/mrna_copies)/correction_factor
ribosome_number = ribosome_number.to_frame(name = 'Ribosome_number')
print(ribosome_number)

# os.path.join keeps the output in the working directory when `folder` is empty
# (plain concatenation would produce '/ribo_numbers.csv', i.e. the filesystem root)
ribosome_number.to_csv(os.path.join(folder, 'ribo_numbers.csv'))