In [1]:
import numpy as np 
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns
import glob

In [2]:
def seq_to_int(seq):
	"""Encode a nucleotide sequence as an array of integer codes.

	Every element of `seq` is looked up in a fixed table
	(A -> 0, C -> 1, G -> 2, T -> 3). Anything outside that table,
	including lowercase letters, raises KeyError.

	Parameters
	----------
	seq : iterable of str
		The sequence to encode, typically a string of A/C/G/T.

	Returns
	-------
	numpy.ndarray
		One integer code per element of `seq`, in order.
	"""
	base_to_code = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
	codes = []
	for base in seq:
		codes.append(base_to_code[base])
	return np.array(codes)

def get_paths(patterns=None):
	"""Index barcode count files by (growth condition, replicate, nucleic acid).

	File names are expected to look like ``<gc>-<rep>_<xna>_...``: the part
	before the first underscore is split on '-' into growth condition and
	replicate, and the part after the first underscore is the nucleic acid
	tag (looked up as 'RNA' / 'DNA' by `import_data`).

	Parameters
	----------
	patterns : iterable of str, optional
		Glob patterns to search. Defaults to the two sequencing-run
		directories this notebook was written for.

	Returns
	-------
	dict
		Maps ``(gc, rep, xna)`` tuples of strings to the matching file path.
		If several files produce the same key, the one found last wins
		(patterns are processed in order).

	Raises
	------
	ValueError
		If a matched file name does not follow the expected layout.
	"""
	import os  # local import: only this function needs it

	if patterns is None:
		patterns = (
			"../../data/barcode_counts/20230907_barcode/*",
			"../../data/barcode_counts/20231207_barcode/*",
		)

	path_index = {}
	for pattern in patterns:
		for path in glob.glob(pattern):
			# basename() honours the platform's path separator, unlike
			# splitting on '/', and the name is parsed only once.
			fields = os.path.basename(path).split('_')
			sample = fields[0].split('-')
			if len(fields) < 2 or len(sample) < 2:
				raise ValueError(
					f"unexpected barcode count file name: {path!r} "
					"(expected '<condition>-<replicate>_<DNA|RNA>_...')"
				)
			path_index[(sample[0], sample[1], fields[1])] = path

	return path_index

# Build the (growth condition, replicate, nucleic acid) -> file path index once
# at module level. NOTE: this rebinds the name `map`, so Python's builtin
# `map` is not reachable under that name in this notebook after this line runs.
map = get_paths()

def import_data(gc, rep, map, df_map):
	"""Load and combine the DNA and RNA barcode counts of one sample.

	Parameters
	----------
	gc, rep : str
		Growth condition and replicate; together with 'RNA' / 'DNA' they
		form the keys looked up in `map`.
	map : dict
		Maps ``(gc, rep, xna)`` to a count file path, as built by `get_paths`.
		Each file is read as two whitespace-separated columns without a
		header: count, then barcode.
	df_map : pandas.DataFrame
		Barcode annotation table. Must contain the columns 'barcode',
		'promoter' and 'wt_seq'; barcodes absent from it are dropped.

	Returns
	-------
	pandas.DataFrame
		`df_map` rows joined with their counts plus the added columns
		'ct_0' (DNA count), 'ct_1' (RNA count), 'ct' (their sum),
		'relative_counts' ((ct_1 + 1) / (ct_0 + 1)), and the integer-encoded
		sequences 'int_promoter' and 'int_wt'.

	Raises
	------
	KeyError
		If `map` has no RNA or DNA entry for (gc, rep).
	"""
	RNA_file = map[(gc, rep, 'RNA')]
	DNA_file = map[(gc, rep, 'DNA')]

	# sep=r"\s+" is the documented equivalent of delim_whitespace=True, which
	# is deprecated since pandas 2.2.
	df_DNA = pd.read_csv(DNA_file, names=["ct_0", "barcode"], sep=r"\s+")
	df_RNA = pd.read_csv(RNA_file, names=["ct_1", "barcode"], sep=r"\s+")

	# Outer join keeps barcodes seen in only one library; their missing count
	# becomes 0. The inner join then keeps only annotated barcodes.
	df_outer = df_DNA.merge(df_RNA, on="barcode", how="outer").fillna(0)
	df = df_outer.merge(df_map, on="barcode", how="inner")
	df['ct'] = df['ct_1'] + df['ct_0']
	# Pseudocount of 1 on both sides avoids division by zero.
	df['relative_counts'] = (df.ct_1 + 1) / (df.ct_0 + 1)
	df['int_promoter'] = df['promoter'].apply(seq_to_int)
	df['int_wt'] = df['wt_seq'].apply(seq_to_int)
	return df

def expression_shift(df_input, prom):
	"""Sum relative expression deviations per base and position for one promoter.

	For every row whose 'name' equals `prom`, the row's relative deviation
	from the promoter mean, (relative_counts - mean) / mean, is added to the
	cell (base at position j, j) for each of the first 160 positions of
	its 'int_promoter' sequence.

	Parameters
	----------
	df_input : pandas.DataFrame
		Needs the columns 'name', 'relative_counts' and 'int_promoter'.
	prom : str
		Promoter name to select.

	Returns
	-------
	numpy.ndarray
		Array of shape (4, 160); rows are indexed by the integer base code.
	"""
	selected = df_input.loc[df_input.name == prom, :]
	baseline = np.mean(selected.relative_counts)

	encoded_seqs = selected.int_promoter.values
	rel_counts = selected.relative_counts.values

	shift = np.zeros([4, 160])
	for codes, rel in zip(encoded_seqs, rel_counts):
		deviation = (rel - baseline) / baseline
		for pos in range(160):
			shift[int(codes[pos]), pos] += deviation

	return shift

def mutual_information(df_input, prom, drop_out):
	"""Accumulate read counts per base and position for one promoter.

	NOTE: despite its name, this function returns the raw count matrix only;
	no mutual information is computed from it here.

	For every row whose 'name' equals `prom`, the row's DNA count ('ct_0')
	and RNA count ('ct_1') are added to the cell (base at position j, j) for
	each of the first 160 positions of its 'int_promoter' sequence.

	Parameters
	----------
	df_input : pandas.DataFrame
		Needs the columns 'name', 'int_promoter', 'ct_0' and 'ct_1'.
	prom : str
		Promoter name to select.
	drop_out :
		Currently unused; kept so existing calls keep working.

	Returns
	-------
	numpy.ndarray
		Array of shape (2, 4, 160): index 0 holds the summed 'ct_0' counts,
		index 1 the summed 'ct_1' counts, per base code and position. All
		zeros when no row matches `prom`.
	"""
	df = df_input.loc[df_input.name == prom, :]
	positions = np.arange(160)

	freq_mat = np.zeros([2, 4, 160])
	for codes, dna_count, rna_count in zip(df.int_promoter.values, df.ct_0.values, df.ct_1.values):
		# Each sequence is weighted by its OWN counts (row index), not by the
		# counts of whichever row happens to share the position index.
		bases = np.asarray(codes)[:160]
		# (bases[j], j) pairs are unique, so fancy-indexed += is safe here.
		freq_mat[0, bases, positions] += dna_count
		freq_mat[1, bases, positions] += rna_count
	return freq_mat

In [3]:
# Import data
# Barcode annotation table and the metadata tables shipped with the footprints.
df_map = pd.read_csv("./interactive_footprints/20220514_mapping_processed.csv")
df_meta = pd.read_csv('./interactive_footprints/20231207_footprints_meta.csv')
df_regulonDB = pd.read_csv('./interactive_footprints/regulonDB_meta.csv')

# Footprints: keep only replicate 1 rows with d == 1.
df_fp = pd.read_csv('./interactive_footprints/20231207_footprints.csv')
df_fp = df_fp[(df_fp.replicate == 1) & (df_fp.d == 1)]

# Expression shifts: keep only replicate 1. Do not re-read the CSV after this
# filter; plot_regseq fills a fixed (4, 160) array per growth condition and so
# needs exactly one replicate's rows per base.
df_exshift = pd.read_csv('./interactive_footprints/20231207_exshifts.csv')
df_exshift = df_exshift[df_exshift.replicate == 1]


In [4]:
def plot_footprint(footprint, ax, promoter, growth_condition):
	"""Draw one footprint as a bar chart on `ax`.

	Parameters
	----------
	footprint : sequence of float
		Bar heights. They are plotted against the 158 x positions
		-114 .. 43, so the sequence must have that many entries.
	ax : matplotlib.axes.Axes
		Axes to draw on.
	promoter, growth_condition :
		Shown in the left-aligned panel title as '<promoter> - <growth_condition>'.
	"""
	# Leave 15% headroom above the tallest bar.
	ax.set_ylim(top=max(footprint) * 1.15)

	positions = np.arange(-114, 44)
	ax.bar(positions, footprint, color='#738FC1', edgecolor=None, linewidth=0)
	ax.set_title(f'{promoter} - {growth_condition}', fontsize=8, loc='left')

	for axis_name in ('x', 'y'):
		ax.tick_params(axis=axis_name, labelsize=6)

def plot_exshift(exshift, ax_ex):
	"""Draw an expression-shift matrix as a diverging heatmap on `ax_ex`.

	Parameters
	----------
	exshift : array-like
		Matrix to plot. Its rows are labelled A, C, G, T and the x ticks at
		columns 15, 35, ..., 155 are labelled -100, -80, ..., 40.
	ax_ex : matplotlib.axes.Axes
		Axes that receives the heatmap; a narrow colorbar is placed just to
		its right.
	"""
	# Symmetric colour limits so that a shift of zero sits at the palette centre.
	limit = np.max(np.abs(exshift))

	palette = sns.diverging_palette(258, 16, s=56, l=50, n=15, sep=5, center='light', as_cmap=True)
	heatmap = sns.heatmap(exshift, cmap=palette, ax=ax_ex, vmin=-limit, vmax=limit, center=0)

	# Move the colorbar next to the panel: same bottom edge and height.
	colorbar = heatmap.collections[0].colorbar
	panel = ax_ex.get_position()
	colorbar.ax.set_position([panel.x1 + 0.01, panel.y0, 0.02, panel.height])
	colorbar.ax.tick_params(labelsize=5)

	ax_ex.set_yticklabels(['A', 'C', 'G', 'T'], rotation=360, fontsize=6)

	# Eight ticks, 20 columns apart, labelled with positions relative to the TSS.
	ax_ex.set_xticks(np.arange(15, 161, 20))
	ax_ex.set_xticklabels(np.arange(-100, 41, 20), rotation=360, fontsize=6)

In [5]:
def plot_regseq(promoter):
	"""Save footprint / expression-shift panels for every growth condition of a promoter.

	Reads the module-level tables `df_fp` and `df_exshift`. Each growth
	condition becomes one row with the footprint on the left and the
	expression-shift heatmap on the right. Rows are spread over pages of
	10; leftover rows that do not fill a page are added to the last page
	(31 conditions -> pages of 10, 10 and 11 rows). Each page is written to
	'./all_footprints/<promoter>_<page index>.pdf'; that directory must exist.

	Parameters
	----------
	promoter : str
		Value of the 'promoter' column to plot.

	Raises
	------
	ValueError
		If `df_fp` has no rows for `promoter`.
	"""
	rows_per_page = 10

	# Filter data for the given promoter
	df_fp_promoter = df_fp[df_fp['promoter'] == promoter]
	df_exshift_promoter = df_exshift[df_exshift['promoter'] == promoter]

	growth_conditions = df_fp_promoter['growth_condition'].unique()
	n_conditions = len(growth_conditions)
	if n_conditions == 0:
		raise ValueError(f"no footprint data found for promoter {promoter!r}")

	fp_list = []
	exshift_list = []
	for gc in growth_conditions:
		df_fp_gc = df_fp_promoter[df_fp_promoter['growth_condition'] == gc]
		df_exshift_gc = df_exshift_promoter[df_exshift_promoter['growth_condition'] == gc]

		# Bases are numbered 1..4 in the table; row i of the matrix holds base i + 1.
		exshift = np.zeros((4, 160))
		for base_index in range(4):
			exshift[base_index, :] = df_exshift_gc[df_exshift_gc['base'] == (base_index + 1)]['expression_shift'].values

		fp_list.append(df_fp_gc['mut_info'].values)
		exshift_list.append(exshift)

	# Number of pages follows the data instead of assuming exactly 31 conditions.
	n_pages = max(1, n_conditions // rows_per_page)
	for pdf_index in range(n_pages):
		first = pdf_index * rows_per_page
		is_last_page = pdf_index == n_pages - 1
		last = n_conditions if is_last_page else first + rows_per_page

		# The grid gets exactly as many rows as this page plots; squeeze=False
		# keeps `axes` two-dimensional even for a single row.
		fig, axes = plt.subplots(last - first, 2, figsize=(8.5, 11), squeeze=False)
		fig.text(0.00, 0.5, 'Information (bits)', va='center', ha='center', rotation='vertical', fontsize=10)
		fig.text(0.98, 0.5, 'Expression shift', va='center', ha='center', rotation='vertical', fontsize=10)

		for row, index in enumerate(range(first, last)):
			ax_fp, ax_ex = axes[row]
			plot_footprint(fp_list[index], ax_fp, promoter, growth_conditions[index])
			plot_exshift(exshift_list[index], ax_ex)

		# Only the bottom row of each page carries x-axis labels.
		axes[-1][0].set_xlabel('Position relative to TSS', fontsize=9)
		axes[-1][1].set_xlabel('Position Relative to TSS', fontsize=9)

		fig.tight_layout()
		fig.savefig('./all_footprints/{}_{}.pdf'.format(promoter, pdf_index), bbox_inches='tight')
		plt.close(fig)

In [6]:
# Promoter names in order of first appearance in the filtered footprint table.
promoters = list(df_fp['promoter'].unique())

# NOTE(review): the [:1] slice renders only the first promoter — presumably a
# quick check; drop the slice to generate the PDFs for every promoter.
for promoter in promoters[:1]:
	plot_regseq(promoter)