In [1]:
# DEPENDENCIES

# import pysam
import pandas as pd
# import HTSeq
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
# from tqdm.notebook import tqdm, trange
# import pstats
# from tqdm import tqdm as tqdm
# import cProfile
import plotly.io as pio
pio.renderers.default = 'notebook_connected'
import numpy as np
pd.set_option('display.max_rows', 100)
import chart_studio.tools as tls

# GLOBAL VARS
# Folder prefix used when figures are written out as HTML files.
HTML_FOLDER = "misc/interactive_figures/"
# Tab-separated counting tables for the two datasets.
MALE_COUNTING_FILE = "countings/FC29_counting_df.v3.tsv"
FEMALE_COUNTING_FILE = "countings/FC30_counting_df.v3.tsv"

# GLOBAL PARAMETERS

# Threshold used to filter the features (gene or TE) mapped by a read:
# minimal subject coverage (nb of aligned bases / total nb of feature's bases).
MIN_SUB_COVERAGE = 0.1
# Export switches passed to the plotting helpers.
EXPORT_TO_CHART_STUDIO = False
EXPORT_TO_HTML = False


# Options handed to `fig.show(config=...)` by the plotting cells.
PLOTLY_SHOW_CONFIG = dict(
    toImageButtonOptions=dict(
        format="svg",  # one of png, svg, jpeg, webp
        filename="custom_image",
        height=None,
        width=None,
        scale=1,  # Multiply title/legend/axis/canvas sizes by this factor
    )
)

import os

import chart_studio
import chart_studio.plotly as py

# SECURITY: credentials must never be hard-coded in a notebook. They are read
# from the environment instead:
#   CHART_STUDIO_USERNAME / CHART_STUDIO_API_KEY
# The API key that used to be written on this line is exposed in the file
# history and should be revoked and regenerated.
_chart_studio_username = os.environ.get("CHART_STUDIO_USERNAME")
_chart_studio_api_key = os.environ.get("CHART_STUDIO_API_KEY")

if _chart_studio_username and _chart_studio_api_key:
    chart_studio.tools.set_credentials_file(
        username=_chart_studio_username,
        api_key=_chart_studio_api_key,
    )
# When the variables are not set nothing is written here; uploads then rely on
# whatever Chart Studio credentials are already configured on this machine.

In [2]:
# Male dataset (FC29): full table, plus a view keeping rows with Counting >= 6.
saved_FC29_counting_df_v3 = pd.read_csv(MALE_COUNTING_FILE, sep="\t")
filtered_FC29_counting_df_v3 = saved_FC29_counting_df_v3.loc[
    lambda counting: counting["Counting"] >= 6
]

# Female dataset (FC30): full table, plus a view keeping rows with Counting >= 2.
saved_FC30_counting_df_v3 = pd.read_csv(FEMALE_COUNTING_FILE, sep="\t")
filtered_FC30_counting_df_v3 = saved_FC30_counting_df_v3.loc[
    lambda counting: counting["Counting"] >= 2
]

# Hard-coded read counters, one list per dataset. As consumed by the plotting
# cells: index 0 is the funnel's starting total, indexes 5 and 6 are the amounts
# subtracted at the two funnel filters, and the entries from index 1 on feed the
# pie chart.
FC29_counters = [22232, 3913, 1524, 1476, 1306, 5126, 8887]
FC30_counters = [7332, 489, 101, 452, 280, 1386, 4624]

In [3]:
## Funnel plots
def draw_funnel_plot(counters, title, fig_name=None, export_to_chart_studio=False, export_to_html=False):
    """Draw the read-filtering funnel of one dataset.

    Parameters
    ----------
    counters : sequence of numbers
        Read counters of the dataset. Index 0 is the starting total, index 5
        is subtracted at the first filter and index 6 at the second one.
    title : str
        Figure title.
    fig_name : str, optional
        Base name used for the exports (Chart Studio file name and HTML file
        name). Required only when one of the export flags is set.
    export_to_chart_studio : bool, optional
        Upload the figure with `py.plot` when true.
    export_to_html : bool, optional
        Write `HTML_FOLDER + fig_name + ".html"` when true.

    Raises
    ------
    ValueError
        If an export is requested without `fig_name`.
    """
    # The export arguments are optional so that the function can be called
    # just to display a figure; exporting without a name cannot work.
    if (export_to_chart_studio or export_to_html) and not fig_name:
        raise ValueError("fig_name is required when exporting the funnel plot")

    total_reads = counters[0]
    after_first_filter = total_reads - counters[5]
    after_second_filter = after_first_filter - counters[6]

    data = dict(
        number=[total_reads, after_first_filter, after_second_filter],
        Filters=["Reads aligned on a TE", "Reads with at least 10% subject cover", "Reads with less non-overlapping bases on TE"])
    fig = px.funnel(data, x='number', y='Filters', title=title)
    fig.show(config=PLOTLY_SHOW_CONFIG)
    if export_to_chart_studio:
        py.plot(fig, filename=str(fig_name), auto_open=False)
    if export_to_html:
        fig.write_html(HTML_FOLDER + fig_name + ".html")

draw_funnel_plot(FC29_counters, "Male Dataset", "male_funnel_plot", EXPORT_TO_CHART_STUDIO, EXPORT_TO_HTML)

In [None]:
# Same call shape as the male funnel: the figure name and both export flags
# are passed explicitly.
draw_funnel_plot(FC30_counters, "Female dataset", "female_funnel_plot", EXPORT_TO_CHART_STUDIO, EXPORT_TO_HTML)

In [None]:
def draw_counters_pie_chart(counters_1, counters_2):
    """Show two donut charts (male / female) of the context of TE-mapped reads.

    Parameters
    ----------
    counters_1, counters_2 : sequence of numbers
        Read counters of the male and female datasets. Entries 1 to 4 are the
        per-context counts plotted here, in the same order as the labels.

    Raises
    ------
    ValueError
        If a counters sequence does not provide one value per label.
    """
    labels = ['Aligned on one TE only', 'Aligned on multiple TE', 'Aligned in exonic region', 'Aligned in intronic region']

    # Only entries 1..4 belong to the pie: entries 5 and 6 are the reads removed
    # by the two funnel filters (see draw_funnel_plot), and for both datasets
    # entries 1..4 add up to the last funnel stage. Slicing with [1:] handed
    # six values to four labels.
    first, last = 1, 1 + len(labels)
    values_1 = list(counters_1[first:last])
    values_2 = list(counters_2[first:last])
    if len(values_1) != len(labels) or len(values_2) != len(labels):
        raise ValueError("each counters sequence must hold at least {} entries".format(last))

    # Create subplots: use 'domain' type for Pie subplot
    fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
    fig.add_trace(go.Pie(labels=labels, values=values_1, name="Male dataset"), 1, 1)
    fig.add_trace(go.Pie(labels=labels, values=values_2, name="Female dataset"), 1, 2)

    # Use `hole` to create a donut-like pie chart
    fig.update_traces(hole=.3, textinfo='value', hoverinfo="label+value+name")

    fig.update_layout(
        title_text="Context of reads mapped on TE",
        # Add annotations in the center of the donut pies.
        annotations=[dict(text='Male', x=0.20, y=0.5, font_size=20, showarrow=False),
                     dict(text='Female', x=0.81, y=0.5, font_size=20, showarrow=False)])
    fig.show(config=PLOTLY_SHOW_CONFIG)

draw_counters_pie_chart(FC29_counters, FC30_counters)

In [None]:
def draw_icicle(df):
    """Display an icicle chart of `df`.

    Rectangles follow the Subclass > Superfamily > Family > Insertion path,
    are sized by the `Counting` column and coloured by the `mean_subcov` column.
    """
    hierarchy = ['Subclass', 'Superfamily', 'Family', 'Insertion']
    # Centre the colour scale on the Counting-weighted average of mean_subcov.
    weighted_midpoint = np.average(df['mean_subcov'], weights=df['Counting'])
    icicle = px.icicle(
        df,
        path=hierarchy,
        values='Counting',
        color='mean_subcov',
        hover_data=['Counting'],
        color_continuous_scale='RdBu',
        color_continuous_midpoint=weighted_midpoint,
    )
    icicle.update_traces(root_color="lightgrey")
    icicle.update_layout(margin={'t': 50, 'l': 25, 'r': 25, 'b': 25})
    icicle.show()

def draw_sunburst(df):
    """Display a sunburst chart of `df`.

    Sectors follow the Subclass > Superfamily > Family > Insertion path, are
    sized by the `Counting` column and coloured by the `mean_subcov` column on
    the 'Plasma' scale, with the colour midpoint fixed at 0.5.
    """
    fig = px.sunburst(
        df,
        path=['Subclass', 'Superfamily', 'Family', 'Insertion'],
        values='Counting',
        color='mean_subcov',
        hover_data=['Counting'],
        color_continuous_scale='Plasma',
        # Fixed midpoint; the former np.average(0.5) call was just this constant.
        color_continuous_midpoint=0.5,
    )
    fig.update_traces(root_color="lightgrey")
    fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
    fig.show()

In [None]:
def draw_2_sunburst_charts(counting_1, counting_2):
    """Show two sunburst charts side by side and save them as one HTML file.

    Parameters
    ----------
    counting_1, counting_2 : pandas.DataFrame
        Counting tables drawn in the left and right subplot. The columns
        'Subclass', 'Superfamily', 'Family', 'Insertion', 'Counting' and
        'mean_subcov' are read.

    Returns
    -------
    The combined figure. It is also displayed and written to
    `HTML_FOLDER + "TE_LR_RNAseq_sunbursts_comparison.html"`.
    """
    fig = make_subplots(rows=1, cols=2, specs=[[{"type": "sunburst"}, {"type": "sunburst"}]])

    def make_sunburst(counting):
        # The midpoint is the Counting-weighted average of mean_subcov, as in
        # draw_icicle; weighting mean_subcov by itself was a copy-paste slip.
        weighted_midpoint = np.average(counting['mean_subcov'], weights=counting['Counting'])
        return px.sunburst(
            counting,
            path=['Subclass', 'Superfamily', 'Family', 'Insertion'],
            values='Counting',
            color='mean_subcov',
            hover_data=['Counting'],
            color_continuous_scale='Plasma',
            color_continuous_midpoint=weighted_midpoint,
        )

    # NOTE(review): only the first trace of each px figure is copied below; the
    # layout of those figures is not. Presumably the colour scale / midpoint
    # settings live in that layout -- confirm the combined figure is coloured
    # as intended.
    for col, counting in enumerate((counting_1, counting_2), start=1):
        fig.add_trace(make_sunburst(counting).data[0], row=1, col=col)
    fig.show(config=PLOTLY_SHOW_CONFIG)
    fig.write_html(HTML_FOLDER + "TE_LR_RNAseq_sunbursts_comparison.html")
    return fig

sunburst = draw_2_sunburst_charts(filtered_FC29_counting_df_v3, filtered_FC30_counting_df_v3)
# py.plot(sunburst, filename = 'TE_LR_RNAseq_sunbursts', auto_open=False)

In [None]:
# Families drawn by the stand-alone violin chart of this cell.
family_list = [
    "POGO",
    "Copia_LTR",
    "ROO_I",
    "Gypsy12_LTR",
    "DNAREP1_DM",
    "Mariner2_DM",
    "BARI_DM",
]

def get_family_list(counting_df, subclass):
    """Return the distinct families of `subclass` that have at least one read.

    Parameters
    ----------
    counting_df : pandas.DataFrame
        Table with at least the 'Counting', 'Subclass' and 'Family' columns.
    subclass : str
        Value of the 'Subclass' column to select.

    Returns
    -------
    list
        Distinct 'Family' values of the selected rows, in order of first
        appearance (empty when nothing matches).
    """
    # Compare explicitly with zero: using the integer column directly in `&`
    # is a bitwise AND with the boolean mask, which only keeps odd counts.
    has_reads = counting_df["Counting"] > 0
    in_subclass = counting_df["Subclass"] == subclass
    return counting_df.loc[has_reads & in_subclass, "Family"].unique().tolist()

def draw_violin_charts(counting_df_1, counting_df_2, family_list):
    """Draw per-family violin plots for two datasets, one subplot row each.

    For every family, each insertion with a positive count contributes one
    point: log10(count / total count of the dataset).

    Parameters
    ----------
    counting_df_1, counting_df_2 : pandas.DataFrame
        Counting tables shown in the first ("Male") and second ("Female") row.
        The 'Family', 'Insertion' and 'Counting' columns are read.
    family_list : list
        Families to draw, one violin per family and per row.

    Returns
    -------
    The figure, which is also displayed.
    """
    palette = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A', '#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52']
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        shared_yaxes='all',
        row_titles=["Male", "Female"],
    )

    def make_violin(expressed_df, total_reads, family, color, show_legend):
        # One violin for the rows of `family`; `expressed_df` only holds rows
        # with a positive count, so the log10 below is always defined.
        family_df = expressed_df[expressed_df["Family"] == family]
        return go.Violin(
            x=family_df["Family"],
            y=np.log10(family_df['Counting'] / total_reads),
            name=family,
            points="all",
            pointpos=0,
            meanline_visible=True,
            line_color="black",
            opacity=0.9,
            customdata=np.stack((family_df['Insertion'], family_df['Counting']), axis=-1),
            hovertemplate=('<b>Insertion</b>: %{customdata[0]}<br>'+'<b>Counting</b>: %{customdata[1]}'),
            legendgroup=family,
            showlegend=show_legend,
            fillcolor=color)

    for row, counting_df in enumerate([counting_df_1, counting_df_2]):
        # Per-dataset values are computed once here rather than once per family.
        total_reads = counting_df["Counting"].sum()
        expressed_df = counting_df[counting_df["Counting"] > 0]
        for col, family in enumerate(family_list):
            # The palette is cycled when there are more families than colours.
            color = palette[col % len(palette)]
            # bool(row): legend entries are emitted for the second row only, so
            # each legend group is listed once.
            fig.add_trace(make_violin(expressed_df, total_reads, family, color, bool(row)), row=row + 1, col=1)
    fig.show(config=PLOTLY_SHOW_CONFIG)
    return fig

fig = draw_violin_charts(saved_FC29_counting_df_v3, saved_FC30_counting_df_v3, family_list)

In [None]:
def generate_all_violin_plots(male_counting_df, female_counting_df):
    """Draw one male/female violin figure per subclass found in either table.

    Parameters
    ----------
    male_counting_df, female_counting_df : pandas.DataFrame
        Counting tables; the 'Subclass' column lists the subclasses to plot and
        the families of each subclass come from `get_family_list`.
    """
    subclasses = set(male_counting_df["Subclass"].unique()) | set(female_counting_df["Subclass"].unique())
    # Sorted (by string form, so mixed value types cannot break the sort) to get
    # the same figure order and the same family colours on every run; plain set
    # iteration order is not stable between kernels.
    for subclass in sorted(subclasses, key=str):
        print(subclass)
        families = set(get_family_list(male_counting_df, subclass))
        families.update(get_family_list(female_counting_df, subclass))
        new_fig = draw_violin_charts(male_counting_df, female_counting_df, sorted(families, key=str))
        # Exports, currently disabled:
        # new_fig.write_html(HTML_FOLDER + "TE_LR_RNAseq_violin_plot.{}.log2.html".format(subclass))
        # py.plot(new_fig, filename = 'TE_LR_RNAseq_violin.{}.log2'.format(subclass), auto_open=False)
generate_all_violin_plots(saved_FC29_counting_df_v3, saved_FC30_counting_df_v3)

In [None]:
def get_insertion_length(insertion_name):
    """Return the length encoded at the end of an insertion name.

    The last two '$'-separated fields of `insertion_name` are read as the
    start and end positions; the length is `end - start + 1`.

    Raises
    ------
    ValueError
        If the name has fewer than two '$'-separated fields or if the last two
        fields are not integers.
    """
    start, end = insertion_name.split('$')[-2:]
    return int(end) - int(start) + 1


def _first_row_per_insertion(counting_df):
    """Map each insertion name to [subclass, superfamily, family, counting] of its first row."""
    first_rows = {}
    columns = ["Insertion", "Subclass", "Superfamily", "Family", "Counting"]
    for insertion, *fields in counting_df[columns].itertuples(index=False, name=None):
        # setdefault keeps the first occurrence when a name appears several times.
        first_rows.setdefault(insertion, fields)
    return first_rows


def get_insertion_merged_df(female_counting, male_counting):
    """Merge the female and male counting tables into one row per insertion.

    Parameters
    ----------
    female_counting, male_counting : pandas.DataFrame
        Counting tables with the 'Insertion', 'Subclass', 'Superfamily',
        'Family' and 'Counting' columns.

    Returns
    -------
    pandas.DataFrame
        Columns: 'Subclass', 'Superfamily', 'Family', 'Insertion', 'Length',
        'Female_counting', 'Male_counting'. An insertion missing from one table
        gets a count of 0 for that table. The annotation columns come from the
        female table when the insertion is present there, otherwise from the
        male table. Rows are ordered with the female insertions first (in table
        order), followed by the male-only insertions.
    """
    # One pass over each table builds the lookups, instead of rebuilding a list
    # and filtering the whole table for every insertion.
    female_rows = _first_row_per_insertion(female_counting)
    male_rows = _first_row_per_insertion(male_counting)

    records = []
    # dict.fromkeys de-duplicates while keeping a deterministic order.
    for insertion in dict.fromkeys([*female_rows, *male_rows]):
        female_row = female_rows.get(insertion)
        male_row = male_rows.get(insertion)
        subclass, superfamily, family, _ = female_row if female_row is not None else male_row
        records.append((
            subclass,
            superfamily,
            family,
            insertion,
            get_insertion_length(insertion),
            female_row[3] if female_row is not None else 0,
            male_row[3] if male_row is not None else 0,
        ))

    insertion_merged_df = pd.DataFrame(records, columns=["Subclass", "Superfamily", "Family", "Insertion", "Length", "Female_counting", "Male_counting"])

    return insertion_merged_df

# Female (FC30) table first, male (FC29) table second -- note that this is the
# reverse of the male/female order used by the other cells.
insertion_merged_df = get_insertion_merged_df(
    female_counting=saved_FC30_counting_df_v3,
    male_counting=saved_FC29_counting_df_v3,
)

In [None]:
# Scatter of female vs male counts, one point per insertion, coloured by subclass.
df = insertion_merged_df
fig = px.scatter(
    df,
    x="Female_counting",
    y="Male_counting",
    color="Subclass",
    hover_data=['Subclass', 'Superfamily', 'Family', 'Insertion', 'Female_counting', 'Male_counting', 'Length'],
)
# Fixed axis windows: x shows 0-100, y shows 0-200.
fig.update_xaxes(range=[0, 100]).update_yaxes(range=[0, 200])
fig.show(config=PLOTLY_SHOW_CONFIG)