In [1]:
import pandas as pd
import numpy as np
from multiprocessing import Pool
from functools import partial
import glob
import os
import plotly.express as px
import math
import matplotlib.pyplot as plt
from matplotlib import colors
import pybedtools as pbed
from scipy import stats, special
from statsmodels.stats import multitest
import statsmodels.api as sm
import statsmodels.formula.api as smf
import plotly.io as pio
import seaborn as sns

plt.rcParams['svg.fonttype'] = 'none'
base_dir = "http://bartzabel.ls.manchester.ac.uk/orozcolab/SNP2Mechanism/"

In [2]:
metadata_hic = pd.read_csv(f"{base_dir}/metadata/cleaned_HiC_metadata.csv", index_col = 0)
loops_analysed = pd.read_pickle(f"http://bartzabel.ls.manchester.ac.uk/orozcolab/SNP2Mechanism/hic/loops/aggregated_counts/aggregated_normalized_loops_CD4_CD8.pk")

In [3]:
column_name_dict = dict(zip(metadata_hic['folder_name'], metadata_hic['proper_name']))
loops_analysed = loops_analysed.rename(columns=column_name_dict)
loops_counts_melted = pd.melt(loops_analysed, id_vars=['chrA', 'A_start', 'A_end', 'chrB', 'B_start', 'B_end', 'FDR', 'DETECTION_SCALE', 'distance_bin'], 
        value_vars=loops_analysed.columns.difference(['chrA', 'A_start', 'A_end', 'chrB', 'B_start', 'B_end', 'FDR', 'DETECTION_SCALE', 'distance_bin']),
        var_name="proper_name",value_name="interaction_strength")
loops_counts_melted = loops_counts_melted.merge(metadata_hic[["patient","cell_type","condition","proper_name"]], on = "proper_name")

In [10]:
chrom, start_A, start_B = "12", 6755000, 6825000
gene = "CD4"
filtered_df = loops_counts_melted[(loops_counts_melted["chrA"] == chrom) & (loops_counts_melted["A_start"] == start_A) & (loops_counts_melted["B_start"] == start_B)]
filtered_df["cell_type"] = filtered_df["cell_type"].map({"CD4": "CD4", "CD8": "CD8", "CD4_SF": "CD4", "CD8_SF": "CD8"})
# Create the strip plot
fig = px.strip(filtered_df, x="cell_type", y="interaction_strength", color="condition",
               hover_name="proper_name", hover_data=["interaction_strength"],
               color_discrete_map={"Healthy": "blue", "Diseased": "red"},
               title=f'Loop Strength of {gene} loop across samples',
               labels={"cell_type": "Cell Type", "interaction_strength": "interaction strength"})

# Customizing the marker appearance
fig.update_traces(marker=dict(size=8, opacity=0.8, line=dict(width=1, color='DarkSlateGrey')))
fig.update_layout(
    width=600,  # Set the width of the figure in pixels
    height=700  # Set the height of the figure in pixels
)
# Enhancing layout
fig.update_layout(
    title={'text': f'Loop Strength of {gene} loop across samples', 'y':0.95, 'x':0.5, 'xanchor': 'center', 'yanchor': 'top'},
    xaxis_title="Cell Type",
    yaxis_title="Loop Strength",
    legend_title="Condition",
    font=dict(family="Arial, sans-serif", size=16),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)

# Show the plot
fig.show()
fig.write_image(f"figures/loop_strength_{gene}.svg")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [11]:
chrom, start_A, start_B = "2", 86765000, 86805000
gene = "CD8"
filtered_df = loops_counts_melted[(loops_counts_melted["chrA"] == chrom) & (loops_counts_melted["A_start"] == start_A) & (loops_counts_melted["B_start"] == start_B)]
filtered_df["cell_type"] = filtered_df["cell_type"].map({"CD4": "CD4", "CD8": "CD8", "CD4_SF": "CD4", "CD8_SF": "CD8"})
# Create the strip plot
fig = px.strip(filtered_df, x="cell_type", y="interaction_strength", color="condition",
               hover_name="proper_name", hover_data=["interaction_strength"],
               color_discrete_map={"Healthy": "blue", "Diseased": "red"},
               title=f'Loop Strength of {gene} loop across samples',
               labels={"cell_type": "Cell Type", "interaction_strength": "interaction strength"})

# Customizing the marker appearance
fig.update_traces(marker=dict(size=8, opacity=0.8, line=dict(width=1, color='DarkSlateGrey')))
fig.update_layout(
    width=600,  # Set the width of the figure in pixels
    height=700  # Set the height of the figure in pixels
)
# Enhancing layout
fig.update_layout(
    title={'text': f'Loop Strength of {gene} loop across samples', 'y':0.95, 'x':0.5, 'xanchor': 'center', 'yanchor': 'top'},
    xaxis_title="Cell Type",
    yaxis_title="Loop Strength",
    legend_title="Condition",
    font=dict(family="Arial, sans-serif", size=16),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)

# Show the plot
fig.show()
fig.write_image(f"figures/loop_strength_{gene}.svg")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

