# Spatial Correlation Plotter _ 3

#### Note: Use epi-paint kernel. 

What does this do?
- Builds box plots showing, per protein pair, the distribution of the percentage of correlation values above a positive threshold and below a negative threshold.

In [None]:
import numpy as _np
import pandas as _pd
import matplotlib.pyplot as _plt
import seaborn as _sns
import os.path as _ospath
import os as _os

In [None]:
# --- Run configuration ---
folder = ''  # Folder name for specific cell.

# Correlation values above `maximum_threshold` or below `minimum_threshold`
# are the ones counted when the plotted fractions are computed.
maximum_threshold = 0.7
minimum_threshold = -0.7

# Radius sweep parameters; in this cell they are only used to build the
# names of the input and output folders.
min_radius = 100
step_size = 100
maximum_radius = 1000

# Shared "<min>_<step>_<max>" suffix, built once so that the input and
# output folder names cannot drift apart.
radius_tag = f'{min_radius}_{step_size}_{maximum_radius}'

correlation_data_folder = _ospath.join(folder, 'Analysis', 'Correlations', radius_tag)
correlation_data_files = [f for f in _os.listdir(correlation_data_folder) if f.endswith('.csv')]

output_folder = _ospath.join(folder, 'Analysis', 'Correlations', 'Plots_' + radius_tag)
# exist_ok=True avoids the check-then-create race of testing for the folder first.
_os.makedirs(output_folder, exist_ok=True)

In [None]:
def fraction_data_above_threshold(data, threshold):
    """Return the percentage of values in `data` strictly greater than `threshold`.

    Parameters
    ----------
    data : array-like
        Numeric values of any dimensionality (converted with `numpy.asarray`).
    threshold : float
        Values must exceed this (strict `>`) to be counted.

    Returns
    -------
    float
        Percentage in the range [0, 100]; NaN when `data` has no elements.
    """
    values = _np.asarray(data)
    # Use the total element count: len() only counts the first axis, which
    # over-states the fraction for 2-D input and fails on 0-d input.
    if values.size == 0:
        return float('nan')
    return _np.sum(values > threshold) / values.size * 100

def fraction_data_below_threshold(data, threshold):
    """Return the negated percentage of values in `data` strictly less than `threshold`.

    The result is multiplied by -100, so it is zero or negative.

    Parameters
    ----------
    data : array-like
        Numeric values of any dimensionality (converted with `numpy.asarray`).
    threshold : float
        Values must be below this (strict `<`) to be counted.

    Returns
    -------
    float
        Value in the range [-100, 0]; NaN when `data` has no elements.
    """
    values = _np.asarray(data)
    # Use the total element count: len() only counts the first axis, which
    # over-states the fraction for 2-D input and fails on 0-d input.
    if values.size == 0:
        return float('nan')
    return _np.sum(values < threshold) / values.size * -100

def feature_type(protein_1, protein_2):
    """Classify a protein pair by the chromatin groups of its two members.

    Each name is looked up in three fixed groups (euchromatin,
    heterochromatin, other). A pair containing one 'other' member takes the
    label of its partner's group paired with itself. Any pair not covered
    by the table, including unknown names and other/other, is labelled
    'other'.

    Returns one of 'eu-eu', 'het-het', 'eu_het', 'het_eu' or 'other'.
    """
    groups = (
        ('eu', ('S2P', 'S5P', 'SC35', 'H3K4me3', 'H3K27ac')),
        ('het', ('H3K9me3', 'H3K27me3', 'Lamin')),
        ('other', ('CTCF',)),
    )

    def group_of(name):
        # The groups are disjoint, so the first match is the only match.
        for label, members in groups:
            if name in members:
                return label
        return None

    # NOTE: the mixed '-' / '_' separators are intentional here; the labels
    # are reproduced exactly because other code matches on these strings.
    labels = {
        ('eu', 'eu'): 'eu-eu',
        ('het', 'het'): 'het-het',
        ('eu', 'het'): 'eu_het',
        ('het', 'eu'): 'het_eu',
        ('other', 'eu'): 'eu-eu',
        ('eu', 'other'): 'eu-eu',
        ('other', 'het'): 'het-het',
        ('het', 'other'): 'het-het',
    }
    return labels.get((group_of(protein_1), group_of(protein_2)), 'other')

In [None]:
# Build one summary row per correlation file, then split the table by
# whether the protein pair involves SC35.
data = []
for file in correlation_data_files:
    # Files mentioning CTCF are excluded from this analysis.
    if 'CTCF' in file:
        continue
    # An easy work around as there are other *.csv files in the folder that
    # are not correlation data.
    if 'vs' not in file:
        continue

    # The two protein names are taken from the 1st and 3rd '_'-separated
    # fields of the file name (the 3rd with its extension stripped).
    name_parts = file.split('_')
    protein_1 = name_parts[0]
    protein_2 = name_parts[2].split('.')[0]

    data_file = _np.loadtxt(_ospath.join(correlation_data_folder, file), delimiter=',')
    # NaNs become 0 so they never count as above or below a threshold.
    data_file = _np.nan_to_num(data_file)

    fraction_above_maximum = fraction_data_above_threshold(data_file, maximum_threshold)
    fraction_below_minimum = fraction_data_below_threshold(data_file, minimum_threshold)
    feature = feature_type(protein_1, protein_2)
    data.append({
        'Protein_Pair': protein_1 + '_' + protein_2,
        'Fraction_Above_Positive_Threshold': fraction_above_maximum,
        'Fraction_Below_Negative_Threshold': fraction_below_minimum,
        'Feature_Type': feature,
    })

data = _pd.DataFrame(data, columns=['Protein_Pair', 'Fraction_Above_Positive_Threshold', 'Fraction_Below_Negative_Threshold', 'Feature_Type'])
data['SC35'] = _np.where(data['Protein_Pair'].str.contains('SC35'), 'Yes', 'No')

# Keep only the feature types that are plotted. The list is deliberately not
# named `filter`, which would shadow the builtin for all following cells.
feature_types_to_keep = ['eu-eu', 'eu_het', 'het_eu']
data = data[data['Feature_Type'].isin(feature_types_to_keep)]

# The 'SC35' column already records the substring match, so reuse it
# instead of re-running str.contains for each subset.
involves_sc35 = data['SC35'] == 'Yes'
data_sc35 = data[involves_sc35]
data_not_sc35 = data[~involves_sc35]


In [None]:
# Box colours keyed by the 'SC35' column: pairs involving SC35 ('Yes') are
# highlighted, all other pairs ('No') are grey.
color_palette_pos = {
    'Yes' : '#ff7e00',
    'No' : '#9d9d9d'
}

color_palette_neg = {
    'Yes' : '#7F5AFF',
    'No' : '#9d9d9d'
}

_plt.figure(figsize=(3, 5))

# Both fractions are drawn on the same axes: first the positive-threshold
# column, then the negative-threshold column, each as a box plot with the
# individual points overlaid.
for y_column, box_palette in (
    ('Fraction_Above_Positive_Threshold', color_palette_pos),
    ('Fraction_Below_Negative_Threshold', color_palette_neg),
):
    # whis=[0, 100] is passed so the whiskers are set by percentiles 0 and 100.
    _sns.boxplot(data=data, x='Feature_Type', y=y_column, hue='SC35', legend=False, palette=box_palette, dodge=True, whis=[0, 100], linecolor='k', fill=True)
    _sns.stripplot(data=data, x='Feature_Type', y=y_column, hue='SC35', legend=False, alpha=1, palette='light:#aaff00', linewidth=1, dodge=True)

_plt.ylim(-45, 45)
_plt.yticks([-45, -30, -15, 0, 15, 30, 45])
_plt.axhline(y=0, color='black', linestyle='-', linewidth=1)
# Set after the seaborn calls so it overrides their automatic y-axis label.
_plt.ylabel('Fraction of Data (%)')
_plt.savefig(_ospath.join(output_folder, 'DoC_fractions.svg'), format='svg', bbox_inches='tight')
# Call show(); the bare attribute `_plt.show` never invoked the function.
_plt.show()