# Spatial Correlation Plotter _ 2

#### Note: Use epi-paint kernel. 

What does this do?
- Draws a scatter plot for every protein-vs-protein combination, with each point coloured by its DoC measure.

In [None]:
import os as _os
import os.path as _ospath
import numpy as _np
import pandas as _pd
import h5py as _h5py
import yaml as _yaml
from PyQt5.QtWidgets import QMessageBox as _QMessageBox
import matplotlib.pyplot as _plt
import seaborn as _sns
import itertools
from tqdm import tqdm
from matplotlib.colors import LogNorm
from matplotlib.colors import LinearSegmentedColormap, to_hex

In [None]:
# Define the folder with the data and the correlation files.
folder = '' # Folder name for specific cell.
min_radius = 100
step_size = 100
max_radius = 1000

# The radius settings are part of both the correlation folder name and the
# output folder name, so the suffix is built once and reused.
radius_tag = f'{min_radius}_{step_size}_{max_radius}'

# Define the data folder and the correlation data folder.
data_folder = _ospath.join(folder, 'Masked')
correlation_data_folder = _ospath.join(folder, 'Analysis', 'Correlations', radius_tag)

# Define the data files and the correlation files.
data_file_extn = '.hdf5'
correlation_file_extn = '.csv'
data_files = [f for f in _os.listdir(data_folder) if f.endswith(data_file_extn)]
correlation_files = [f for f in _os.listdir(correlation_data_folder) if f.endswith(correlation_file_extn)]
pixel_size = 130

# Define and make the output folder.
output_folder = _ospath.join(folder, 'Analysis', 'Correlations', 'Plots' + '_' + radius_tag)
# exist_ok=True creates the folder only when it is missing, without a separate
# existence check that could race with another process.
_os.makedirs(output_folder, exist_ok=True)

# Define the proteins in the data. The plotting will be in this order.
proteins = ['S2P', 'S5P', 'SC35', 'H3K4me3', 'H3K27ac', 'CTCF', 'H3K27me3', 'H3K9me3', 'Lamin']

In [None]:
# Picasso functions
def load_locs(path, qt_parent=None):
    """Load a localization table together with its metadata.

    Parameters
    ----------
    path : str
        Path of the HDF5 file; its ``locs`` dataset is read in full.
    qt_parent : optional
        Passed through to ``load_info``.

    Returns
    -------
    tuple
        ``(locs, info)`` where ``locs`` is a NumPy record array and ``info``
        is whatever ``load_info`` returns for ``path``.
    """
    with _h5py.File(path, "r") as hdf_file:
        raw_table = hdf_file["locs"][...]
    # A record array exposes the fields as attributes (e.g. ``locs.x``).
    locs = _np.rec.array(raw_table, dtype=raw_table.dtype)
    return locs, load_info(path, qt_parent=qt_parent)

class NoMetadataFileError(FileNotFoundError):
    """Raised by ``load_info`` when the ``.yaml`` metadata file is missing."""


def load_info(path, qt_parent=None):
    """Load the YAML metadata that sits next to a data file.

    The metadata file name is ``path`` with its extension replaced by
    ``.yaml``.

    Parameters
    ----------
    path : str
        Path of the data file whose metadata should be loaded.
    qt_parent : optional
        When not ``None``, a critical ``QMessageBox`` with this parent is
        shown if the metadata file is missing.

    Returns
    -------
    list
        One entry per YAML document found in the metadata file.

    Raises
    ------
    NoMetadataFileError
        If the metadata file does not exist. The original
        ``FileNotFoundError`` is attached as ``__cause__``.
    """
    filename = _ospath.splitext(path)[0] + ".yaml"
    try:
        with open(filename, "r") as info_file:
            # NOTE(security): UnsafeLoader can construct arbitrary Python
            # objects. Only load metadata files from trusted sources.
            info = list(_yaml.load_all(info_file, Loader=_yaml.UnsafeLoader))
    except FileNotFoundError as e:
        print("\nAn error occured. Could not find metadata file:\n{}".format(filename))
        if qt_parent is not None:
            _QMessageBox.critical(
                qt_parent,
                "An error occured",
                "Could not find metadata file:\n{}".format(filename),
            )
        # Chain explicitly so the traceback marks the missing file as the
        # direct cause rather than an error raised while handling another.
        raise NoMetadataFileError(e) from e
    return info



In [None]:
# Count the localizations in every data file. The total and the largest
# single-file count are kept for alpha normalization.
locs_per_file = [
    load_locs(_ospath.join(data_folder, name))[0].shape[0]
    for name in data_files
]
total_locs = sum(locs_per_file)
max_locs = max(locs_per_file, default=0)

total_locs, max_locs

In [None]:
# Draw an all-vs-all grid of scatter plots. The panel at (row, column) shows the
# localizations of the row protein, coloured by their correlation value against
# the column protein. Diagonal panels are left empty.
alphas = []  # Alpha of every panel that was drawn, kept for inspection.

n_proteins = len(proteins)
# The grid follows the protein list instead of a fixed 9 x 9 layout.
# squeeze=False keeps `axes` two-dimensional even for a single protein.
fig, axes = _plt.subplots(n_proteins, n_proteins, figsize = (20,20), sharex = True, sharey = True, squeeze = False)

corr_threshold = 0.7  # Only points with |correlation| above this are drawn.
gamma = 0.005  # Exponent used to turn the point fraction into an alpha.


def _format_panel(ax, row_id, column_id):
    """Hide the ticks, force an equal aspect and label the outer panels."""
    ax.tick_params(axis='both', which='both', bottom=False, top=False, left=False, right=False, labelbottom=False, labelleft=False)
    ax.set_aspect('equal', adjustable='box')
    if column_id == 0:
        ax.set_ylabel(proteins[row_id])
    if row_id == n_proteins - 1:
        ax.set_xlabel(proteins[column_id])


for row_id, row_protein in enumerate(proteins):
    # The coordinates depend only on the row protein, so they are loaded once
    # per row instead of once per panel.
    data_file_name = next((f for f in data_files if row_protein in f), None)
    if data_file_name is None:
        print(f'Data for {row_protein} not found')
        row_data = None
    else:
        locs, _ = load_locs(_ospath.join(data_folder, data_file_name))
        row_data = _np.column_stack((locs.x, locs.y))

    for column_id, column_protein in enumerate(proteins):
        ax = axes[row_id, column_id]
        # Every panel gets the same formatting, including empty ones.
        _format_panel(ax, row_id, column_id)
        if row_protein == column_protein or row_data is None:
            continue

        corr = _np.loadtxt(_ospath.join(correlation_data_folder, row_protein + '_vs_' + column_protein + correlation_file_extn), delimiter = ',')
        # Keep only the strongly (anti-)correlated points.
        mask = _np.abs(corr) > corr_threshold
        data = row_data[mask]
        corr = corr[mask]
        if corr.shape[0] == 0:
            # Nothing passed the threshold. The alpha formula is undefined
            # for zero points, so the panel stays empty.
            continue

        # Panels with more points get a lower alpha:
        # alpha = 10 * (1 - (n / max_locs) ** gamma).
        alpha = 10 * (1 - (corr.shape[0] / max_locs) ** gamma)
        # Matplotlib only accepts alpha values within [0, 1].
        alpha = min(max(alpha, 0.0), 1.0)
        alphas.append(alpha)
        # Plot the data in the subplot.
        ax.scatter(data[:, 0], data[:, 1], marker = '.', edgecolors = 'none', s = 1.5, alpha = alpha, c = corr, cmap = 'PuOr_r', vmin = -1, vmax = 1)

# The y limits are shared, so every invert_yaxis() call flips all panels at
# once. Inverting only panels that are not yet inverted gives the same result
# for any grid size, instead of depending on the parity of the panel count.
for ax in axes.flat:
    if not ax.yaxis_inverted():
        ax.invert_yaxis()
_plt.savefig(_ospath.join(output_folder, 'All_Proteins.png'), dpi = 600, bbox_inches = 'tight')
_plt.show()
    
    