# Associations between molecular and environmental changes along the proximal-to-distal axis of the colon
### Abstract
[ABSTRACT]
### About this notebook
This notebook allows you to reproduce all the results and figures presented in [CITATION].

# TRANSCRIPTOME PROFILING

In [None]:
# Notebook setup: make the project library importable, load the plotting
# style, and create the transcriptome workflow object.
import sys
# Must run before the ProfileAnalysis import: adds the directory holding
# profile_analysis_class.py to the module search path. The path is relative,
# so it depends on the directory the notebook is run from.
sys.path.append("../../../git/lib")
from profile_analysis_class import ProfileAnalysis  # the profile workflow class
import pandas as pd
import matplotlib.pyplot as plt

# Apply the project's matplotlib style sheet.
plt.style.use('../../assets/styles/plotting_style.mplstyle')
# Create the workflow object for the transcriptome analysis; the argument is the
# path used to locate SETTINGS.ini (presumably its containing directory — TODO confirm).
pa_transcriptome = ProfileAnalysis('../../../docker/analysis/transcriptome')

## Assign each sample in clinical data file to a colon section

In [None]:
# Assign each sample in the clinical data file to a colon section (per the
# heading above); any return value is discarded.
pa_transcriptome.create_samples_to_sections_table()

In [None]:
# Plot the sample distribution (presumably samples per colon section — TODO confirm).
pa_transcriptome.plot_sample_distribution()

## Calculate median value for each colon section

In [None]:
# Per-section medians plus a second table bound to `mad_tr` (presumably the
# median absolute deviation — TODO confirm).
medians_tr, mad_tr = pa_transcriptome.median_by_section()

In [None]:
# Preview the first rows of the medians table (assumes a pandas object — TODO confirm).
medians_tr.head()

In [None]:
# Preview the first rows of the second table returned by median_by_section.
mad_tr.head()

## Fit Observables

In [None]:
# "Fit Observables" step: fit the per-section medians. Five results are
# unpacked in the order returned; the names suggest polynomial and sigmoid
# scores/models — TODO confirm against ProfileAnalysis.fit_data.
(
    scores_tr,
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_models_tr,
    sig_models_tr,
) = pa_transcriptome.fit_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=False,
)

In [None]:
# Preview the first rows of the score table returned by fit_data.
scores_tr.head()

## Fit randomly permuted data

In [None]:
# Fit the permuted data; the two results are later passed to plot_gof
# together with the observed scores.
# NOTE(review): dog_allowed=True here, while the fit_data cell in this section
# passes dog_allowed=False — confirm the asymmetry is intended.
poly_perm_scores_tr, sig_perm_scores_tr = pa_transcriptome.fit_random_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=True,
)

## Compare observable vs permuted data

In [None]:
# Compare observed vs. permuted scores; the returned object is displayed in
# the next cell.
models_pvalue_tr = pa_transcriptome.plot_gof(
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_perm_scores_tr,
    sig_perm_scores_tr,
    dist_obs=False,
    dist_perm=False,
)

In [None]:
# Display the object returned by plot_gof (name suggests per-model p-values — TODO confirm).
models_pvalue_tr

## Cluster genes

In [None]:
# Run the clustering step on the score table returned by fit_data.
genes_clusters_tr=pa_transcriptome.cluster_genes(scores_tr)

In [None]:
# Plot the result of cluster_genes.
pa_transcriptome.plot_clusters(genes_clusters_tr)

In [None]:
# Build a summary table from the clustering result and the fit scores.
summary_tr=pa_transcriptome.get_summary_table(genes_clusters_tr, scores_tr)

In [None]:
# Split the summary into three groups, bound here as continuum, sigmoid and discarded.
continuum_tr, sigmoid_tr, discarded_tr = pa_transcriptome.classify_genes(summary_tr)

## Plot distribution of inflexion points for sigmoid genes

In [None]:
# Run strict_sig_list on the sigmoid group and the sigmoid models; with
# plot_dist=True this cell plots the inflexion-point distribution (per the heading).
gene_list_tr, section_l_tr = pa_transcriptome.strict_sig_list(
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

## Plot distribution of inflexion points for random permutation

In [None]:
# Inflexion-point distribution for random permutations (per the heading);
# any return value is discarded.
pa_transcriptome.strict_sig_list_random(
    medians_tr,
    mad_tr,
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

# METHYLOME PROFILING

In [None]:
# Create the workflow object for the methylome analysis; the argument is the
# path used to locate SETTINGS.ini (presumably its containing directory — TODO confirm).
pa_methylome = ProfileAnalysis('../../../docker/analysis/methylome')

## Assign each sample in clinical data file to a colon section

In [None]:
# Assign each sample in the clinical data file to a colon section (per the
# heading above); any return value is discarded.
pa_methylome.create_samples_to_sections_table()

In [None]:
# Plot the sample distribution (presumably samples per colon section — TODO confirm).
pa_methylome.plot_sample_distribution()

## Calculate median value for each colon section

In [None]:
# Per-section medians plus a second table bound to `mad_tr` (presumably the
# median absolute deviation — TODO confirm).
# NOTE(review): the `*_tr` names are reused from the transcriptome section, so
# from here on this section overwrites those earlier results.
medians_tr, mad_tr = pa_methylome.median_by_section()

In [None]:
# Preview the first rows of the medians table (assumes a pandas object — TODO confirm).
medians_tr.head()

In [None]:
# Preview the first rows of the second table returned by median_by_section.
mad_tr.head()

## Fit Observables

In [None]:
# "Fit Observables" step: fit the per-section medians. Five results are
# unpacked in the order returned; the names suggest polynomial and sigmoid
# scores/models — TODO confirm against ProfileAnalysis.fit_data.
(
    scores_tr,
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_models_tr,
    sig_models_tr,
) = pa_methylome.fit_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=False,
)

In [None]:
# Preview the first rows of the score table returned by fit_data.
scores_tr.head()

## Fit randomly permuted data

In [None]:
# Fit the permuted data; the two results are later passed to plot_gof
# together with the observed scores.
# NOTE(review): dog_allowed=True here, while the fit_data cell in this section
# passes dog_allowed=False — confirm the asymmetry is intended.
poly_perm_scores_tr, sig_perm_scores_tr = pa_methylome.fit_random_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=True,
)

## Compare observable vs permuted data

In [None]:
# Compare observed vs. permuted scores; the returned object is displayed in
# the next cell.
models_pvalue_tr = pa_methylome.plot_gof(
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_perm_scores_tr,
    sig_perm_scores_tr,
    dist_obs=False,
    dist_perm=False,
)

In [None]:
# Display the object returned by plot_gof (name suggests per-model p-values — TODO confirm).
models_pvalue_tr

## Cluster genes

In [None]:
# Run the clustering step on the score table returned by fit_data.
genes_clusters_tr=pa_methylome.cluster_genes(scores_tr)

In [None]:
# Plot the result of cluster_genes.
pa_methylome.plot_clusters(genes_clusters_tr)

In [None]:
# Build a summary table from the clustering result and the fit scores.
summary_tr=pa_methylome.get_summary_table(genes_clusters_tr, scores_tr)

In [None]:
# Split the summary into three groups, bound here as continuum, sigmoid and discarded.
continuum_tr, sigmoid_tr, discarded_tr = pa_methylome.classify_genes(summary_tr)

## Plot distribution of inflexion points for sigmoid genes

In [None]:
# Run strict_sig_list on the sigmoid group and the sigmoid models; with
# plot_dist=True this cell plots the inflexion-point distribution (per the heading).
gene_list_tr, section_l_tr = pa_methylome.strict_sig_list(
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

## Plot distribution of inflexion points for random permutation

In [None]:
# Inflexion-point distribution for random permutations (per the heading);
# any return value is discarded.
pa_methylome.strict_sig_list_random(
    medians_tr,
    mad_tr,
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

# MUTATIONS PROFILING

In [None]:
# Create the workflow object for the mutations analysis; the argument is the
# path used to locate SETTINGS.ini (presumably its containing directory — TODO confirm).
pa_mutations = ProfileAnalysis('../../../docker/analysis/mutations')

## Assign each sample in clinical data file to a colon section

In [None]:
# Assign each sample in the clinical data file to a colon section (per the
# heading above); any return value is discarded.
pa_mutations.create_samples_to_sections_table()

In [None]:
# Plot the sample distribution (presumably samples per colon section — TODO confirm).
pa_mutations.plot_sample_distribution()

## Calculate median value for each colon section

In [None]:
# Per-section medians plus a second table bound to `mad_tr` (presumably the
# median absolute deviation — TODO confirm).
# NOTE(review): the `*_tr` names are reused from the earlier sections, so
# from here on this section overwrites those earlier results.
medians_tr, mad_tr = pa_mutations.median_by_section()

In [None]:
# Preview the first rows of the medians table (assumes a pandas object — TODO confirm).
medians_tr.head()

In [None]:
# Preview the first rows of the second table returned by median_by_section.
mad_tr.head()

## Fit Observables

In [None]:
# "Fit Observables" step: fit the per-section medians. Five results are
# unpacked in the order returned; the names suggest polynomial and sigmoid
# scores/models — TODO confirm against ProfileAnalysis.fit_data.
(
    scores_tr,
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_models_tr,
    sig_models_tr,
) = pa_mutations.fit_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=False,
)

In [None]:
# Preview the first rows of the score table returned by fit_data.
scores_tr.head()

## Fit randomly permuted data

In [None]:
# Fit the permuted data; the two results are later passed to plot_gof
# together with the observed scores.
# NOTE(review): dog_allowed=True here, while the fit_data cell in this section
# passes dog_allowed=False — confirm the asymmetry is intended.
poly_perm_scores_tr, sig_perm_scores_tr = pa_mutations.fit_random_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=True,
)

## Compare observable vs permuted data

In [None]:
# Compare observed vs. permuted scores; the returned object is displayed in
# the next cell.
models_pvalue_tr = pa_mutations.plot_gof(
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_perm_scores_tr,
    sig_perm_scores_tr,
    dist_obs=False,
    dist_perm=False,
)

In [None]:
# Display the object returned by plot_gof (name suggests per-model p-values — TODO confirm).
models_pvalue_tr

## Cluster genes

In [None]:
# Run the clustering step on the score table returned by fit_data.
genes_clusters_tr=pa_mutations.cluster_genes(scores_tr)

In [None]:
# Plot the result of cluster_genes.
pa_mutations.plot_clusters(genes_clusters_tr)

In [None]:
# Build a summary table from the clustering result and the fit scores.
summary_tr=pa_mutations.get_summary_table(genes_clusters_tr, scores_tr)

In [None]:
# Split the summary into three groups, bound here as continuum, sigmoid and discarded.
continuum_tr, sigmoid_tr, discarded_tr = pa_mutations.classify_genes(summary_tr)

## Plot distribution of inflexion points for sigmoid genes

In [None]:
# Run strict_sig_list on the sigmoid group and the sigmoid models; with
# plot_dist=True this cell plots the inflexion-point distribution (per the heading).
gene_list_tr, section_l_tr = pa_mutations.strict_sig_list(
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

## Plot distribution of inflexion points for random permutation

In [None]:
# Inflexion-point distribution for random permutations (per the heading);
# any return value is discarded.
pa_mutations.strict_sig_list_random(
    medians_tr,
    mad_tr,
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

# ANEUPLOIDY PROFILING

## Duplication

In [None]:
# Create the workflow object for the aneuploidy/duplication analysis; the
# argument is the path used to locate SETTINGS.ini (presumably its containing
# directory — TODO confirm). It uses the same '../../../docker/analysis/' root
# as the other ProfileAnalysis paths in this notebook.
pa_duplication = ProfileAnalysis('../../../docker/analysis/aneuploidy/duplication')

## Assign each sample in clinical data file to a colon section

In [None]:
# Assign each sample in the clinical data file to a colon section (per the
# heading above); any return value is discarded.
pa_duplication.create_samples_to_sections_table()

In [None]:
# Plot the sample distribution (presumably samples per colon section — TODO confirm).
pa_duplication.plot_sample_distribution()

## Calculate median value for each colon section

In [None]:
# Per-section medians plus a second table bound to `mad_tr` (presumably the
# median absolute deviation — TODO confirm).
# NOTE(review): the `*_tr` names are reused from the earlier sections, so
# from here on this section overwrites those earlier results.
medians_tr, mad_tr = pa_duplication.median_by_section()

In [None]:
# Preview the first rows of the medians table (assumes a pandas object — TODO confirm).
medians_tr.head()

In [None]:
# Preview the first rows of the second table returned by median_by_section.
mad_tr.head()

## Fit Observables

In [None]:
# "Fit Observables" step: fit the per-section medians. Five results are
# unpacked in the order returned; the names suggest polynomial and sigmoid
# scores/models — TODO confirm against ProfileAnalysis.fit_data.
(
    scores_tr,
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_models_tr,
    sig_models_tr,
) = pa_duplication.fit_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=False,
)

In [None]:
# Preview the first rows of the score table returned by fit_data.
scores_tr.head()

## Fit randomly permuted data

In [None]:
# Fit the permuted data; the two results are later passed to plot_gof
# together with the observed scores.
# NOTE(review): dog_allowed=True here, while the fit_data cell in this section
# passes dog_allowed=False — confirm the asymmetry is intended.
poly_perm_scores_tr, sig_perm_scores_tr = pa_duplication.fit_random_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=True,
)

## Compare observable vs permuted data

In [None]:
# Compare observed vs. permuted scores; the returned object is displayed in
# the next cell.
models_pvalue_tr = pa_duplication.plot_gof(
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_perm_scores_tr,
    sig_perm_scores_tr,
    dist_obs=False,
    dist_perm=False,
)

In [None]:
# Display the object returned by plot_gof (name suggests per-model p-values — TODO confirm).
models_pvalue_tr

## Cluster genes

In [None]:
# Run the clustering step on the score table returned by fit_data.
genes_clusters_tr=pa_duplication.cluster_genes(scores_tr)

In [None]:
# Plot the result of cluster_genes.
pa_duplication.plot_clusters(genes_clusters_tr)

In [None]:
# Build a summary table from the clustering result and the fit scores.
summary_tr=pa_duplication.get_summary_table(genes_clusters_tr, scores_tr)

In [None]:
# Split the summary into three groups, bound here as continuum, sigmoid and discarded.
continuum_tr, sigmoid_tr, discarded_tr = pa_duplication.classify_genes(summary_tr)

## Plot distribution of inflexion points for sigmoid genes

In [None]:
# Run strict_sig_list on the sigmoid group and the sigmoid models; with
# plot_dist=True this cell plots the inflexion-point distribution (per the heading).
gene_list_tr, section_l_tr = pa_duplication.strict_sig_list(
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

## Plot distribution of inflexion points for random permutation

In [None]:
# Inflexion-point distribution for random permutations (per the heading);
# any return value is discarded.
pa_duplication.strict_sig_list_random(
    medians_tr,
    mad_tr,
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

## Deletion

In [None]:
# Create the workflow object for the aneuploidy/deletion analysis; the
# argument is the path used to locate SETTINGS.ini (presumably its containing
# directory — TODO confirm). It uses the same '../../../docker/analysis/' root
# as the other ProfileAnalysis paths in this notebook.
pa_deletion = ProfileAnalysis('../../../docker/analysis/aneuploidy/deletion')

## Assign each sample in clinical data file to a colon section

In [None]:
# Assign each sample in the clinical data file to a colon section (per the
# heading above); any return value is discarded.
pa_deletion.create_samples_to_sections_table()

In [None]:
# Plot the sample distribution (presumably samples per colon section — TODO confirm).
pa_deletion.plot_sample_distribution()

## Calculate median value for each colon section

In [None]:
# Per-section medians plus a second table bound to `mad_tr` (presumably the
# median absolute deviation — TODO confirm).
# NOTE(review): the `*_tr` names are reused from the earlier sections, so
# from here on this section overwrites those earlier results.
medians_tr, mad_tr = pa_deletion.median_by_section()

In [None]:
# Preview the first rows of the medians table (assumes a pandas object — TODO confirm).
medians_tr.head()

In [None]:
# Preview the first rows of the second table returned by median_by_section.
mad_tr.head()

## Fit Observables

In [None]:
# "Fit Observables" step: fit the per-section medians. Five results are
# unpacked in the order returned; the names suggest polynomial and sigmoid
# scores/models — TODO confirm against ProfileAnalysis.fit_data.
(
    scores_tr,
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_models_tr,
    sig_models_tr,
) = pa_deletion.fit_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=False,
)

In [None]:
# Preview the first rows of the score table returned by fit_data.
scores_tr.head()

## Fit randomly permuted data

In [None]:
# Fit the permuted data; the two results are later passed to plot_gof
# together with the observed scores.
# NOTE(review): dog_allowed=True here, while the fit_data cell in this section
# passes dog_allowed=False — confirm the asymmetry is intended.
poly_perm_scores_tr, sig_perm_scores_tr = pa_deletion.fit_random_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=True,
)

## Compare observable vs permuted data

In [None]:
# Compare observed vs. permuted scores; the returned object is displayed in
# the next cell.
models_pvalue_tr = pa_deletion.plot_gof(
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_perm_scores_tr,
    sig_perm_scores_tr,
    dist_obs=False,
    dist_perm=False,
)

In [None]:
# Display the object returned by plot_gof (name suggests per-model p-values — TODO confirm).
models_pvalue_tr

## Cluster genes

In [None]:
# Run the clustering step on the score table returned by fit_data.
genes_clusters_tr=pa_deletion.cluster_genes(scores_tr)

In [None]:
# Plot the result of cluster_genes.
pa_deletion.plot_clusters(genes_clusters_tr)

In [None]:
# Build a summary table from the clustering result and the fit scores.
summary_tr=pa_deletion.get_summary_table(genes_clusters_tr, scores_tr)

In [None]:
# Split the summary into three groups, bound here as continuum, sigmoid and discarded.
continuum_tr, sigmoid_tr, discarded_tr = pa_deletion.classify_genes(summary_tr)

## Plot distribution of inflexion points for sigmoid genes

In [None]:
# Run strict_sig_list on the sigmoid group and the sigmoid models; with
# plot_dist=True this cell plots the inflexion-point distribution (per the heading).
gene_list_tr, section_l_tr = pa_deletion.strict_sig_list(
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

## Plot distribution of inflexion points for random permutation

In [None]:
# Inflexion-point distribution for random permutations (per the heading);
# any return value is discarded.
pa_deletion.strict_sig_list_random(
    medians_tr,
    mad_tr,
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

# CMS PROFILING

In [None]:
# Create the workflow object for the CMS analysis; the argument is the
# path used to locate SETTINGS.ini (presumably its containing directory — TODO confirm).
pa_cms = ProfileAnalysis('../../../docker/analysis/cms')

## Assign each sample in clinical data file to a colon section

In [None]:
# Assign each sample in the clinical data file to a colon section (per the
# heading above); any return value is discarded.
pa_cms.create_samples_to_sections_table()

In [None]:
# Plot the sample distribution (presumably samples per colon section — TODO confirm).
pa_cms.plot_sample_distribution()

## Calculate median value for each colon section

In [None]:
# Per-section medians plus a second table bound to `mad_tr` (presumably the
# median absolute deviation — TODO confirm).
# NOTE(review): the `*_tr` names are reused from the earlier sections, so
# from here on this section overwrites those earlier results.
medians_tr, mad_tr = pa_cms.median_by_section()

In [None]:
# Preview the first rows of the medians table (assumes a pandas object — TODO confirm).
medians_tr.head()

In [None]:
# Preview the first rows of the second table returned by median_by_section.
mad_tr.head()

## Fit Observables

In [None]:
# "Fit Observables" step: fit the per-section medians. Five results are
# unpacked in the order returned; the names suggest polynomial and sigmoid
# scores/models — TODO confirm against ProfileAnalysis.fit_data.
(
    scores_tr,
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_models_tr,
    sig_models_tr,
) = pa_cms.fit_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=False,
)

In [None]:
# Preview the first rows of the score table returned by fit_data.
scores_tr.head()

## Fit randomly permuted data

In [None]:
# Fit the permuted data; the two results are later passed to plot_gof
# together with the observed scores.
# NOTE(review): dog_allowed=True here, while the fit_data cell in this section
# passes dog_allowed=False — confirm the asymmetry is intended.
poly_perm_scores_tr, sig_perm_scores_tr = pa_cms.fit_random_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=True,
)

## Compare observable vs permuted data

In [None]:
# Compare observed vs. permuted scores; the returned object is displayed in
# the next cell.
models_pvalue_tr = pa_cms.plot_gof(
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_perm_scores_tr,
    sig_perm_scores_tr,
    dist_obs=False,
    dist_perm=False,
)

In [None]:
# Display the object returned by plot_gof (name suggests per-model p-values — TODO confirm).
models_pvalue_tr

## Cluster genes

In [None]:
# Run the clustering step on the score table returned by fit_data.
genes_clusters_tr=pa_cms.cluster_genes(scores_tr)

In [None]:
# Plot the result of cluster_genes.
pa_cms.plot_clusters(genes_clusters_tr)

In [None]:
# Build a summary table from the clustering result and the fit scores.
summary_tr=pa_cms.get_summary_table(genes_clusters_tr, scores_tr)

In [None]:
# Split the summary into three groups, bound here as continuum, sigmoid and discarded.
continuum_tr, sigmoid_tr, discarded_tr = pa_cms.classify_genes(summary_tr)

## Plot distribution of inflexion points for sigmoid genes

In [None]:
# Run strict_sig_list on the sigmoid group and the sigmoid models; with
# plot_dist=True this cell plots the inflexion-point distribution (per the heading).
gene_list_tr, section_l_tr = pa_cms.strict_sig_list(
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

## Plot distribution of inflexion points for random permutation

In [None]:
# Inflexion-point distribution for random permutations (per the heading);
# any return value is discarded.
pa_cms.strict_sig_list_random(
    medians_tr,
    mad_tr,
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

# MOLECULAR FEATURES PROFILING

In [None]:
# Create the workflow object for the molecular-features analysis; the argument is the
# path used to locate SETTINGS.ini (presumably its containing directory — TODO confirm).
pa_mf = ProfileAnalysis('../../../docker/analysis/molecular_features')

## Assign each sample in clinical data file to a colon section

In [None]:
# Assign each sample in the clinical data file to a colon section (per the
# heading above); any return value is discarded.
pa_mf.create_samples_to_sections_table()

In [None]:
# Plot the sample distribution (presumably samples per colon section — TODO confirm).
pa_mf.plot_sample_distribution()

## Calculate median value for each colon section

In [None]:
# Per-section medians plus a second table bound to `mad_tr` (presumably the
# median absolute deviation — TODO confirm).
# NOTE(review): the `*_tr` names are reused from the earlier sections, so
# from here on this section overwrites those earlier results.
medians_tr, mad_tr = pa_mf.median_by_section()

In [None]:
# Preview the first rows of the medians table (assumes a pandas object — TODO confirm).
medians_tr.head()

In [None]:
# Preview the first rows of the second table returned by median_by_section.
mad_tr.head()

## Fit Observables

In [None]:
# "Fit Observables" step: fit the per-section medians. Five results are
# unpacked in the order returned; the names suggest polynomial and sigmoid
# scores/models — TODO confirm against ProfileAnalysis.fit_data.
(
    scores_tr,
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_models_tr,
    sig_models_tr,
) = pa_mf.fit_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=False,
)

In [None]:
# Preview the first rows of the score table returned by fit_data.
scores_tr.head()

## Fit randomly permuted data

In [None]:
# Fit the permuted data; the two results are later passed to plot_gof
# together with the observed scores.
# NOTE(review): dog_allowed=True here, while the fit_data cell in this section
# passes dog_allowed=False — confirm the asymmetry is intended.
poly_perm_scores_tr, sig_perm_scores_tr = pa_mf.fit_random_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=True,
)

## Compare observable vs permuted data

In [None]:
# Compare observed vs. permuted scores; the returned object is displayed in
# the next cell.
models_pvalue_tr = pa_mf.plot_gof(
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_perm_scores_tr,
    sig_perm_scores_tr,
    dist_obs=False,
    dist_perm=False,
)

In [None]:
# Display the object returned by plot_gof (name suggests per-model p-values — TODO confirm).
models_pvalue_tr

## Cluster genes

In [None]:
# Run the clustering step on the score table returned by fit_data.
genes_clusters_tr=pa_mf.cluster_genes(scores_tr)

In [None]:
# Plot the result of cluster_genes.
pa_mf.plot_clusters(genes_clusters_tr)

In [None]:
# Build a summary table from the clustering result and the fit scores.
summary_tr=pa_mf.get_summary_table(genes_clusters_tr, scores_tr)

In [None]:
# Split the summary into three groups, bound here as continuum, sigmoid and discarded.
continuum_tr, sigmoid_tr, discarded_tr = pa_mf.classify_genes(summary_tr)

## Plot distribution of inflexion points for sigmoid genes

In [None]:
# Run strict_sig_list on the sigmoid group and the sigmoid models; with
# plot_dist=True this cell plots the inflexion-point distribution (per the heading).
gene_list_tr, section_l_tr = pa_mf.strict_sig_list(
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

## Plot distribution of inflexion points for random permutation

In [None]:
# Inflexion-point distribution for random permutations (per the heading);
# any return value is discarded.
pa_mf.strict_sig_list_random(
    medians_tr,
    mad_tr,
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

# SIGNATURES PROFILING

In [None]:
# Create the workflow object for the signatures analysis; the argument is the
# path used to locate SETTINGS.ini (presumably its containing directory — TODO confirm).
pa_sig = ProfileAnalysis('../../../docker/analysis/signatures')

## Assign each sample in clinical data file to a colon section

In [None]:
# Assign each sample in the clinical data file to a colon section (per the
# heading above); any return value is discarded.
pa_sig.create_samples_to_sections_table()

In [None]:
# Plot the sample distribution (presumably samples per colon section — TODO confirm).
pa_sig.plot_sample_distribution()

## Calculate median value for each colon section

In [None]:
# Per-section medians plus a second table bound to `mad_tr` (presumably the
# median absolute deviation — TODO confirm).
# NOTE(review): the `*_tr` names are reused from the earlier sections, so
# from here on this section overwrites those earlier results.
medians_tr, mad_tr = pa_sig.median_by_section()

In [None]:
# Preview the first rows of the medians table (assumes a pandas object — TODO confirm).
medians_tr.head()

In [None]:
# Preview the first rows of the second table returned by median_by_section.
mad_tr.head()

## Fit Observables

In [None]:
# "Fit Observables" step: fit the per-section medians. Five results are
# unpacked in the order returned; the names suggest polynomial and sigmoid
# scores/models — TODO confirm against ProfileAnalysis.fit_data.
(
    scores_tr,
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_models_tr,
    sig_models_tr,
) = pa_sig.fit_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=False,
)

In [None]:
# Preview the first rows of the score table returned by fit_data.
scores_tr.head()

## Fit randomly permuted data

In [None]:
# Fit the permuted data; the two results are later passed to plot_gof
# together with the observed scores.
# NOTE(review): dog_allowed=True here, while the fit_data cell in this section
# passes dog_allowed=False — confirm the asymmetry is intended.
poly_perm_scores_tr, sig_perm_scores_tr = pa_sig.fit_random_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=True,
)

## Compare observable vs permuted data

In [None]:
# Compare observed vs. permuted scores; the returned object is displayed in
# the next cell.
models_pvalue_tr = pa_sig.plot_gof(
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_perm_scores_tr,
    sig_perm_scores_tr,
    dist_obs=False,
    dist_perm=False,
)

In [None]:
# Display the object returned by plot_gof (name suggests per-model p-values — TODO confirm).
models_pvalue_tr

## Cluster genes

In [None]:
# Run the clustering step on the score table returned by fit_data.
genes_clusters_tr=pa_sig.cluster_genes(scores_tr)

In [None]:
# Plot the result of cluster_genes.
pa_sig.plot_clusters(genes_clusters_tr)

In [None]:
# Build a summary table from the clustering result and the fit scores.
summary_tr=pa_sig.get_summary_table(genes_clusters_tr, scores_tr)

In [None]:
# Split the summary into three groups, bound here as continuum, sigmoid and discarded.
continuum_tr, sigmoid_tr, discarded_tr = pa_sig.classify_genes(summary_tr)

## Plot distribution of inflexion points for sigmoid genes

In [None]:
# Run strict_sig_list on the sigmoid group and the sigmoid models; with
# plot_dist=True this cell plots the inflexion-point distribution (per the heading).
gene_list_tr, section_l_tr = pa_sig.strict_sig_list(
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

## Plot distribution of inflexion points for random permutation

In [None]:
# Inflexion-point distribution for random permutations (per the heading);
# any return value is discarded.
pa_sig.strict_sig_list_random(
    medians_tr,
    mad_tr,
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

# MICROBIOME PROFILING

In [None]:
# Create the workflow object for the microbiome analysis; the argument is the
# path used to locate SETTINGS.ini (presumably its containing directory — TODO confirm).
pa_mic = ProfileAnalysis('../../../docker/analysis/microbiome')

## Assign each sample in clinical data file to a colon section

In [None]:
# Assign each sample in the clinical data file to a colon section (per the
# heading above); any return value is discarded.
pa_mic.create_samples_to_sections_table()

In [None]:
# Plot the sample distribution (presumably samples per colon section — TODO confirm).
pa_mic.plot_sample_distribution()

## Calculate median value for each colon section

In [None]:
# Per-section medians plus a second table bound to `mad_tr` (presumably the
# median absolute deviation — TODO confirm).
# NOTE(review): the `*_tr` names are reused from the earlier sections, so
# from here on this section overwrites those earlier results.
medians_tr, mad_tr = pa_mic.median_by_section()

In [None]:
# Preview the first rows of the medians table (assumes a pandas object — TODO confirm).
medians_tr.head()

In [None]:
# Preview the first rows of the second table returned by median_by_section.
mad_tr.head()

## Fit Observables

In [None]:
# "Fit Observables" step: fit the per-section medians. Five results are
# unpacked in the order returned; the names suggest polynomial and sigmoid
# scores/models — TODO confirm against ProfileAnalysis.fit_data.
(
    scores_tr,
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_models_tr,
    sig_models_tr,
) = pa_mic.fit_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=False,
)

In [None]:
# Preview the first rows of the score table returned by fit_data.
scores_tr.head()

## Fit randomly permuted data

In [None]:
# Fit the permuted data; the two results are later passed to plot_gof
# together with the observed scores.
# NOTE(review): dog_allowed=True here, while the fit_data cell in this section
# passes dog_allowed=False — confirm the asymmetry is intended.
poly_perm_scores_tr, sig_perm_scores_tr = pa_mic.fit_random_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=True,
)

## Compare observable vs permuted data

In [None]:
# Compare observed vs. permuted scores; the returned object is displayed in
# the next cell.
models_pvalue_tr = pa_mic.plot_gof(
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_perm_scores_tr,
    sig_perm_scores_tr,
    dist_obs=False,
    dist_perm=False,
)

In [None]:
# Display the object returned by plot_gof (name suggests per-model p-values — TODO confirm).
models_pvalue_tr

## Cluster genes

In [None]:
# Run the clustering step on the score table returned by fit_data.
genes_clusters_tr=pa_mic.cluster_genes(scores_tr)

In [None]:
# Plot the result of cluster_genes.
pa_mic.plot_clusters(genes_clusters_tr)

In [None]:
# Build a summary table from the clustering result and the fit scores.
summary_tr=pa_mic.get_summary_table(genes_clusters_tr, scores_tr)

In [None]:
# Split the summary into three groups, bound here as continuum, sigmoid and discarded.
continuum_tr, sigmoid_tr, discarded_tr = pa_mic.classify_genes(summary_tr)

## Plot distribution of inflexion points for sigmoid genes

In [None]:
# Run strict_sig_list on the sigmoid group and the sigmoid models; with
# plot_dist=True this cell plots the inflexion-point distribution (per the heading).
gene_list_tr, section_l_tr = pa_mic.strict_sig_list(
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

## Plot distribution of inflexion points for random permutation

In [None]:
# Inflexion-point distribution for random permutations (per the heading);
# any return value is discarded.
pa_mic.strict_sig_list_random(
    medians_tr,
    mad_tr,
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

# IMMUNE CELLS PROFILING

In [None]:
# Create the workflow object for the immune-cells analysis; the argument is the
# path used to locate SETTINGS.ini (presumably its containing directory — TODO confirm).
pa_imm = ProfileAnalysis('../../../docker/analysis/immune_cells')

## Assign each sample in clinical data file to a colon section

In [None]:
# Assign each sample in the clinical data file to a colon section (per the
# heading above); any return value is discarded.
pa_imm.create_samples_to_sections_table()

In [None]:
# Plot the sample distribution (presumably samples per colon section — TODO confirm).
pa_imm.plot_sample_distribution()

## Calculate median value for each colon section

In [None]:
# Per-section medians plus a second table bound to `mad_tr` (presumably the
# median absolute deviation — TODO confirm).
# NOTE(review): the `*_tr` names are reused from the earlier sections, so
# from here on this section overwrites those earlier results.
medians_tr, mad_tr = pa_imm.median_by_section()

In [None]:
# Preview the first rows of the medians table (assumes a pandas object — TODO confirm).
medians_tr.head()

In [None]:
# Preview the first rows of the second table returned by median_by_section.
mad_tr.head()

## Fit Observables

In [None]:
# "Fit Observables" step: fit the per-section medians. Five results are
# unpacked in the order returned; the names suggest polynomial and sigmoid
# scores/models — TODO confirm against ProfileAnalysis.fit_data.
(
    scores_tr,
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_models_tr,
    sig_models_tr,
) = pa_imm.fit_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=False,
)

In [None]:
# Preview the first rows of the score table returned by fit_data.
scores_tr.head()

## Fit randomly permuted data

In [None]:
# Fit the permuted data; the two results are later passed to plot_gof
# together with the observed scores.
# NOTE(review): dog_allowed=True here, while the fit_data cell in this section
# passes dog_allowed=False — confirm the asymmetry is intended.
poly_perm_scores_tr, sig_perm_scores_tr = pa_imm.fit_random_data(
    medians_tr,
    mad_tr,
    guess_bounds=True,
    dog_allowed=True,
)

## Compare observable vs permuted data

In [None]:
# Compare observed vs. permuted scores; the returned object is displayed in
# the next cell.
models_pvalue_tr = pa_imm.plot_gof(
    poly_obs_scores_tr,
    sig_obs_scores_tr,
    poly_perm_scores_tr,
    sig_perm_scores_tr,
    dist_obs=False,
    dist_perm=False,
)

In [None]:
# Display the object returned by plot_gof (name suggests per-model p-values — TODO confirm).
models_pvalue_tr

## Cluster genes

In [None]:
# Run the clustering step on the score table returned by fit_data.
genes_clusters_tr=pa_imm.cluster_genes(scores_tr)

In [None]:
# Plot the result of cluster_genes.
pa_imm.plot_clusters(genes_clusters_tr)

In [None]:
# Build a summary table from the clustering result and the fit scores.
summary_tr=pa_imm.get_summary_table(genes_clusters_tr, scores_tr)

In [None]:
# Split the summary into three groups, bound here as continuum, sigmoid and discarded.
continuum_tr, sigmoid_tr, discarded_tr = pa_imm.classify_genes(summary_tr)

## Plot distribution of inflexion points for sigmoid genes

In [None]:
# Run strict_sig_list on the sigmoid group and the sigmoid models; with
# plot_dist=True this cell plots the inflexion-point distribution (per the heading).
gene_list_tr, section_l_tr = pa_imm.strict_sig_list(
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)

## Plot distribution of inflexion points for random permutation

In [None]:
# Inflexion-point distribution for random permutations (per the heading);
# any return value is discarded.
pa_imm.strict_sig_list_random(
    medians_tr,
    mad_tr,
    sigmoid_tr,
    sig_models_tr,
    plot_dist=True,
)