# Roary
Summary of [Roary](https://github.com/sanger-pathogens/Roary) results from project: `[{{ project().name }}]` 

## Description
Rapid large-scale prokaryote pan genome analysis.

In [None]:
import pandas as pd
import shutil
from pathlib import Path
from IPython.display import display, Markdown, HTML
from itables import to_html_datatable as DT
import itables.options as opt
# Custom CSS for tables rendered through itables: slanted, smaller text
# (0.8em) in both body cells (td) and header cells (th).
opt.css = """
.itables table td { font-style: italic; font-size: .8em;}
.itables table th { font-style: oblique; font-size: .8em; }
"""
# CSS classes attached to every rendered table
opt.classes = ["display", "compact"]
# Choices offered in the "rows per page" menu
opt.lengthMenu = [5, 10, 20, 50, 100, 200, 500]
# NOTE(review): presumably the size limit above which itables downsamples a
# table before embedding it -- confirm against the itables documentation.
opt.maxBytes = 100000

# Ignore every warning raised from here on (presumably to keep the rendered
# report free of warning output).
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Absolute path of the parent of the current working directory; the Roary
# outputs used by this notebook are read from underneath it.
report_dir = Path("../").resolve()

In [None]:
# Link the Roary output directory into the report's assets tree so that its
# files can be reached through "assets/data/roary".
source_path = report_dir / "roary"
target_path = Path('assets/data/roary')

# Ensure that the source path and the target path are different.
# The comparison uses absolute locations: target_path is relative, so comparing
# it directly with source_path could never match. Only the parent of the target
# is resolved, because resolving the target itself would follow a link left
# behind by a previous run and wrongly report the two paths as identical.
target_location = target_path.parent.resolve() / target_path.name
if source_path.resolve() == target_location:
    raise ValueError("The source path and the target path are the same.")

# Create the parent directories if they do not exist
target_path.parent.mkdir(parents=True, exist_ok=True)

# Remove the link left by a previous run (is_symlink() is also true for a
# dangling link). Anything else at that location is real data: refuse to
# touch it and explain why instead of failing inside symlink_to().
if target_path.is_symlink():
    target_path.unlink()
elif target_path.exists():
    raise FileExistsError(
        f"{target_path} already exists and is not a symbolic link; "
        "move or remove it before building the report."
    )

# Create the symbolic link. The target is a directory, which Windows needs to
# be told explicitly; the flag is ignored on other platforms.
target_path.symlink_to(source_path, target_is_directory=True)

In [None]:
# Roary figures that get their own copy under the report's figures folder
targets = [
    "assets/data/roary/gene_frequency.jpeg",
    "assets/data/roary/pan_core_curve.jpeg",
    "assets/data/roary/pangenome_pie.jpeg",
    "assets/data/roary/phylo_presence_heatmap.jpeg",
]

# Folder receiving the copies; created on demand together with its parents
destination = Path("assets/figures/roary")
destination.mkdir(parents=True, exist_ok=True)

# Place a copy of every listed figure inside the destination folder
for figure in targets:
    shutil.copy(src=figure, dst=destination)

## Pangenome Statistics
|  |  |
|:-|:-|
| <img src="../assets/data/roary/conserved_vs_total_genes.png"  width="400"> | <img src="../assets/figures/roary/gene_frequency.jpeg"  width="500" height="100%"> |
| <img src="../assets/data/roary/new_unique_curve.jpeg"  width="500" height="100%"> | <img src="../assets/figures/roary/pan_core_curve.jpeg"  width="500" height="100%"> |

## Pangenome Distribution
<p align="center">
    <img src="../assets/figures/roary/pangenome_pie.jpeg"  width="70%" height="70%">
</p>

In [None]:
# Read the tab-separated, header-less summary file, give its first three
# columns readable names, and render it as an interactive table.
summary_columns = {0: 'Pangenome Category', 1: 'Description', 2: 'Counts'}
df_summary = pd.read_csv(report_dir / "roary/summary_statistics.txt", sep="\t", header=None)
df_summary = df_summary.rename(columns=summary_columns)
display(HTML(DT(df_summary)))

## Pangene Distribution Across Phylogenetic Tree

<img src="../assets/figures/roary/phylo_presence_heatmap.jpeg"  width="100%" height="100%">

## Pangene Tables

In [None]:
# Load the per-gene summary and split it by pangenome class
df_pangene = pd.read_csv(report_dir / "roary/df_pangene_summary.csv")
is_core = df_pangene["pangenome_class"] == 'core'
is_shell = df_pangene["pangenome_class"] == 'shell'
df_pangene_core = df_pangene[is_core]
df_pangene_shell = df_pangene[is_shell]

### Core Genes

In [None]:
# Render the core genes as an interactive table: the "dt-center" class is
# applied to all columns and horizontal scrolling is switched on.
core_table_html = DT(
    df_pangene_core,
    columnDefs=[{"className": "dt-center", "targets": "_all"}],
    scrollX=True,
)
display(HTML(core_table_html))

### Shell Genes

In [None]:
# Render the shell genes as an interactive table: the "dt-center" class is
# applied to all columns and horizontal scrolling is switched on.
shell_table_html = DT(
    df_pangene_shell,
    columnDefs=[{"className": "dt-center", "targets": "_all"}],
    scrollX=True,
)
display(HTML(shell_table_html))

## References

<font size="2">

{% for i in project().rule_used['roary']['references'] %}
- *{{ i }}*
{% endfor %}

</font>