## Creating a custom panel of lung cancer mutations for use on small sequencers

### Strategy: Look for mutational hotspots for lung cancer in the COSMIC Database

In [None]:
# --- imports ---------------------------------------------------------------
import os
import sys

import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
from IPython.display import display

# --- display settings for more readable notebook output --------------------
# show the result of every expression statement in a cell
InteractiveShell.ast_node_interactivity = "all"
# no limit on the number of DataFrame columns shown
pd.set_option('display.max_columns', None)
# allow wide cell contents (up to 200 characters) before truncation
pd.set_option('max_colwidth', 200)

# --- project code ----------------------------------------------------------
# the helper modules live in ../code (relative to the working directory),
# so that folder has to be on sys.path before importing from it
sys.path.append('../code')
from script_utils import show_output

# --- paths -----------------------------------------------------------------
# both roots are read from the environment; an unset STATIC or WORK variable
# raises KeyError right here
static_path = os.path.join(
    os.environ['STATIC'],
    "annotation/clinical",
)
local_path = os.path.join(
    os.environ['WORK'],
    "LO/Sequencing/LungCustomPanel/output",
)

## putting it all together

In [None]:
# entry points of the project-local cosmic_panel module used below
from cosmic_panel import analyze_genes, cosmic_panel_master

# load exonic annotations (read as a tab-separated, gzip-compressed table)
cosmic_exon_path = os.path.join(static_path, "cosmic_exon.csv")
cosmic_exon_df = pd.read_csv(
    cosmic_exon_path,
    sep="\t",
    compression="gzip",
)

In [None]:
# filter settings, handed to cosmic_panel_master as `filter_setting`
# NOTE(review): the exact meaning of each threshold is defined inside
# cosmic_panel and is not visible from this notebook
custom_filter = {
    "exonic_list": ['exonic', 'UTR3', 'UTR5', 'UTR5;UTR3', 'exonic;splicing'],
    "mut_list": [
        'nonsynonymous SNV',
        'stopgain',
        'startloss',
        'stoploss',
        'frameshift deletion',
        'nonframeshift deletion',
    ],
    "gnomad_max": 1e-2,
    "cosmic_rolling_min": 500,
    "rolling_window_size": 5,
    "cosmic_min": 3500,
    "cosmic_density_min": 150,
    "padding": 75,
}

# weights file passed as `cosmic_weights_file`
clinscore_file = "../configs/clinscoreLung.yaml"

# full run on the raw exonic annotations; the third return value is not
# needed here and is discarded
cosmic_muts, gene_df, _, cosmic_scored = cosmic_panel_master(
    cosmic_exon_df,
    cosmic_weights_file=clinscore_file,
    filter_setting=custom_filter,
    threads=8,
    verbose=1,
)

### Reruns can be performed without re-computing the COSMIC scores
+ pass the scored table returned by the previous run (`cosmic_scored`) as the input and set `cosmic_weights_file` to an empty string

In [None]:
# BRCA2 rows of the scored table, highest cosmic_score first
(
    cosmic_scored
    .query("Gene == 'BRCA2'")
    .sort_values('cosmic_score', ascending=False)
)

In [None]:
# filter settings for the rerun, handed to cosmic_panel_master as
# `filter_setting`
custom_filter = {
    "exonic_list": ['exonic', 'UTR3', 'UTR5', 'UTR5;UTR3', 'exonic;splicing'],
    "mut_list": [
        'nonsynonymous SNV',
        'stopgain',
        'startloss',
        'stoploss',
        'frameshift deletion',
        'nonframeshift deletion',
    ],
    "gnomad_max": 1e-2,
    "cosmic_rolling_min": 1000,
    "rolling_window_size": 5,
    "cosmic_min": 3300,
    "cosmic_density_min": 250,
    "padding": 75,
}

# rerun on the already-scored table: the input is `cosmic_scored` from the
# previous run and the weights file is an empty string
cosmic_muts, gene_df, group_df, cosmic_scored = cosmic_panel_master(
    cosmic_scored,
    cosmic_weights_file="",
    filter_setting=custom_filter,
    verbose=1,
)

## analyse for the top genes and inclusion of panel genes

### look for the genes with the highest cumulative clinscore across all of COSMIC

### including gene information

In [None]:
# Excel file with the panel gene list, passed to analyze_genes as
# `panel_excel`. The default is the original author's local file; set the
# GENE_LIST_XLSX environment variable to use a different copy without
# editing the notebook.
genes_excel_file = os.environ.get(
    "GENE_LIST_XLSX",
    "/Users/martinszyska/Desktop/GeneList.xlsx",
)
# Excel path inside the output folder, passed to analyze_genes as `save_excel`
save_excel = os.path.join(local_path, "panel_design.xlsx")


In [None]:
# run the gene analysis on the results of the last cosmic_panel_master run
# NOTE(review): the meaning of the three return values is presumed from the
# target names; confirm against analyze_genes in cosmic_panel
in_panel, cosmic_not_included, list_not_included = analyze_genes(
    cosmic_muts,
    gene_df,
    group_df,
    cosmic_scored,
    panel_excel=genes_excel_file,
    save_excel=save_excel,
)

In [None]:
# show the third return value of the analyze_genes call
# (the name bound there is `list_not_included`)
list_not_included