In [None]:
import os
import pandas as pd
import seaborn as sns
import numpy as np
import networkx as nx
from shutil import copyfile


In [None]:
# ---- Configuration ---------------------------------------------------------
# Single root for all project data: every path below is derived from it, so the
# notebook can be pointed at another location by editing this one line.
entero_project_dir = '/home/omkar/Projects/panGenome/data/entero_project/'

# Output folder of the previous BiG-SCAPE run (network files, hybrids off).
bigscape_dir = os.path.join(
    entero_project_dir,
    'bigscape_as4_no_hybrids/network_files/2019-05-08_14-19-17_glocal_hybrids_off_multi_cutoff/')
# Folder that receives the selected .gbk files (input of the next BiG-SCAPE run).
bigscape_input_data = os.path.join(entero_project_dir, 'bigscape_specific_inputs/subfamilies/')
# Folder the cluster .gbk files are copied from.
cluster_from_dir = os.path.join(entero_project_dir, 'genomes/')
# Pickled collection of family graphs, indexed by family id below.
family_graphs_path = os.path.join(entero_project_dir, 'general/family_graphs.p')

cluster_class = 'PKS-NRP_Hybrids'
family_id = 2

# ---- Pipeline --------------------------------------------------------------
# NOTE: get_biscape_fam, select_clusters and create_bigscape_data are defined in
# a cell further down this notebook; run that cell before this one.
df_sub_fam_bigscape = get_biscape_fam(cluster_class, bigscape_dir)

# NOTE: unpickling can execute arbitrary code; only load pickle files that were
# produced by a trusted source.
all_fam_networks = pd.read_pickle(family_graphs_path)
fam_network = all_fam_networks[family_id]

# Pick one cluster per sub-family, then copy its .gbk file to the input folder.
select_clusters_dict = select_clusters(df_sub_fam_bigscape, fam_network)
create_bigscape_data(select_clusters_dict, bigscape_input_data, cluster_class, family_id, cluster_from_dir)

Run BiG-SCAPE from a shell on the sub-family input folder populated above:

```bash
python /home/omkar/Projects/packages/BiG-SCAPE/bigscape.py -i '/home/omkar/Projects/panGenome/data/entero_project/bigscape_specific_inputs/subfamilies/' -o '/home/omkar/Projects/panGenome/data/entero_project/bigscape_as4_subfamilies/' --hybrids-off --include_singletons --cutoffs 0.3 0.5 0.7
```

In [None]:
def get_biscape_fam(cluster_class, bigscape_dir, cutoff=0.30):
    """Load the BiG-SCAPE clustering table of one cluster class.

    Reads the tab-separated file
    ``<bigscape_dir>/<cluster_class>/<cluster_class>_clustering_c<cutoff>.tsv``
    and indexes it by its ``#BGC Name`` column.

    Parameters
    ----------
    cluster_class : str
        Name of the cluster class; used both as the sub-folder name and as the
        file-name prefix.
    bigscape_dir : str
        Folder that contains one sub-folder per cluster class.
    cutoff : float, optional
        Cutoff that appears in the file name, written with two decimals
        (``0.3`` -> ``c0.30``). Defaults to 0.30, the value that used to be
        hard-coded.

    Returns
    -------
    pandas.DataFrame
        The table with ``#BGC Name`` as index.

    Raises
    ------
    FileNotFoundError
        If no clustering file exists for the requested class and cutoff.
    KeyError
        If the file has no ``#BGC Name`` column.
    """
    post_path = '{}_clustering_c{:.2f}.tsv'.format(cluster_class, cutoff)
    fam_path = os.path.join(bigscape_dir, cluster_class, post_path)

    # set_index returns a new frame, so no in-place mutation is needed.
    return pd.read_table(fam_path, sep='\t').set_index('#BGC Name')


def select_clusters(df_sub_fam_bigscape, fam_network):
    """Select one cluster from each BiG-SCAPE annotated sub-family.

    The rows of ``df_sub_fam_bigscape`` that correspond to the nodes of
    ``fam_network`` are grouped by their ``Family Number``. Within each group,
    nodes whose name contains ``'BGC'`` are ignored, and the remaining node with
    the highest degree centrality in the induced sub-graph is selected. On a
    tie, the node that comes first in the sub-graph's node order wins.

    Parameters
    ----------
    df_sub_fam_bigscape : pandas.DataFrame
        Table indexed by cluster name with a ``Family Number`` column.
    fam_network : networkx graph
        Graph whose nodes are cluster names present in the table index.

    Returns
    -------
    dict
        Maps each sub-family number to the name of its selected cluster.
        Sub-families that contain only ``'BGC'`` nodes are left out.

    Raises
    ------
    KeyError
        If a node of ``fam_network`` is missing from the table index.
    """
    input_clusters = list(fam_network.nodes)
    df_input_fam = df_sub_fam_bigscape.loc[input_clusters, :]

    select_clusters_dict = dict()

    for sub_fam_id in df_input_fam['Family Number'].unique():
        sub_fam_nodes = df_input_fam[df_input_fam['Family Number'] == sub_fam_id].index.tolist()
        sub_fam_nodes = [node for node in sub_fam_nodes if 'BGC' not in node]
        if not sub_fam_nodes:
            # Nothing left to choose from once the 'BGC' nodes are dropped;
            # indexing the empty ranking used to raise IndexError here.
            continue

        sub_fam_network = fam_network.subgraph(sub_fam_nodes)
        centrality = nx.algorithms.degree_centrality(sub_fam_network)

        # max() returns the first maximal key in iteration order, which is the
        # same pick as a stable descending sort followed by taking element 0,
        # and it does not need the `collections` module.
        select_clusters_dict[sub_fam_id] = max(centrality, key=centrality.get)

    return select_clusters_dict

def create_bigscape_data(select_clusters_dict, bigscape_input_data, cluster_class, family_id, cluster_from_dir):
    """Copy the selected cluster files into the BiG-SCAPE input folder.

    Each selected cluster ``<genome>.cluster<...>`` is copied from
    ``<cluster_from_dir>/<genome>/as4/<cluster>.gbk`` to
    ``<bigscape_input_data>/<cluster_class>/family_<family_id>/<cluster>.gbk``.
    The source files are left in place, and existing destination files are
    overwritten.

    Parameters
    ----------
    select_clusters_dict : dict
        Mapping whose values are the cluster names to copy.
    bigscape_input_data : str
        Root of the BiG-SCAPE input folder; created if it does not exist.
    cluster_class : str
        Name of the class sub-folder.
    family_id : int or str
        Identifier used to name the ``family_<family_id>`` sub-folder.
    cluster_from_dir : str
        Folder that contains one sub-folder per genome.

    Raises
    ------
    FileNotFoundError
        If a source ``.gbk`` file does not exist.
    """
    family_dir = os.path.join(bigscape_input_data, cluster_class, 'family_' + str(family_id))
    # Creates missing parent folders too and does not fail when the folder
    # already exists, which also removes the check-then-create race.
    os.makedirs(family_dir, exist_ok=True)

    for cluster in select_clusters_dict.values():
        # The genome name is everything before the first '.cluster'.
        genome = cluster.split('.cluster')[0]
        file_from = os.path.join(cluster_from_dir, genome, 'as4', cluster + '.gbk')
        file_to = os.path.join(family_dir, cluster + '.gbk')
        copyfile(file_from, file_to)
        
def get_adj_mat(G, df_sub_fam_bigscape):
    """Draw a clustered heatmap of the adjacency matrix of graph ``G``.

    Rows and columns of the heatmap are annotated with one colour per node, and
    a legend with one entry per label in ``genus_list`` is attached to the
    column dendrogram.

    Parameters
    ----------
    G : networkx graph
        Graph whose adjacency matrix is plotted; node names are tested for the
        substring ``'BGC'``.
    df_sub_fam_bigscape
        Not used in the lines visible here.

    Notes
    -----
    Relies on names that are not defined in this function: ``cluster_color``,
    ``cluster_genus``, ``cluster_genome``, ``genus_list``, ``my_palette`` and
    ``plt``. NOTE(review): presumably these are notebook-level globals set in
    another cell, and ``plt`` is ``matplotlib.pyplot`` — confirm; ``plt`` is not
    imported in the import cell at the top of the notebook.
    """
    node_colors = []
    # NOTE(review): the two lists below are filled but never read in the lines
    # visible here.
    cluster_genus_list = []
    cluster_genome_list = []
    
    for node in G.nodes:
        if 'BGC' in node:
            # Nodes whose name contains 'BGC' are drawn in black and labelled 'MIBIG'.
            node_colors.append((0,0,0))
            cluster_genus_list.append('MIBIG')
            cluster_genome_list.append('MIBIG')
        else:
            # All other nodes are looked up by name in the external mappings.
            node_colors.append(cluster_color[node])                       
            cluster_genus_list.append(cluster_genus[node])
            cluster_genome_list.append(cluster_genome[node])

    # Plot adjacency matrix
    # NOTE(review): seaborn's clustermap presumably creates its own figure, in
    # which case this call only leaves an empty figure behind — confirm.
    plt.figure()
    df_g = nx.to_pandas_adjacency(G)
    r = sns.clustermap(df_g, figsize=(20,20), cmap='BuPu', row_colors=node_colors, col_colors=node_colors, xticklabels=False, yticklabels=False)
    # Zero-height, zero-width bars only exist to register one legend entry per
    # label, coloured from my_palette.
    for label in genus_list:
        r.ax_col_dendrogram.bar(0, 0, color=my_palette[label],
                            label=label, linewidth=0)
    r.ax_col_dendrogram.legend(loc=(1.1,-4), ncol=1)