# Venn diagram and summary

In [None]:
import numpy as np
import pandas as pd
from venn import venn
from matplotlib import pyplot as plt

## Prepare data

In [None]:
def limiting_features(set_dict, f1, f2):
    """Print how much of ``set_dict[f2]`` is also found in ``set_dict[f1]``.

    Parameters
    ----------
    set_dict : mapping of label -> set
        Sets to compare, looked up by label.
    f1, f2 : hashable
        Labels of the two sets; the size of ``set_dict[f2]`` is the
        denominator of the reported percentage.

    Two lines are printed: the percentage of ``set_dict[f2]`` shared with
    ``set_dict[f1]`` and the number of shared elements.  When
    ``set_dict[f2]`` is empty the percentage is undefined and is printed as
    ``nan`` instead of raising ``ZeroDivisionError``.
    """
    shared = set_dict[f1] & set_dict[f2]
    denominator = len(set_dict[f2])
    # A strict significance filter can leave the reference set empty.
    xx = len(shared) / denominator * 100 if denominator else float("nan")
    print("Comparing %s with %s: %0.2f%%" % (f1, f2, xx))
    print("Features in common: %d" % len(shared))

### Load PGC2+CLOZUK GWAS

In [None]:
# Tab-separated GWAS SNP table; its first column is used as the row index.
pgc3_file = (
    '/ceph/projects/v4_phase3_paper/inputs/sz_gwas/'
    'pgc2_clozuk/map_phase3/_m/libd_hg38_pgc2sz_snps.tsv'
)
pgc3_df = pd.read_csv(
    pgc3_file,
    sep='\t',
    low_memory=False,
    index_col=0,
)

### With no MHC

#### Genes

In [None]:
# Gene-level TWAS association table (the "noMHC" summary file).
genes = pd.read_csv(
    '/ceph/projects/v4_phase3_paper/analysis/twas_ea/'
    'gene_weights/fusion/summary_stats/_m/fusion_associations_noMHC.txt',
    sep='\t',
)
# Differential-expression table; only its `ensemblID` column is used below.
annot = pd.read_csv(
    '../../../differential_expression/_m/genes/diffExpr_szVctl_full.txt',
    sep='\t',
)
gene_columns = ['FILE', 'ensemblID', 'ID', 'HSQ', 'BEST.GWAS.ID', 'EQTL.ID',
                'TWAS.Z', 'TWAS.P', 'FDR', 'Bonferroni']
# Keep associations whose FILE matches an annotated ensemblID, tag the
# feature type and expose FILE under the shared `Feature` column name.
genes = (
    annot[['ensemblID']]
    .merge(genes, left_on='ensemblID', right_on='FILE')
    .loc[:, gene_columns]
    .assign(Type='Gene')
    .rename(columns={'FILE': 'Feature'})
)
# Preview the two rows with the smallest TWAS P-value.
genes.sort_values('TWAS.P').head(2)

#### Transcripts

In [None]:
# Transcript-level TWAS association table (the "noMHC" summary file).
trans = pd.read_csv(
    '/ceph/projects/v4_phase3_paper/analysis/twas_ea/'
    'transcript_weights/fusion/summary_stats/_m/fusion_associations_noMHC.txt',
    sep='\t',
)
annot = pd.read_csv(
    '../../../differential_expression/_m/transcripts/diffExpr_szVctl_full.txt',
    sep='\t',
)
# Drop everything from the first "." onward in the gene and transcript IDs
# so they can be matched against the FILE column of the TWAS table.
annot['ensemblID'] = annot['gene_id'].str.replace('\\..*', '', regex=True)
annot['FILE'] = annot['transcript_id'].str.replace('\\..*', '', regex=True)
transcript_columns = ['FILE', 'ensemblID', 'ID', 'HSQ', 'BEST.GWAS.ID', 'EQTL.ID',
                      'TWAS.Z', 'TWAS.P', 'FDR', 'Bonferroni']
trans = (
    annot[['ensemblID', 'FILE']]
    .merge(trans, on='FILE')
    .loc[:, transcript_columns]
    .assign(Type='Transcript')
    .rename(columns={'FILE': 'Feature'})
)
# Preview the two rows with the smallest TWAS P-value.
trans.sort_values('TWAS.P').head(2)

#### Exons

In [None]:
# Exon-level TWAS association table (the "noMHC" summary file).
exons = pd.read_csv(
    '/ceph/projects/v4_phase3_paper/analysis/twas_ea/'
    'exon_weights/fusion/summary_stats/_m/fusion_associations_noMHC.txt',
    sep='\t',
)
# The annotation's first column is its index and is matched against FILE.
annot = pd.read_csv(
    '../../../differential_expression/_m/exons/diffExpr_szVctl_full.txt',
    sep='\t',
    index_col=0,
)
exon_columns = ['FILE', 'ensemblID', 'ID', 'HSQ', 'BEST.GWAS.ID', 'EQTL.ID',
                'TWAS.Z', 'TWAS.P', 'FDR', 'Bonferroni']
exons = (
    annot[['ensemblID']]
    .merge(exons, left_index=True, right_on='FILE')
    .loc[:, exon_columns]
    .assign(Type='Exon')
    .rename(columns={'FILE': 'Feature'})
)
# Preview the two rows with the smallest TWAS P-value.
exons.sort_values('TWAS.P').head(2)

#### Junctions

In [None]:
# Junction annotation, indexed by its second column.  The index is copied
# into a regular `gene_id` column because a column-on-column merge does not
# carry the index through.
annot = pd.read_csv('jxn_annotation.tsv', sep='\t', index_col=1)
annot["gene_id"] = annot.index
# Junction-level TWAS association table (the "noMHC" summary file).
juncs = pd.read_csv(
    '/ceph/projects/v4_phase3_paper/analysis/twas_ea/'
    'junction_weights/fusion/summary_stats/_m/fusion_associations_noMHC.txt',
    sep='\t',
)
junction_columns = ['gene_id', 'ensemblID', 'ID', 'HSQ', 'BEST.GWAS.ID', 'EQTL.ID',
                    'TWAS.Z', 'TWAS.P', 'FDR', 'Bonferroni']
# NOTE(review): here `Feature` is taken from `gene_id`, whereas the gene,
# transcript and exon tables use `FILE`.  If `gene_id` is not unique per
# junction, the "Unique Features" count for junctions counts genes rather
# than junctions -- confirm this is intended.
# The former 'Symbol' -> 'ID' rename entry was dropped: `Symbol` is not one
# of the selected columns, so that entry never had any effect.
juncs = (
    pd.merge(annot, juncs, left_on='JxnID', right_on='FILE')
    .loc[:, junction_columns]
    .assign(Type='Junction')
    .rename(columns={'gene_id': 'Feature'})
)
# Preview the two rows with the smallest TWAS P-value.
juncs.sort_values('TWAS.P').head(2)

## Heritable features

### Feature summary

In [None]:
# Distinct-value counts per feature type for each identifier column.
tables = (genes, trans, exons, juncs)

# Feature identifiers
gg, tt, ee, jj = [len(set(tbl['Feature'])) for tbl in tables]
print("===Unique Features===\nGene:\t\t%d\nTranscript:\t%d\nExon:\t\t%d\nJunction:\t%d\n" % (gg, tt, ee, jj))

# Ensembl gene identifiers
gg, tt, ee, jj = [len(set(tbl['ensemblID'])) for tbl in tables]
print("===Unique Ensembl Gene===\nGene:\t\t%d\nTranscript:\t%d\nExon:\t\t%d\nJunction:\t%d\n" % (gg, tt, ee, jj))

# Values of the `ID` column
gg, tt, ee, jj = [len(set(tbl['ID'])) for tbl in tables]
print("===Unique Gene Name===\nGene:\t\t%d\nTranscript:\t%d\nExon:\t\t%d\nJunction:\t%d\n" % (gg, tt, ee, jj))

### Plot venn

In [None]:
# One set of Ensembl gene IDs per feature type; insertion order is the
# order in which the sets are handed to the Venn diagram.
features = {
    label: set(tbl['ensemblID'])
    for label, tbl in (
        ('Genes', genes),
        ('Transcripts', trans),
        ('Exons', exons),
        ('Junctions', juncs),
    )
}

In [None]:
# Draw the four-set Venn diagram with the size/percentage label format.
venn_style = dict(
    fmt="{size}\n{percentage:0.1f}%",
    fontsize=18,
    legend_loc="best",
    figsize=(12, 12),
    cmap=['red', 'green', 'blue', 'purple'],
)
venn(features, **venn_style)
# Write the same figure as PNG, PDF and SVG before showing it.
for outfile in (
    'heritable_allFeatures_venn_diagram_percentage.png',
    'heritable_allFeatures_venn_diagram_percentage.pdf',
    'heritable_allFeatures_venn_diagram_percentage.svg',
):
    plt.savefig(outfile)
plt.show()

In [None]:
# Pairwise overlap reports; the second label of each pair is the denominator.
for first, second in (
    ('Genes', 'Transcripts'),
    ('Genes', 'Junctions'),
    ('Exons', 'Genes'),
):
    limiting_features(features, first, second)

In [None]:
# Pairwise overlap reports; the second label of each pair is the denominator.
for first, second in (
    ('Transcripts', 'Junctions'),
    ('Exons', 'Transcripts'),
    ('Exons', 'Junctions'),
):
    limiting_features(features, first, second)

In [None]:
# Number of Ensembl gene IDs present in all four sets.
len(set.intersection(*(features[label] for label in ('Genes', 'Transcripts', 'Exons', 'Junctions'))))

In [None]:
# Number of Ensembl gene IDs present in at least one of the four sets.
len(set().union(*(features[label] for label in ('Genes', 'Transcripts', 'Exons', 'Junctions'))))

### SNPs not in significant PGC2+CLOZUK GWAS

In [None]:
# Attach GWAS statistics to each table through its best GWAS SNP, then keep
# only rows whose GWAS P-value is above the 5e-8 threshold.
merge_opts = dict(left_on='BEST.GWAS.ID', right_on='our_snp_id', suffixes=['_TWAS', '_PGC2'])

new_genes = genes.merge(pgc3_df, **merge_opts)
new_trans = trans.merge(pgc3_df, **merge_opts)
new_exons = exons.merge(pgc3_df, **merge_opts)
new_juncs = juncs.merge(pgc3_df, **merge_opts)

new_genes = new_genes.loc[new_genes['P'] > 5e-8].copy()
new_trans = new_trans.loc[new_trans['P'] > 5e-8].copy()
new_exons = new_exons.loc[new_exons['P'] > 5e-8].copy()
new_juncs = new_juncs.loc[new_juncs['P'] > 5e-8].copy()

In [None]:
# Distinct best-GWAS SNP IDs remaining in each filtered table.
gg, tt, ee, jj = [
    len(set(frame['BEST.GWAS.ID']))
    for frame in (new_genes, new_trans, new_exons, new_juncs)
]

print("===Unique novel SNPs===\nGene:\t\t%d\nTranscript:\t%d\nExon:\t\t%d\nJunction:\t%d\n" % (gg, tt, ee, jj))

In [None]:
# Distinct best-GWAS SNP IDs across all four filtered tables combined.
len(set().union(*(
    set(frame['BEST.GWAS.ID'])
    for frame in (new_genes, new_trans, new_exons, new_juncs)
)))

## TWAS P-value <= 0.05

### Feature summary

In [None]:
# Rows with TWAS P-value <= 0.05, per feature type.
sig_tables = [tbl.loc[tbl['TWAS.P'] <= 0.05] for tbl in (genes, trans, exons, juncs)]

# Feature identifiers
gg, tt, ee, jj = [len(set(tbl['Feature'])) for tbl in sig_tables]
print("===Unique Features===\nGene:\t\t%d\nTranscript:\t%d\nExon:\t\t%d\nJunction:\t%d\n" % (gg, tt, ee, jj))

# Ensembl gene identifiers
gg, tt, ee, jj = [len(set(tbl['ensemblID'])) for tbl in sig_tables]
print("===Unique Ensembl Gene===\nGene:\t\t%d\nTranscript:\t%d\nExon:\t\t%d\nJunction:\t%d\n" % (gg, tt, ee, jj))

# Values of the `ID` column
gg, tt, ee, jj = [len(set(tbl['ID'])) for tbl in sig_tables]
print("===Unique Gene Names===\nGene:\t\t%d\nTranscript:\t%d\nExon:\t\t%d\nJunction:\t%d\n" % (gg, tt, ee, jj))

### Plot venn

In [None]:
# Ensembl gene IDs with TWAS P-value <= 0.05, one set per feature type;
# insertion order is the order handed to the Venn diagram.
features = {
    label: set(tbl.loc[tbl['TWAS.P'] <= 0.05, 'ensemblID'])
    for label, tbl in (
        ('Genes', genes),
        ('Transcripts', trans),
        ('Exons', exons),
        ('Junctions', juncs),
    )
}

In [None]:
# Draw the four-set Venn diagram with the size/percentage label format.
venn_style = dict(
    fmt="{size}\n{percentage:0.1f}%",
    fontsize=18,
    legend_loc="best",
    figsize=(12, 12),
    cmap=['red', 'green', 'blue', 'purple'],
)
venn(features, **venn_style)
# Write the same figure as PNG, PDF and SVG before showing it.
for outfile in (
    'sigPval_allFeatures_venn_diagram_percentage.png',
    'sigPval_allFeatures_venn_diagram_percentage.pdf',
    'sigPval_allFeatures_venn_diagram_percentage.svg',
):
    plt.savefig(outfile)
plt.show()

In [None]:
# Pairwise overlap reports; the second label of each pair is the denominator.
for first, second in (
    ('Genes', 'Transcripts'),
    ('Genes', 'Junctions'),
    ('Exons', 'Genes'),
):
    limiting_features(features, first, second)

In [None]:
# Pairwise overlap reports; the second label of each pair is the denominator.
for first, second in (
    ('Transcripts', 'Junctions'),
    ('Exons', 'Transcripts'),
    ('Exons', 'Junctions'),
):
    limiting_features(features, first, second)

In [None]:
# Number of Ensembl gene IDs present in all four sets.
len(set.intersection(*(features[label] for label in ('Genes', 'Transcripts', 'Exons', 'Junctions'))))

In [None]:
# Number of Ensembl gene IDs present in at least one of the four sets.
len(set().union(*(features[label] for label in ('Genes', 'Transcripts', 'Exons', 'Junctions'))))

### SNPs not in significant PGC2+CLOZUK GWAS

In [None]:
# For rows with TWAS P-value <= 0.05, attach GWAS statistics through the
# best GWAS SNP, then keep rows whose GWAS P-value is above 5e-8.
merge_opts = dict(left_on='BEST.GWAS.ID', right_on='our_snp_id', suffixes=['_TWAS', '_PGC2'])

new_genes = genes.loc[genes['TWAS.P'] <= 0.05].merge(pgc3_df, **merge_opts)
new_trans = trans.loc[trans['TWAS.P'] <= 0.05].merge(pgc3_df, **merge_opts)
new_exons = exons.loc[exons['TWAS.P'] <= 0.05].merge(pgc3_df, **merge_opts)
new_juncs = juncs.loc[juncs['TWAS.P'] <= 0.05].merge(pgc3_df, **merge_opts)

new_genes = new_genes.loc[new_genes['P'] > 5e-8].copy()
new_trans = new_trans.loc[new_trans['P'] > 5e-8].copy()
new_exons = new_exons.loc[new_exons['P'] > 5e-8].copy()
new_juncs = new_juncs.loc[new_juncs['P'] > 5e-8].copy()

In [None]:
# Distinct best-GWAS SNP IDs remaining in each filtered table.
gg, tt, ee, jj = [
    len(set(frame['BEST.GWAS.ID']))
    for frame in (new_genes, new_trans, new_exons, new_juncs)
]

print("===Unique novel SNPs===\nGene:\t\t%d\nTranscript:\t%d\nExon:\t\t%d\nJunction:\t%d\n" % (gg, tt, ee, jj))

In [None]:
# Distinct best-GWAS SNP IDs across all four filtered tables combined.
len(set().union(*(
    set(frame['BEST.GWAS.ID'])
    for frame in (new_genes, new_trans, new_exons, new_juncs)
)))

## TWAS FDR <= 0.05

### Feature summary

In [None]:
# Rows with FDR <= 0.05, per feature type.
sig_tables = [tbl.loc[tbl['FDR'] <= 0.05] for tbl in (genes, trans, exons, juncs)]

# Feature identifiers
gg, tt, ee, jj = [len(set(tbl['Feature'])) for tbl in sig_tables]
print("===Unique Features===\nGene:\t\t%d\nTranscript:\t%d\nExon:\t\t%d\nJunction:\t%d\n" % (gg, tt, ee, jj))

# Ensembl gene identifiers
gg, tt, ee, jj = [len(set(tbl['ensemblID'])) for tbl in sig_tables]
print("===Unique Ensembl Gene===\nGene:\t\t%d\nTranscript:\t%d\nExon:\t\t%d\nJunction:\t%d\n" % (gg, tt, ee, jj))

# Values of the `ID` column
gg, tt, ee, jj = [len(set(tbl['ID'])) for tbl in sig_tables]
print("===Unique Gene Name===\nGene:\t\t%d\nTranscript:\t%d\nExon:\t\t%d\nJunction:\t%d\n" % (gg, tt, ee, jj))

### Plot venn

In [None]:
# Ensembl gene IDs with FDR <= 0.05, one set per feature type;
# insertion order is the order handed to the Venn diagram.
features = {
    label: set(tbl.loc[tbl['FDR'] <= 0.05, 'ensemblID'])
    for label, tbl in (
        ('Genes', genes),
        ('Transcripts', trans),
        ('Exons', exons),
        ('Junctions', juncs),
    )
}

In [None]:
# Draw the four-set Venn diagram with the size/percentage label format.
venn_style = dict(
    fmt="{size}\n{percentage:0.1f}%",
    fontsize=18,
    legend_loc="best",
    figsize=(12, 12),
    cmap=['red', 'green', 'blue', 'purple'],
)
venn(features, **venn_style)
# Write the same figure as PNG, PDF and SVG before showing it.
for outfile in (
    'fdr_allFeatures_venn_diagram_percentage.png',
    'fdr_allFeatures_venn_diagram_percentage.pdf',
    'fdr_allFeatures_venn_diagram_percentage.svg',
):
    plt.savefig(outfile)
plt.show()

In [None]:
# Pairwise overlap reports; the second label of each pair is the denominator.
for first, second in (
    ('Genes', 'Transcripts'),
    ('Genes', 'Junctions'),
    ('Exons', 'Genes'),
):
    limiting_features(features, first, second)

In [None]:
# Pairwise overlap reports; the second label of each pair is the denominator.
for first, second in (
    ('Transcripts', 'Junctions'),
    ('Exons', 'Transcripts'),
    ('Exons', 'Junctions'),
):
    limiting_features(features, first, second)

In [None]:
# Number of Ensembl gene IDs present in all four sets.
len(set.intersection(*(features[label] for label in ('Genes', 'Transcripts', 'Exons', 'Junctions'))))

In [None]:
# Number of Ensembl gene IDs present in at least one of the four sets.
len(set().union(*(features[label] for label in ('Genes', 'Transcripts', 'Exons', 'Junctions'))))

### SNPs not in significant PGC2+CLOZUK GWAS

In [None]:
# For rows with FDR <= 0.05, attach GWAS statistics through the best GWAS
# SNP, then keep rows whose GWAS P-value is above 5e-8.
merge_opts = dict(left_on='BEST.GWAS.ID', right_on='our_snp_id', suffixes=['_TWAS', '_PGC2'])

new_genes = genes.loc[genes['FDR'] <= 0.05].merge(pgc3_df, **merge_opts)
new_trans = trans.loc[trans['FDR'] <= 0.05].merge(pgc3_df, **merge_opts)
new_exons = exons.loc[exons['FDR'] <= 0.05].merge(pgc3_df, **merge_opts)
new_juncs = juncs.loc[juncs['FDR'] <= 0.05].merge(pgc3_df, **merge_opts)

new_genes = new_genes.loc[new_genes['P'] > 5e-8].copy()
new_trans = new_trans.loc[new_trans['P'] > 5e-8].copy()
new_exons = new_exons.loc[new_exons['P'] > 5e-8].copy()
new_juncs = new_juncs.loc[new_juncs['P'] > 5e-8].copy()

In [None]:
# Distinct best-GWAS SNP IDs remaining in each filtered table.
gg, tt, ee, jj = [
    len(set(frame['BEST.GWAS.ID']))
    for frame in (new_genes, new_trans, new_exons, new_juncs)
]

print("===Unique novel SNPs===\nGene:\t\t%d\nTranscript:\t%d\nExon:\t\t%d\nJunction:\t%d\n" % (gg, tt, ee, jj))

In [None]:
# Distinct best-GWAS SNP IDs across all four filtered tables combined.
len(set().union(*(
    set(frame['BEST.GWAS.ID'])
    for frame in (new_genes, new_trans, new_exons, new_juncs)
)))

## TWAS Bonferroni <= 0.05

### Feature summary

In [None]:
# Rows with Bonferroni <= 0.05, per feature type.
sig_tables = [tbl.loc[tbl['Bonferroni'] <= 0.05] for tbl in (genes, trans, exons, juncs)]

# Feature identifiers
gg, tt, ee, jj = [len(set(tbl['Feature'])) for tbl in sig_tables]
print("===Unique Features===\nGene:\t\t%d\nTranscript:\t%d\nExon:\t\t%d\nJunction:\t%d\n" % (gg, tt, ee, jj))

# Ensembl gene identifiers
gg, tt, ee, jj = [len(set(tbl['ensemblID'])) for tbl in sig_tables]
print("===Unique Ensembl Gene===\nGene:\t\t%d\nTranscript:\t%d\nExon:\t\t%d\nJunction:\t%d\n" % (gg, tt, ee, jj))

# Values of the `ID` column
gg, tt, ee, jj = [len(set(tbl['ID'])) for tbl in sig_tables]
print("===Unique Gene Name===\nGene:\t\t%d\nTranscript:\t%d\nExon:\t\t%d\nJunction:\t%d\n" % (gg, tt, ee, jj))

### Plot venn

In [None]:
# Ensembl gene IDs with Bonferroni <= 0.05, one set per feature type;
# insertion order is the order handed to the Venn diagram.
features = {
    label: set(tbl.loc[tbl['Bonferroni'] <= 0.05, 'ensemblID'])
    for label, tbl in (
        ('Genes', genes),
        ('Transcripts', trans),
        ('Exons', exons),
        ('Junctions', juncs),
    )
}

In [None]:
# Draw the four-set Venn diagram with the size/percentage label format.
venn_style = dict(
    fmt="{size}\n{percentage:0.1f}%",
    fontsize=18,
    legend_loc="best",
    figsize=(12, 12),
    cmap=['red', 'green', 'blue', 'purple'],
)
venn(features, **venn_style)
# Write the same figure as PNG, PDF and SVG before showing it.
for outfile in (
    'bonferroni_allFeatures_venn_diagram_percentage.png',
    'bonferroni_allFeatures_venn_diagram_percentage.pdf',
    'bonferroni_allFeatures_venn_diagram_percentage.svg',
):
    plt.savefig(outfile)
plt.show()

In [None]:
# Pairwise overlap reports; the second label of each pair is the denominator.
for first, second in (
    ('Genes', 'Transcripts'),
    ('Genes', 'Junctions'),
    ('Exons', 'Genes'),
):
    limiting_features(features, first, second)

In [None]:
# Pairwise overlap reports; the second label of each pair is the denominator.
for first, second in (
    ('Transcripts', 'Junctions'),
    ('Exons', 'Transcripts'),
    ('Exons', 'Junctions'),
):
    limiting_features(features, first, second)

In [None]:
# Number of Ensembl gene IDs present in all four sets.
len(set.intersection(*(features[label] for label in ('Genes', 'Transcripts', 'Exons', 'Junctions'))))

In [None]:
# Number of Ensembl gene IDs present in at least one of the four sets.
len(set().union(*(features[label] for label in ('Genes', 'Transcripts', 'Exons', 'Junctions'))))

### SNPs not in significant PGC2+CLOZUK GWAS

In [None]:
# For rows with Bonferroni <= 0.05, attach GWAS statistics through the best
# GWAS SNP, then keep rows whose GWAS P-value is above 5e-8.
merge_opts = dict(left_on='BEST.GWAS.ID', right_on='our_snp_id', suffixes=['_TWAS', '_PGC2'])

new_genes = genes.loc[genes['Bonferroni'] <= 0.05].merge(pgc3_df, **merge_opts)
new_trans = trans.loc[trans['Bonferroni'] <= 0.05].merge(pgc3_df, **merge_opts)
new_exons = exons.loc[exons['Bonferroni'] <= 0.05].merge(pgc3_df, **merge_opts)
new_juncs = juncs.loc[juncs['Bonferroni'] <= 0.05].merge(pgc3_df, **merge_opts)

new_genes = new_genes.loc[new_genes['P'] > 5e-8].copy()
new_trans = new_trans.loc[new_trans['P'] > 5e-8].copy()
new_exons = new_exons.loc[new_exons['P'] > 5e-8].copy()
new_juncs = new_juncs.loc[new_juncs['P'] > 5e-8].copy()

In [None]:
# Distinct best-GWAS SNP IDs remaining in each filtered table.
gg, tt, ee, jj = [
    len(set(frame['BEST.GWAS.ID']))
    for frame in (new_genes, new_trans, new_exons, new_juncs)
]

print("===Unique novel SNPs===\nGene:\t\t%d\nTranscript:\t%d\nExon:\t\t%d\nJunction:\t%d\n" % (gg, tt, ee, jj))

In [None]:
# Distinct best-GWAS SNP IDs across all four filtered tables combined.
len(set().union(*(
    set(frame['BEST.GWAS.ID'])
    for frame in (new_genes, new_trans, new_exons, new_juncs)
)))

## Session Information

In [None]:
import types
from IPython import sys_info

def imports():
    """Yield the ``__name__`` of each module object bound in this module's globals."""
    module_objects = (
        obj for obj in globals().values() if isinstance(obj, types.ModuleType)
    )
    for mod in module_objects:
        yield mod.__name__

# Build the list of imported module names, minus a fixed exclusion list.
#exclude all modules not listed by `!pip freeze`
excludes = ['__builtin__', 'types', 'IPython.core.shadowns', 'sys', 'os']
# Extra module names to append to the list; empty here.
function_modules = []
imported_modules = [module for module in imports() if module not in excludes] + function_modules
# IPython shell escape: captures the output of `pip freeze`.
# NOTE(review): neither `imported_modules` nor `pip_modules` is read again in
# the cells visible here -- confirm whether a reporting step is missing.
pip_modules = !pip freeze #you could also use `!conda list` with anaconda

In [None]:
# Report the IPython/system details for this session.
session_details = sys_info()
print(session_details)