# Figure 3f - FOXF1(+) target genes

In [52]:
import json, zlib, base64
import pandas as pd
import scanpy as sc
import loompy as lp

## 1) load anndata object

She used the vasculature-wide object and filtered out smooth muscle cells, pericytes and lymphatic endothelial cells.

In [70]:
# Read the vasculature-wide AnnData object from an .h5ad file.
# NOTE(review): absolute cluster path; it must be adjusted to run this notebook elsewhere.
ad = sc.read_h5ad('/nfs/team205/ac65/vasculature/after_annotation/annotated_objects/vas_object_for_scenic_july2024.h5ad')

In [71]:
# Drop cells whose 'scenic_compartment' is 'smc', 'pericytes' or 'lymphatic_ec';
# .copy() makes the subset an independent object rather than a view.
ad = ad[~ad.obs['scenic_compartment'].isin(['smc', 'pericytes', 'lymphatic_ec'])].copy()

In [72]:
# Sanity check: value range of ad.X and whether ad.raw is set.
ad.X.min(), ad.X.max(), ad.raw

(0.0, 8.639950230999009, None)

In [35]:
# Display the AnnData summary (dimensions and annotation fields).
ad

AnnData object with n_obs × n_vars = 27345 × 30021
    obs: 'orig.ident', 'Sample', 'Winkler_annotation_ec', 'Winkler_annotation_mural', 'Winkler_annotation_mural_fine', 'donor', 'kit', 'study', 'organ_uni', 'Winkler_annotation', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_rb', 'pct_counts_rb', 'Winkler_broad_annotation', 'id', 'organ', 'sorting', 'location', 'age', 'gender', 'dissociation', 'scrublet_score', 'doublet_bh_pval', 'published_annotation', 'HCA', 'published_annotation_skin', 'batch', 'cell_ontology_class', 'free_annotation', 'compartment', 'age2', 'barcode', 'concat_sample_no', 'Population ', 'hospital_id', 'Organ', 'Cell_category', 'Predicted_labels_CellTypist', 'Majority_voting_CellTypist', 'Majority_voting_CellTypist_high', 'doublet_pv

## 2) find DE genes

In [73]:
# Number of cells per 'ann_vas_scenic' label.
ad.obs['ann_vas_scenic'].value_counts()

myo_cap_ec                7842
Littoral_EC               4845
cap_ec                    3334
ven_ec_2                  2522
pul_cap_ec                2277
art_ec_2                  2215
ven_ec_1                  2137
adip_cap_ec               1879
pericentral_cap_ec        1809
endometrium_cap_ec        1755
pul_ven_ec                1286
kidney_cap_ec             1067
aerocyte_ec                952
art_ec_1                   858
spleen_art_ec              700
periportal_cap_ec          660
glomeruli_ec               609
pul_art_ec                 581
pul_tip_cap_ec             576
brain_art_ec               426
blood_brain_barrier_ec     368
kidney_art_ec              235
brain_ven_ec               143
aorta_coronary_ec          138
endocardial_ec              64
Name: ann_vas_scenic, dtype: int64

Find DE genes with the same parameters.

In [74]:
# Rank genes for every 'ann_vas_scenic' group with method='wilcoxon'.
# use_raw=False makes the test run on ad.X (ad.raw is None for this object).
sc.tl.rank_genes_groups(ad, groupby="ann_vas_scenic", method='wilcoxon', use_raw=False)

  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group

In [75]:
# Filter the ranked genes per group and store the result under the key
# "DEG_filtered", leaving the unfiltered ranking in place.
sc.tl.filter_rank_genes_groups(
    ad,
    groupby='ann_vas_scenic',
    key_added="DEG_filtered",
    min_in_group_fraction=0.25,   # threshold on the fraction of expressing cells inside the group
    min_fold_change=2,            # threshold on the fold change of the group versus the other cells
    max_out_group_fraction=0.25,  # threshold on the fraction of expressing cells outside the group
    use_raw=False
)

In [76]:
# Collect the filtered DE results of all groups into one long table and keep
# only the rows that still carry a gene name (rows with NaN in 'names' are dropped).
dedf = (
    sc.get.rank_genes_groups_df(ad, group=None, key='DEG_filtered')
    .dropna(subset=['names'])
)

In [77]:
# Preview the first rows of the filtered DE table.
dedf.head()

Unnamed: 0,group,names,scores,logfoldchanges,pvals,pvals_adj
1,Littoral_EC,KAZN,95.540451,5.738473,0.0,0.0
3,Littoral_EC,NTN4,91.742348,4.13132,0.0,0.0
4,Littoral_EC,LSP1,91.435188,7.229937,0.0,0.0
6,Littoral_EC,DHCR24,91.178131,6.136711,0.0,0.0
7,Littoral_EC,NR5A1,89.604752,7.566905,0.0,0.0


In [78]:
# Report how many filtered DE genes remain for each cell type.
print(f"> number of DEGs per cell type: \n\n{dedf.group.value_counts()}")

> number of DEGs per cell type: 

blood_brain_barrier_ec    1027
brain_ven_ec               733
brain_art_ec               461
Littoral_EC                306
aorta_coronary_ec          201
ven_ec_2                   138
pericentral_cap_ec          74
endocardial_ec              72
adip_cap_ec                 66
art_ec_1                    59
periportal_cap_ec           57
glomeruli_ec                54
aerocyte_ec                 47
pul_art_ec                  45
spleen_art_ec               40
endometrium_cap_ec          39
kidney_art_ec               36
art_ec_2                    32
pul_tip_cap_ec              27
pul_ven_ec                  26
pul_cap_ec                  23
cap_ec                      23
ven_ec_1                    23
myo_cap_ec                  19
kidney_cap_ec                9
Name: group, dtype: int64


## 3) load regulons

In [48]:
# vasculature all: path to the loom file that is opened below to read the regulons.
# NOTE(review): absolute scratch path; it must be adjusted to run this notebook elsewhere.
f_final_loom = "/lustre/scratch126/cellgen/team205/jp30/vasculature/vas_pulmonary_24Jul24/outputs/results/AUCell/vas_pulmonary_24Jul24_all_lineage_splitcomb_comb2/vas_pulmonary_24Jul24_all_lineage_splitcomb_comb2_pyscenic_output.loom"

In [53]:
# scenic output
# Open the loom file read-only; the handle stays open until lf.close() is called.
lf = lp.connect( f_final_loom, mode='r', validate=False )
# The MetaData attribute is base64-decoded, zlib-decompressed and parsed as JSON.
meta = json.loads(zlib.decompress(base64.b64decode( lf.attrs.MetaData )))
# Full matrix read into memory (lf[:,:]), then transposed so rows are CellIDs and columns are Genes.
exprMat = pd.DataFrame( lf[:,:], index=lf.ra.Gene, columns=lf.ca.CellID).T
# Column attribute RegulonsAUC as a table indexed by CellID.
auc_mtx = pd.DataFrame( lf.ca.RegulonsAUC, index=lf.ca.CellID)

In [54]:
# Create a dictionary mapping each regulon name to the list of its target genes.
# The row attribute lf.ra.Regulons is turned into a table with genes as rows and
# one column per regulon; an entry equal to 1 marks the gene as a member.
# DataFrame.items() is used because DataFrame.iteritems() is deprecated and was
# removed in pandas 2.0.
regulons = {
    regulon_name: list(membership[membership == 1].index.values)
    for regulon_name, membership in pd.DataFrame(lf.ra.Regulons, index=lf.ra.Gene).items()
}

  for i,r in pd.DataFrame(lf.ra.Regulons,index=lf.ra.Gene).iteritems():


In [55]:
# Release the loom file handle opened with lp.connect.
lf.close()

In [56]:
# Peek at the first five regulon names (the dictionary keys).
list(regulons)[:5]

['ALX4(+)', 'AR(+)', 'AR(-)', 'ARID3A(+)', 'ARID3A(-)']

## 4) select FOXF1(+) and subset target genes

In [62]:
# Regulon of interest and its target genes, kept as a set so they can be
# intersected with the DE genes.
sel_reg = "FOXF1(+)"
sel_trg = set(regulons[sel_reg])

In [63]:
# Report the size of the selected regulon.
print(f"number of target genes for '{sel_reg}': {len(sel_trg)}")

number of target genes for 'FOXF1(+)': 343


### subset per cell type

In [79]:
# For every cell type present in the filtered DE table, keep the target genes
# of the selected regulon that are also DE genes of that cell type.
# A boolean mask replaces the string-built query so that cell-type labels
# containing quote characters cannot break the expression, and no query string
# has to be parsed on each iteration.
sel_subsets = {
    cell_type: sel_trg.intersection(dedf.loc[dedf["group"] == cell_type, "names"])
    for cell_type in dedf["group"].unique().tolist()
}

Littoral_EC
adip_cap_ec
aerocyte_ec
aorta_coronary_ec
art_ec_1
art_ec_2
blood_brain_barrier_ec
brain_art_ec
brain_ven_ec
cap_ec
endocardial_ec
endometrium_cap_ec
glomeruli_ec
kidney_art_ec
kidney_cap_ec
myo_cap_ec
pericentral_cap_ec
periportal_cap_ec
pul_art_ec
pul_cap_ec
pul_tip_cap_ec
pul_ven_ec
spleen_art_ec
ven_ec_1
ven_ec_2


In [80]:
# Report, per cell type, how many target genes of the selected regulon are DE genes.
print(f"number of {sel_reg} target genes per cell type:\n\n" + "\n".join(f"  - {k}: {len(v)}" for k, v in sel_subsets.items()))

number of FOXF1(+) target genes per cell type:
  - Littoral_EC: 7
  - adip_cap_ec: 1
  - aerocyte_ec: 22
  - aorta_coronary_ec: 4
  - art_ec_1: 4
  - art_ec_2: 2
  - blood_brain_barrier_ec: 35
  - brain_art_ec: 16
  - brain_ven_ec: 16
  - cap_ec: 0
  - endocardial_ec: 1
  - endometrium_cap_ec: 0
  - glomeruli_ec: 6
  - kidney_art_ec: 1
  - kidney_cap_ec: 0
  - myo_cap_ec: 0
  - pericentral_cap_ec: 1
  - periportal_cap_ec: 3
  - pul_art_ec: 15
  - pul_cap_ec: 14
  - pul_tip_cap_ec: 5
  - pul_ven_ec: 1
  - spleen_art_ec: 2
  - ven_ec_1: 1
  - ven_ec_2: 0


## 5) export data frame for import in Cytoscape

In [84]:
# Cell types (keys of sel_subsets) to include in the Cytoscape export.
sel_cell_types = ['pul_ven_ec', 'pul_tip_cap_ec', 'pul_cap_ec', 'pul_art_ec', 'aerocyte_ec']

In [113]:
# Build the export table: one row per target gene, one column per selected
# cell type. Column order follows sel_subsets, not sel_cell_types.
target_gene_df = (
    pd.DataFrame(
        {
            ct: {tg: 1 for tg in tgs}
            for ct, tgs in sel_subsets.items()
            if ct in sel_cell_types
        }
    )
    # genes that are not DE in a cell type get 0 instead of NaN
    .fillna(0)
    # get fractions: divide every row by its row sum so each gene's values add up to 1
    .pipe(lambda membership: membership.div(membership.sum(axis=1), axis=0))
    # move the gene names from the index into a leading 'target' column
    .rename_axis("target")
    .reset_index()
)

# add the source column (the regulon name) in front of 'target'
target_gene_df.insert(0, 'source', sel_reg)

In [112]:
# Preview the first rows of the export table.
target_gene_df.head()

Unnamed: 0,source,target,aerocyte_ec,pul_art_ec,pul_cap_ec,pul_tip_cap_ec,pul_ven_ec
0,FOXF1(+),TBX3,1.0,0.0,0.0,0.0,0.0
1,FOXF1(+),OCLN,1.0,0.0,0.0,0.0,0.0
2,FOXF1(+),RCSD1,1.0,0.0,0.0,0.0,0.0
3,FOXF1(+),PRX,0.5,0.0,0.5,0.0,0.0
4,FOXF1(+),ITGA3,1.0,0.0,0.0,0.0,0.0


In [114]:
# Write the table for import into Cytoscape. index=False leaves out the pandas
# row index, which is only a running counter here and would otherwise appear
# as an extra unnamed first column in the CSV.
target_gene_df.to_csv(f'{sel_reg}_for_cytoscape.csv', index=False)

## 6) plotting in Cytoscape

plot [node charts](https://manual.cytoscape.org/en/stable/Styles.html#tutorial-6-creating-node-charts) in [Cytoscape](https://cytoscape.org/):

1. load the network from CSV file
    - File > Import > Network from File
    - select "*_for_cytoscape.csv"
    - select fraction columns as target node attribute
2. go to styles panel
    - select Image/Chart > select all columns > choose pie diagram
    - change node shape to circle
    - adjust other styles as needed
3. the order of colors under Image/Chart > Customize corresponds to the order of columns
4. export as PDF and make legend manually