In [4]:
# Converts gene names to Ensembl gene IDs for GSEA analysis with PANTHER.

In [1]:
!pip install gseapy
!pip install scanpy
!pip install enrichr



In [2]:
import scanpy as sc
import gseapy as gp
import pandas as pd
import numpy as np
import enrichr

In [3]:
def read_genes_from_csv(filename, max_num):
    """Collect the gene names found in the second column of a CSV file.

    The first line is treated as a header and skipped. Reading stops when the
    zero-based line index equals ``max_num``, so for ``max_num >= 1`` at most
    ``max_num - 1`` data lines are examined.

    Lines are split on plain commas (quoted fields are not interpreted).
    Blank lines, lines with fewer than two columns, and empty gene names are
    skipped instead of raising. Surrounding whitespace is stripped, which
    also removes the trailing newline when the gene is the last column.

    Args:
        filename: Path of the CSV file to read.
        max_num: Line index (the header is line 0) at which reading stops.

    Returns:
        set: The distinct gene names that were read.
    """
    genes = set()
    # The context manager closes the file even if reading fails part-way.
    with open(filename) as csv_file:
        for line_index, line in enumerate(csv_file):
            if line_index == 0:
                continue  # header row
            if line_index == max_num:
                break
            columns = line.split(",")
            if len(columns) < 2:
                continue  # blank or malformed line: there is no gene column
            gene = columns[1].strip()
            if gene:
                genes.add(gene)
    return genes

In [5]:
# Load the genes that are differentially expressed in the older and the
# younger age group; 100 is passed as the `max_num` line cutoff.
older_genes_de, younger_genes_de = (
    read_genes_from_csv(csv_path, 100)
    for csv_path in (
        "../project-files/DE_age_older_j.csv",
        "../project-files/DE_age_younger_j.csv",
    )
)

In [11]:
# Load the genes that are differentially expressed in the ductal and the
# lobular type; 100 is passed as the `max_num` line cutoff.
ductal_genes_de, lobular_genes_de = (
    read_genes_from_csv(csv_path, 100)
    for csv_path in (
        "../project-files/DE_type_ductal_j.csv",
        "../project-files/DE_type_lobular_j.csv",
    )
)

In [6]:
# The gene names have to be converted, so query BioMart for the
# Ensembl gene ID / gene name attributes of the human gene dataset.
from gseapy import Biomart

bm = Biomart()
h2m = bm.query(
    attributes=['ensembl_gene_id', 'external_gene_name'],
    dataset='hsapiens_gene_ensembl',
)

In [7]:
# Build a gene name -> Ensembl gene ID lookup from the BioMart table.
# Rows missing either value are dropped. When a gene name occurs more than
# once, the last row wins, exactly as with repeated dict assignment.
# Iterating plain (name, id) tuples avoids the per-row Series construction
# of iterrows(), which is slow on a table covering every human gene.
h2m_dict = dict(
    h2m.loc[:, ["external_gene_name", "ensembl_gene_id"]]
    .dropna()
    .itertuples(index=False, name=None)
)

In [8]:
# Translate the age-group gene names to Ensembl IDs; names without an entry
# in h2m_dict are left out.
younger_ensembl = {
    h2m_dict[gene_name] for gene_name in younger_genes_de if gene_name in h2m_dict
}
older_ensembl = {
    h2m_dict[gene_name] for gene_name in older_genes_de if gene_name in h2m_dict
}

In [12]:
# Translate the ductal/lobular gene names to Ensembl IDs; names without an
# entry in h2m_dict are left out.
ductal_ensembl = {
    h2m_dict[gene_name] for gene_name in ductal_genes_de if gene_name in h2m_dict
}
lobular_ensembl = {
    h2m_dict[gene_name] for gene_name in lobular_genes_de if gene_name in h2m_dict
}

In [9]:
def write_to_file(liste, filename):
    """Write every element of ``liste`` to ``filename``, one per line.

    The file is created or truncated. Sequences are written in their given
    order. Sets are written in sorted order, because the iteration order of
    a set of strings can differ between interpreter runs and would make the
    output file non-reproducible.

    Args:
        liste: Iterable of strings to write.
        filename: Path of the output file.
    """
    if isinstance(liste, (set, frozenset)):
        liste = sorted(liste)
    # The context manager closes the file even if a write fails part-way.
    with open(filename, "w") as writer:
        for elem in liste:
            writer.write(elem + "\n")

In [10]:
# Save the Ensembl IDs of both age groups, one output file per group.
for ensembl_ids, out_path in (
    (older_ensembl, "older_ensembl.out"),
    (younger_ensembl, "younger_ensembl.out"),
):
    write_to_file(ensembl_ids, out_path)

In [13]:
# Save the Ensembl IDs of the ductal and lobular sets, one output file each.
for ensembl_ids, out_path in (
    (ductal_ensembl, "ductal_ensembl.out"),
    (lobular_ensembl, "lobular_ensembl.out"),
):
    write_to_file(ensembl_ids, out_path)