In [None]:
!pip install biopython pandas nltk tqdm

from Bio import Entrez, Medline
import pandas as pd
from tqdm import tqdm
import nltk
import re
import time

# Fetch the NLTK resources this notebook asks for, in the original order.
for nltk_resource in ('punkt', 'stopwords', 'wordnet'):
    nltk.download(nltk_resource)

# Contact address configured on Biopython's Entrez module.
Entrez.email = "paras1301sharma@gmail.com"

# PubMed expression: a hypoxia phrase AND a gene/genomics term, both
# restricted to the Title/Abstract fields.
query = '("High Altitude Hypoxia"[Title/Abstract] OR "Hypobaric Hypoxia"[Title/Abstract]) AND ("Gene"[Title/Abstract] OR "Genes"[Title/Abstract] OR "Gene Expression"[Title/Abstract] OR "Transcriptome"[Title/Abstract] OR "Genomics"[Title/Abstract])'

# Search PubMed; retmax caps how many PMIDs come back in this single request.
handle = Entrez.esearch(db="pubmed", term=query, retmax=1100)
try:
    record = Entrez.read(handle)
finally:
    # Release the connection even if the response cannot be parsed.
    handle.close()
id_list = record['IdList']

print(f"Total Articles Found: {len(id_list)}")

# The line above reports how many IDs were returned, which can be fewer than
# the number of matches once the query outgrows retmax; flag that case.
total_matches = int(record.get('Count', len(id_list)))
if total_matches > len(id_list):
    print(f"Warning: PubMed reports {total_matches} matches but only {len(id_list)} IDs were retrieved; raise retmax.")

def fetch_batch(id_list):
    """Download and parse the MEDLINE records for one batch of PubMed IDs.

    Args:
        id_list: Sequence of PMID strings; they are comma-joined into a
            single ``efetch`` request.

    Returns:
        list: Every record yielded by ``Medline.parse`` for the response.
    """
    handle = Entrez.efetch(db="pubmed", id=','.join(id_list), rettype="medline", retmode="text")
    try:
        # Materialise the records while the handle is still open.
        return list(Medline.parse(handle))
    finally:
        # Always release the connection, including when parsing fails.
        handle.close()

batch_size = 20
data = []
# (start offset, PMIDs) for every batch that could not be fetched, so missing
# papers are visible instead of being dropped with only an exception message.
failed_batches = []

for start in tqdm(range(0, len(id_list), batch_size)):
    # Slicing clamps at the end of the list, so the last batch may be shorter.
    batch_ids = id_list[start:start + batch_size]
    try:
        # A distinct loop name keeps the esearch result in `record` intact.
        for medline_record in fetch_batch(batch_ids):
            data.append({
                'PMID': medline_record.get("PMID", "No PMID"),
                'Title': medline_record.get("TI", "No Title"),
                'Abstract': medline_record.get("AB", "No Abstract"),
            })
    except Exception as e:
        failed_batches.append((start, list(batch_ids)))
        print(f"Batch starting at {start} failed ({len(batch_ids)} PMIDs): {e}")
    # Pause between requests.
    time.sleep(1)

if failed_batches:
    lost = sum(len(ids) for _, ids in failed_batches)
    print(f"{len(failed_batches)} batch(es) failed; {lost} PMIDs are missing from the CSV.")

# Explicit columns keep the CSV header present even when nothing was fetched.
df = pd.DataFrame(data, columns=['PMID', 'Title', 'Abstract'])
df.to_csv("high_altitude_hypoxia_gene_papers.csv", index=False)
print("Data Collection Done & CSV Saved.")


Collecting biopython
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.85


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


Total Articles Found: 406


100%|██████████| 21/21 [00:32<00:00,  1.56s/it]

Data Collection Done & CSV Saved.





In [None]:
import logging
import warnings

logging.getLogger("biothings.client").setLevel(logging.ERROR)
warnings.filterwarnings("ignore")
!pip install biothings_client biopython pandas nltk tqdm
import pandas as pd
import re
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
from biothings_client import get_client
import torch
import numpy as np

# Client returned by biothings_client.get_client('gene');
# validate_gene_symbol() sends its lookups through it.
gene_client = get_client('gene')

# The same checkpoint name is used to load both the tokenizer and the model.
model_name = "dmis-lab/biobert-base-cased-v1.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# NOTE(review): the recorded output of this cell warns that 'classifier.bias'
# and 'classifier.weight' were "newly initialized" and that the model should
# be trained before being used for predictions. The 2-label head loaded here
# is therefore presumably untrained, and the scores it yields are not
# meaningful NER confidences — TODO confirm / use an NER-fine-tuned checkpoint.
model = AutoModelForTokenClassification.from_pretrained(model_name, num_labels=2)

# Token-classification pipeline built from the objects above; device is 0
# when CUDA is available and -1 otherwise.
ner = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
    device=0 if torch.cuda.is_available() else -1
)

# Results of completed lookups, keyed by the symbol exactly as it was queried,
# so a token that recurs across abstracts is only sent to the service once.
_validated_symbol_cache = {}


def validate_gene_symbol(symbol):
    """Check whether ``symbol`` is a human gene symbol or alias.

    The symbol is queried through ``gene_client`` and compared,
    case-insensitively, against the ``symbol`` and ``alias`` fields of each hit.

    Args:
        symbol: Candidate gene symbol (string).

    Returns:
        bool: True when a hit's symbol or one of its aliases equals ``symbol``.
        False when nothing matches or the lookup fails. Failed lookups are
        not cached, so a transient error does not stick.
    """
    try:
        if symbol in _validated_symbol_cache:
            return _validated_symbol_cache[symbol]

        wanted = symbol.upper()
        results = gene_client.query(symbol, species='human', fields='symbol,alias')

        is_gene = False
        for hit in results.get('hits', []):
            aliases = hit.get('alias', [])
            # A single alias may arrive as a bare string; wrap it so the
            # comparison below is per alias, not per character.
            if isinstance(aliases, str):
                aliases = [aliases]
            if wanted == hit.get('symbol', '').upper() or any(
                wanted == alias.upper() for alias in aliases
            ):
                is_gene = True
                break

        _validated_symbol_cache[symbol] = is_gene
        return is_gene
    except Exception:
        # Best-effort: treat any lookup/parsing failure as "not a gene".
        # Unlike a bare except, KeyboardInterrupt still propagates.
        return False

def chunk_text(text, max_length=450):
    """Split ``text`` into pieces that each encode to at most ``max_length`` tokens.

    Sentences (split after ``.``, ``!`` or ``?`` followed by whitespace) are
    packed greedily into a chunk. A sentence that is too long on its own is
    packed word by word instead. Token counts come from the module-level
    ``tokenizer`` and include special tokens.

    Args:
        text: Text to split; empty, None or NaN yields no chunks.
        max_length: Token budget per chunk.

    Returns:
        list[str]: Non-empty chunks in document order. A single word that
        exceeds the budget by itself is still emitted as its own chunk.
    """
    if not text or pd.isna(text):
        return []

    def token_count(piece):
        return len(tokenizer.encode(piece, add_special_tokens=True))

    chunks = []
    current_chunk = ""

    for sentence in re.split(r'(?<=[.!?])\s+', text):
        test_chunk = current_chunk + " " + sentence if current_chunk else sentence

        if token_count(test_chunk) <= max_length:
            current_chunk = test_chunk
            continue

        # Adding this sentence would overflow: flush what has accumulated.
        if current_chunk:
            chunks.append(current_chunk)

        if token_count(sentence) <= max_length:
            current_chunk = sentence
            continue

        # The sentence alone is too long; fall back to packing by words.
        sub_chunk = ""
        for word in sentence.split():
            test_sub = sub_chunk + " " + word if sub_chunk else word
            if token_count(test_sub) > max_length:
                # Skip the flush when nothing has accumulated yet (the first
                # word is itself over budget) so no empty chunk is emitted.
                if sub_chunk:
                    chunks.append(sub_chunk)
                sub_chunk = word
            else:
                sub_chunk = test_sub

        # The remainder seeds the next chunk so later sentences can join it.
        current_chunk = sub_chunk

    if current_chunk:
        chunks.append(current_chunk)

    return chunks

def extract_candidates_from_text(text):
    """Collect upper-cased strings from ``text`` that might be gene symbols.

    Two sources feed the result: entities from the ``ner`` pipeline scoring
    above 0.85 (run per chunk when the tokenised text reaches 512 tokens),
    and every capitalised token matched by a regular expression.

    Args:
        text: Abstract text; empty, None or NaN yields an empty set.

    Returns:
        set[str]: Upper-cased candidate symbols. Failures in either source
        are printed and do not stop the other source.
    """
    if not text or pd.isna(text):
        return set()

    found = set()

    def harvest(segment):
        # Keep confident entities that look like a bare 2-15 character symbol.
        for entity in ner(segment):
            if entity['score'] > 0.85:
                label = entity['word'].strip()
                if 2 <= len(label) <= 15 and re.match(r"^[A-Za-z0-9-]+$", label):
                    found.add(label.upper())

    try:
        encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
        hit_limit = len(encoded['input_ids'][0]) == 512
        if not hit_limit:
            harvest(text)
        else:
            # Text filled the whole window: process it piece by piece and let
            # one bad piece fail without losing the rest.
            for piece in chunk_text(text):
                try:
                    harvest(piece)
                except Exception as e:
                    print(f"Error processing chunk: {e}")
    except Exception as e:
        print(f"NER error: {e}")

    try:
        found.update(
            token.upper()
            for token in re.findall(r'\b[A-Z][A-Za-z0-9-]{1,14}\b', text)
            if 2 <= len(token) <= 15
        )
    except Exception as e:
        print(f"Pattern matching error: {e}")

    return found

def extract_genes(text):
    """Return the sorted, de-duplicated gene symbols confirmed for ``text``.

    Candidates come from ``extract_candidates_from_text`` and are kept only
    when ``validate_gene_symbol`` accepts them.

    Args:
        text: Abstract text; empty, None or NaN yields an empty list.

    Returns:
        list[str]: Accepted symbols in ascending order.
    """
    if not text or pd.isna(text):
        return []

    confirmed = {
        symbol
        for symbol in extract_candidates_from_text(text)
        if validate_gene_symbol(symbol)
    }
    return sorted(confirmed)

test_abstract = """[The Tibetan sheep is a typical hypoxia-tolerant mammal, which lives on the plateau, at an altitude of between 2500 and 5000 m above sea level; the study of its hypoxic adaptation mechanism provides a reference for exploring the hypoxic adaptation mechanism of other animals. To grope for the genetic mechanism of adaptation to the hypoxic environment at the transcriptional level in Tibetan sheep testicular tissue, and to identify candidate genes and key pathways related to sheep adaptation, histological observation of testicular tissues from two sheep breeds was carried out using haematoxylin-eosin (HE) conventional staining. A total of 103 differentially expressed genes (DEGs) were authenticated in high altitude Tibetan sheep (ZYH) and low altitude Tibetan sheep (ZYM) by RNA sequencing technology (RNA-Seq), which included 50 up-regulated genes and 53 down-regulated genes. Functional analyses revealed several terms and pathways that were closely related to testis adaptation to the plateau. Several genes (including GGT5, AGTR2, EDN1, LPAR3, CYP2C19, IGFBP3, APOC3 and PKC1) were remarkably enriched in several pathways and terms, which may impact the Plateau adaptability of sheep by adjusting its reproductive activity and sexual maturation, and protecting Sertoli cells, various spermatocytes, and spermatogenesis processes. The results make a reasonable case for a better understanding of the molecular mechanisms of adaptation to altitude in sheep.]"""

# Smoke test on a single abstract.
# NOTE(review): the recorded output for this call contains 'DEGS' and 'HE',
# which the abstract uses as abbreviations ("DEGs", "HE" staining) rather than
# as genes it reports, and it lacks 'PKC1', which the abstract does list.
# Treat the result as candidates needing review, not as a clean gene list.
print("Validated genes:", extract_genes(test_abstract))

def process_dataframe_safely(filepath, output_path="final_gene_results.csv"):
    """Run gene extraction over every abstract in a CSV and save the result.

    Args:
        filepath: CSV to read; each row's ``Abstract`` column is processed.
        output_path: Where the augmented CSV is written. Defaults to the
            filename this function has always used.

    Returns:
        pandas.DataFrame | None: The input rows plus a ``Valid_Genes`` column
        holding ``str()`` of the gene list for that row, or None when loading
        or saving fails (the error is printed as "Fatal error").
    """
    try:
        print("Loading dataset...")
        df = pd.read_csv(filepath)

        results = []
        total = len(df)
        print(f"Starting to process {total} abstracts...")

        for i, row in df.iterrows():
            if i % 10 == 0:
                print(f"Processing abstract {i}/{total}...")

            new_row = row.to_dict()
            try:
                genes = extract_genes(row['Abstract'])
            except Exception as e:
                # Keep the row with an empty gene list so one bad abstract
                # does not drop the paper from the output.
                print(f"Error processing row {i}: {e}")
                genes = []
            new_row['Valid_Genes'] = str(genes)
            results.append(new_row)

        result_df = pd.DataFrame(results)
        result_df.to_csv(output_path, index=False)
        print("Processing complete!")

        return result_df

    except Exception as e:
        print(f"Fatal error: {e}")
        return None

# Read the CSV from the same relative location the collection cell saved it
# to, rather than an absolute Colab path that only exists when the working
# directory happens to be /content.
result_df = process_dataframe_safely("high_altitude_hypoxia_gene_papers.csv")


Collecting biothings_client
  Downloading biothings_client-0.4.1-py3-none-any.whl.metadata (10 kB)
Downloading biothings_client-0.4.1-py3-none-any.whl (46 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.7/46.7 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biothings_client
Successfully installed biothings_client-0.4.1


config.json:   0%|          | 0.00/313 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cpu
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]



Validated genes: ['AGTR2', 'APOC3', 'CYP2C19', 'DEGS', 'EDN1', 'GGT5', 'HE', 'IGFBP3', 'LPAR3']
Loading dataset...
Starting to process 406 abstracts...
Processing abstract 0/406...




Processing abstract 10/406...




Processing abstract 20/406...




Processing abstract 30/406...




Processing abstract 40/406...




Processing abstract 50/406...




Processing abstract 60/406...




Processing abstract 70/406...




Processing abstract 80/406...




Processing abstract 90/406...




Processing abstract 100/406...




Processing abstract 110/406...




Processing abstract 120/406...




Processing abstract 130/406...




Processing abstract 140/406...




Processing abstract 150/406...




Processing abstract 160/406...




Processing abstract 170/406...




Processing abstract 180/406...




Processing abstract 190/406...




Processing abstract 200/406...




Processing abstract 210/406...




Processing abstract 220/406...




Processing abstract 230/406...




Processing abstract 240/406...




Processing abstract 250/406...




Processing abstract 260/406...




Processing abstract 270/406...




Processing abstract 280/406...




Processing abstract 290/406...




Processing abstract 300/406...




Processing abstract 310/406...




Processing abstract 320/406...




Processing abstract 330/406...




Processing abstract 340/406...




Processing abstract 350/406...




Processing abstract 360/406...




Processing abstract 370/406...




Processing abstract 380/406...




Processing abstract 390/406...




Processing abstract 400/406...




Processing complete!


In [None]:
import pandas as pd

# Load the per-paper results written by process_dataframe_safely()
your_df = pd.read_csv('final_gene_results.csv')

# Download the HGNC complete gene set (tab-separated) from its public URL
hgnc_url = "https://storage.googleapis.com/public-download-files/hgnc/tsv/tsv/hgnc_complete_set.txt"
hgnc_df = pd.read_csv(hgnc_url, sep="\t")

# Upper-case the 'symbol' column and hold it in a set so membership tests in
# validate_genes() are constant-time
approved_genes = set(hgnc_df['symbol'].str.upper())

# Keep only the symbols found in the approved set (compared in upper case)
def validate_genes(gene_list):
    """Filter ``gene_list`` down to entries present in ``approved_genes``.

    Args:
        gene_list: Iterable of gene symbol strings.

    Returns:
        list[str]: The accepted symbols, in input order and original casing;
        an empty list when none are accepted.
    """
    kept = []
    for gene in gene_list:
        if gene.upper() in approved_genes:
            kept.append(gene)
    return kept

# Apply the validation
import ast  # used only here, to parse the stored list strings

# Valid_Genes holds the str() of a Python list. literal_eval parses such
# literals without executing arbitrary code, which eval() would do for
# anything that ended up in the CSV.
your_df['Valid_Genes'] = your_df['Valid_Genes'].apply(ast.literal_eval)
your_df['Valid_Genes'] = your_df['Valid_Genes'].apply(validate_genes)

# Save the final clean result
your_df.to_csv('HAH_final_gene_results.csv', index=False)


In [None]:
import pandas as pd

import ast  # used only here, to parse the stored list strings

df = pd.read_csv('HAH_final_gene_results.csv')
# Each cell is the printed form of a list; parse it as a literal instead of
# evaluating it as code.
df['Valid_Genes'] = df['Valid_Genes'].apply(ast.literal_eval)

# Union of every paper's gene list.
all_genes = set(gene for gene_list in df['Valid_Genes'] for gene in gene_list)

print(f"Total Unique Validated Genes: {len(all_genes)}")

print(sorted(all_genes))


Total Unique Validated Genes: 504
['ABCB5', 'ABI3', 'ABR', 'ACE', 'ACE2', 'ACHE', 'ACSL4', 'ACTA2', 'ACTB', 'ADAM15', 'ADAM17', 'ADAM9', 'ADH6', 'ADM', 'AEBP2', 'AGT', 'AHR', 'AIMP1', 'ALAS1', 'ALAS2', 'ALKBH5', 'ALOX15', 'ANGPT2', 'ANGPTL4', 'ANXA2', 'APIP', 'APOE', 'APOLD1', 'APP', 'AQP4', 'AR', 'ARID5A', 'ARNT', 'ASL', 'ATF3', 'ATF4', 'ATF5', 'ATF6', 'ATM', 'ATP1B2', 'B2M', 'BACH1', 'BAG2', 'BAX', 'BCL2L2', 'BDNF', 'BHLHE40', 'BHLHE41', 'BMAL1', 'BMPR2', 'BRD2', 'BRD4', 'BRINP3', 'CA1', 'CA2', 'CA3', 'CA4', 'CALM1', 'CALR', 'CAMP', 'CANX', 'CAPN3', 'CAST', 'CAT', 'CBLB', 'CBS', 'CD28', 'CD34', 'CD4', 'CD44', 'CD47', 'CD68', 'CDC42', 'CDKAL1', 'CDKN1A', 'CDKN1B', 'CEBPA', 'CFAP206', 'CHAT', 'CHL1', 'CHRM1', 'CHRM2', 'CHRM3', 'CHRM4', 'CHRM5', 'CLASRP', 'CLC', 'CLCA1', 'CLOCK', 'CMC1', 'COL14A1', 'COL1A1', 'COL1A2', 'COL3A1', 'COMMD6', 'COQ7', 'COX10', 'COX7B', 'CPD', 'CRH', 'CRHR1', 'CRYAA', 'CRYZ', 'CXCL10', 'CXCL2', 'CXCR4', 'CYBA', 'CYP11A1', 'CYP11B2', 'CYP19A1', 'CYP1A2', 'CYP24

In [None]:
!pip install mygene

import mygene

mg = mygene.MyGeneInfo()

# Take the symbols directly from the set computed in the previous cell
# instead of a hand-pasted copy of its printed output. The pasted copy had
# drifted: the previous cell reported 504 unique genes, while the recorded
# query log for this cell covers only 491 terms.
gene_list = sorted(all_genes)



# Query MyGene to check which ones are recognized genes
results = mg.querymany(gene_list, scopes='symbol', fields='symbol', species='human')

# Keep each matched query term once, in first-seen order. The result holds one
# entry per hit, so a symbol that matches several records would otherwise be
# listed and counted repeatedly (the recorded output shows 'CAST' twice).
valid_genes = list(dict.fromkeys(
    res['query'] for res in results if not res.get('notfound', False)
))

print(f"Valid gene symbols: {valid_genes}")
print(f"Number of valid genes: {len(valid_genes)}")




INFO:biothings.client:querying 1-491 ...
INFO:biothings.client:Finished.
INFO:biothings.client:Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Valid gene symbols: ['ABCB5', 'AEBP2', 'AGT', 'AHR', 'AIMP1', 'ALAS1', 'ALAS2', 'ALKBH5', 'ALOX15', 'ANGPT2', 'ANGPTL4', 'ANXA2', 'APIP', 'APOE', 'APOLD1', 'APP', 'AQP4', 'AR', 'ARID5A', 'ARNT', 'ASL', 'ATF3', 'ATF4', 'ATF5', 'ATF6', 'ATM', 'ATP1B2', 'B2M', 'BACH1', 'BAG2', 'BAX', 'BCL2L2', 'BDNF', 'BHLHE40', 'BHLHE41', 'BMAL1', 'BMPR2', 'BRD2', 'BRD4', 'BRINP3', 'CA1', 'CA2', 'CA3', 'CA4', 'CALM1', 'CALR', 'CAMP', 'CANX', 'CAPN3', 'CAST', 'CAST', 'CAT', 'CBLB', 'CBS', 'CD28', 'CD34', 'CD4', 'CD44', 'CD47', 'CD68', 'CDC42', 'CDKAL1', 'CDKN1A', 'CDKN1B', 'CEBPA', 'CFAP206', 'CHAT', 'CHL1', 'CHRM1', 'CHRM2', 'CHRM3', 'CHRM4', 'CHRM5', 'CLASRP', 'CLC', 'CLCA1', 'CLOCK', 'CMC1', 'COL14A1', 'COL1A1', 'COL1A2', 'COL3A1', 'COMMD6', 'COQ7', 'COX10', 'COX7B', 'CPD', 'CRH', 'CRHR1', 'CRYAA', 'CRYZ', 'CXCL10', 'CXCL2', 'CXCR4', 'CYBA', 'CYP11A1', 'CYP11B2', 'CYP19A1', 'CYP1A2', 'CYP24A1', 'CYP27B1', 'CYP2C9', 'CYP2E1', 'CYP2R1', 'CYP3A4', 'DDX24', 'DGKG', 'DHCR24', 'DIP2B', 'DMD', 'DMRTA1', 'DNM2