# Figure 1b
This notebook reads in the data produced by the figure1 notebook, counts the number of differential gene expression events per sex, and plots a bar chart.

In [9]:
import os
import math
from os import listdir
from os.path import isfile, join
import csv
from collections import defaultdict

In [10]:
dirpath = '../data/'
csvfiles = [f for f in listdir(dirpath) if isfile(join(dirpath, f)) and f.endswith('DGE_refined.csv')]
print("Found %d csvfiles" % len(csvfiles))

Found 46 csvfiles


In [17]:
def parse_male_female_significantly_de_gene_counts(path, degenes):
    """
    Assumption -- if the log fold change is positive the gene is more highly expressed in females
    We count the numbers of genes significantly differential according to sex
    @param path -- path to a CSV file
    @param degenes -- a set with differentially expressed genes
    """
    n_male_sig = 0
    n_female_sig = 0
    with open(path) as csvfile:
        creader = csv.reader(csvfile, delimiter=',')
        header = next(creader)
        print(header)
        for row in creader:
            #print(row)
            if len(row) != 7:
                raise ValueError("Malformed row with %d fields - expected 7: %s" % (len(row), join(",", row)))
            #print(",".join(row))
            logFC = float(row[1])
            adjpval = float(row[5])
            if adjpval <= 0.05 and abs(logFC) > math.log2(1.5):
                gene = row[0]
                if not gene.startswith("ENSG"):
                    raise ValueError("Bad gene:", gene)
                degenes.add(gene)
                if logFC > 0:
                    n_female_sig += 1
                else:
                    n_male_sig += 1
    return n_female_sig, n_male_sig

In [18]:
degenes = set()
for f in csvfiles:
    path = os.path.join(dirpath, f)
    n_female_sig, n_male_sig = parse_male_female_significantly_de_gene_counts(path, degenes)
    print("%s male:%d female:%d" % (f, n_female_sig, n_male_sig))
    print("Total unique DE genes: %d" % len(degenes))

['logFC', 'AveExpr', 't', 'P.Value', 'adj.P.Val', 'B']
Cells-EBV-transformedlymphocytes_DGE.csv male:12 female:19
Total unique DE genes: 31
['logFC', 'AveExpr', 't', 'P.Value', 'adj.P.Val', 'B']
Brain-CerebellarHemisphere_DGE.csv male:32 female:23
Total unique DE genes: 67
['logFC', 'AveExpr', 't', 'P.Value', 'adj.P.Val', 'B']
Breast-MammaryTissue_DGE.csv male:1131 female:680
Total unique DE genes: 1843
['logFC', 'AveExpr', 't', 'P.Value', 'adj.P.Val', 'B']
Esophagus-Mucosa_DGE.csv male:9 female:21
Total unique DE genes: 1844
['logFC', 'AveExpr', 't', 'P.Value', 'adj.P.Val', 'B']
Heart-AtrialAppendage_DGE.csv male:17 female:44
Total unique DE genes: 1862
['logFC', 'AveExpr', 't', 'P.Value', 'adj.P.Val', 'B']
Liver_DGE.csv male:33 female:51
Total unique DE genes: 1898
['logFC', 'AveExpr', 't', 'P.Value', 'adj.P.Val', 'B']
Nerve-Tibial_DGE.csv male:15 female:32
Total unique DE genes: 1901
['logFC', 'AveExpr', 't', 'P.Value', 'adj.P.Val', 'B']
Brain-Anteriorcingulatecortex(BA24)_DGE.csv m