# Figure 1b
This notebook reads in the data produced by the figure1 notebook, counts the number of differential gene expression events per sex, and plots a bar chart.

In [3]:
import os
from os import listdir
from os.path import isfile, join
import csv
from collections import defaultdict

In [8]:
dirpath = '../data/'
csvfiles = [f for f in listdir(dirpath) if isfile(join(dirpath, f)) and f.endswith('csv')]
print("Found %d csvfiles" % len(csvfiles))

Found 46 csvfiles


In [41]:
def parse_male_female_significantly_de_gene_counts(path, degenes):
    """
    Assumption -- if the log fold change is positive the gene is more highly expressed in females
    We count the numbers of genes significantly differential according to sex
    @param path -- path to a CSV file
    @param degenes -- a set with differentially expressed genes
    """
    n_male_sig = 0
    n_female_sig = 0
    with open(path) as csvfile:
        creader = csv.reader(csvfile, delimiter=',')
        next(creader)
        for row in creader:
            #print(row)
            if len(row) != 7:
                raise ValueError("Malformed row with %d fields - expected 7: %s" % (len(row), join(",", row)))
            #print(",".join(row))
            logFC = float(row[1])
            adjpval = float(row[5])
            if adjpval <= 0.05:
                gene = row[0]
                degenes.add(gene)
                if logFC > 0:
                    n_female_sig += 1
                else:
                    n_male_sig += 1
    return n_female_sig, n_male_sig

In [43]:
degenes = set()
for f in csvfiles:
    path = os.path.join(dirpath, f)
    n_female_sig, n_male_sig = parse_male_female_significantly_de_gene_counts(path, degenes)
    print("%s male:%d female:%d" % (f, n_female_sig, n_male_sig))
    print("Total unique DE genes: %d" % len(degenes))

Cells-EBV-transformedlymphocytes_DGE.csv male:27 female:27
Total unique DE genes: 54
Brain-CerebellarHemisphere_DGE.csv male:298 female:271
Total unique DE genes: 585
Breast-MammaryTissue_DGE.csv male:4949 female:5423
Total unique DE genes: 10532
Esophagus-Mucosa_DGE.csv male:166 female:71
Total unique DE genes: 10572
Heart-AtrialAppendage_DGE.csv male:403 female:489
Total unique DE genes: 10811
Liver_DGE.csv male:109 female:124
Total unique DE genes: 10865
Nerve-Tibial_DGE.csv male:300 female:432
Total unique DE genes: 11019
Brain-Anteriorcingulatecortex(BA24)_DGE.csv male:15 female:27
Total unique DE genes: 11019
Brain-Nucleusaccumbens(basalganglia)_DGE.csv male:82 female:177
Total unique DE genes: 11071
Pancreas_DGE.csv male:237 female:164
Total unique DE genes: 11150
Pituitary_DGE.csv male:516 female:646
Total unique DE genes: 11395
Skin-SunExposed(Lowerleg)_DGE.csv male:2199 female:1713
Total unique DE genes: 12329
Adipose-Visceral(Omentum)_DGE.csv male:813 female:709
Total unique