In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell executes, so edits to the local modules imported by this notebook are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os

import pandas as pd

import get_geo
import kraft
import name_biology

In [None]:
# Flag handed to kraft.download and get_geo.get_gse as their `overwrite` argument
# in the cells that follow.
overwrite = False

# Directory handed to the download helpers and used as the destination of the
# saved tables: "Downloads" under the current user's home directory.
directory_path = os.path.expanduser("~/Downloads")

## [Intertumoral Heterogeneity within Medulloblastoma Subgroups](https://www.cell.com/cancer-cell/fulltext/S1535-6108(17)30201-5)

[GSE85218](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE85218) (GSE85212, GSE85217)

In [None]:
# GEO accession for this section; the section's markdown lists GSE85212 and
# GSE85217 alongside it, and those are the series the downloads in this section
# read from.
gse_id = "GSE85218"

In [None]:
# Read the tab-separated table "mmc2.tsv" (resolved relative to the current
# working directory) using its first column as row labels, then transpose it so
# the file's columns become rows, and name the row axis "Information".
information_x_sample = (
    pd.read_csv("mmc2.tsv", sep="\t", index_col=0)
    .transpose()
    .rename_axis(index="Information")
)

# kraft.separate_type returns two tables, unpacked here.
# NOTE(review): judging by the target names it splits rows into continuous and
# binary features — confirm against kraft.separate_type.
continuous_x_sample, binary_x_sample = kraft.separate_type(information_x_sample)

In [None]:
# Run the continuous table through kraft.tidy and rebind the same name.
# NOTE(review): because the input name is reused, re-running this cell feeds the
# already-processed table back into kraft.tidy — confirm that is harmless.
continuous_x_sample = kraft.tidy(continuous_x_sample)

# Bare last expression: the notebook renders the table as the cell output.
continuous_x_sample

In [None]:
# Run the binary table through kraft.tidy and rebind the same name.
# NOTE(review): because the input name is reused, re-running this cell feeds the
# already-processed table back into kraft.tidy — confirm that is harmless.
binary_x_sample = kraft.tidy(binary_x_sample)

# Bare last expression: the notebook renders the table as the cell output.
binary_x_sample

In [None]:
# Build a gene-by-sample table from the GSE85212 methylation beta-value file.
#
# kraft.download is given the URL, the target directory and the `overwrite` flag;
# its return value is passed straight to pd.read_csv.
#
# index_col=0 makes the file's first column the row labels. Without it pandas
# assigns a default integer RangeIndex, and the identifier-to-gene mapping below
# would be applied to 0, 1, 2, ... instead of the identifiers stored in the file.
# NOTE(review): assumes the first column holds the Illumina probe IDs that
# name_biology.ILMNID_GENE is keyed on — confirm against the file header.
#
# NOTE(review): the next cell rebinds `gene_x_sample` to the expression table, so
# on a top-to-bottom run this methylation-derived table is replaced.
gene_x_sample = pd.read_csv(
    kraft.download(
        "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE85nnn/GSE85212/suppl/GSE85212_Methylation_763samples_SubtypeStudy_TaylorLab_beta_values.txt.gz",
        directory_path,
        overwrite=overwrite,
    ),
    sep="\t",
    index_col=0,
)

# Translate row labels through name_biology.ILMNID_GENE.
gene_x_sample.index = gene_x_sample.index.map(name_biology.ILMNID_GENE)

gene_x_sample.index.name = "Gene"

# Drop rows whose label is missing after the mapping, then pass the remainder
# through kraft.group and kraft.tidy.
# NOTE(review): kraft.group presumably merges rows that share a gene label — confirm.
gene_x_sample = kraft.tidy(kraft.group(gene_x_sample.loc[~gene_x_sample.index.isna()]))

# Bare last expression: the notebook renders the table as the cell output.
gene_x_sample

In [None]:
# Build a gene-by-sample table from the GSE85217 expression file.
#
# The value returned by kraft.download (called with the URL, the target directory
# and the `overwrite` flag) is read as a tab-separated table whose first column
# supplies the row labels.
gene_x_sample = pd.read_csv(
    kraft.download(
        "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE85nnn/GSE85217/suppl/GSE85217_M_exp_763_MB_SubtypeStudy_TaylorLab.txt.gz",
        directory_path,
        overwrite=overwrite,
    ),
    index_col=0,
    sep="\t",
)

# Discard the first four remaining columns.
# NOTE(review): presumably annotation columns that precede the samples — confirm.
gene_x_sample = gene_x_sample.iloc[:, 4:]

# Translate row labels through name_biology.ENS_GENE and name the row axis.
gene_x_sample.index = gene_x_sample.index.map(name_biology.ENS_GENE)
gene_x_sample.index.name = "Gene"

# Keep only rows whose label is not missing after the mapping, then pass the
# result through kraft.group followed by kraft.tidy.
gene_x_sample = gene_x_sample.loc[gene_x_sample.index.notna()]
gene_x_sample = kraft.group(gene_x_sample)
gene_x_sample = kraft.tidy(gene_x_sample)

# Bare last expression: the notebook renders the table as the cell output.
gene_x_sample

## [Subgroup-specific structural variation across 1,000 medulloblastoma genomes](https://www.nature.com/articles/nature11327)

[GSE37385](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE37385) (GSE37382, GSE37384)

In [None]:
# NOTE(review): the very next cell rebinds gse_id to "GSE37382" before anything
# reads this value, so on a top-to-bottom run this assignment has no effect.
gse_id = "GSE37385"

In [None]:
# Accession passed to get_geo.get_gse below and used in the output file names of
# this section.
gse_id = "GSE37382"

# get_geo.get_gse is called with the accession, the target directory and the
# `overwrite` flag; it returns three objects, unpacked here. This rebinds
# continuous_x_sample and binary_x_sample from the previous section.
# NOTE(review): judging by the names, the three objects are a main data table plus
# continuous and binary sample annotations — confirm against get_geo.get_gse.
_x_sample, continuous_x_sample, binary_x_sample = get_geo.get_gse(
    gse_id, directory_path, overwrite=overwrite
)

In [None]:
# Display the first object returned by get_geo.get_gse as the cell output.
_x_sample

In [None]:
# Display the second object returned by get_geo.get_gse as the cell output.
continuous_x_sample

In [None]:
# Display the third object returned by get_geo.get_gse as the cell output.
binary_x_sample

In [None]:
# Write three tables into directory_path, one file each, named
# "<gse_id>_<table name>.tsv".
#
# sep="\t" is passed explicitly: DataFrame.to_csv writes comma-separated text by
# default, which would not match the ".tsv" extension or the sep="\t" used when
# this notebook reads .tsv files.
#
# NOTE(review): gene_x_sample is not reassigned in this section. On a
# top-to-bottom run it still holds the table built from the GSE85217 expression
# file, while gse_id is "GSE37382" — confirm which table is meant to be saved
# under this accession.
gene_x_sample.to_csv(
    "{}/{}_gene_x_sample.tsv".format(directory_path, gse_id), sep="\t"
)

continuous_x_sample.to_csv(
    "{}/{}_continuous_x_sample.tsv".format(directory_path, gse_id), sep="\t"
)

binary_x_sample.to_csv(
    "{}/{}_binary_x_sample.tsv".format(directory_path, gse_id), sep="\t"
)

## [Novel molecular subgroups for clinical classification and outcome prediction in childhood medulloblastoma: a cohort study](https://www.thelancet.com/journals/lanonc/article/PIIS1470-2045(17)30243-7/fulltext)

[GSE93646](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE93646)

In [None]:
# Accession passed to get_geo.get_gse below.
gse_id = "GSE93646"

# get_geo.get_gse is called with the accession, the target directory and the
# `overwrite` flag; it returns three objects, unpacked here. This rebinds the
# three names used by the previous section.
# NOTE(review): judging by the names, the three objects are a main data table plus
# continuous and binary sample annotations — confirm against get_geo.get_gse.
_x_sample, continuous_x_sample, binary_x_sample = get_geo.get_gse(
    gse_id, directory_path, overwrite=overwrite
)

In [None]:
# Display the first object returned by get_geo.get_gse as the cell output.
_x_sample

In [None]:
# Display the second object returned by get_geo.get_gse as the cell output.
continuous_x_sample

In [None]:
# Display the third object returned by get_geo.get_gse as the cell output.
binary_x_sample

In [None]:
# Read the GSE93646 non-normalized supplementary file as a tab-separated table.
#
# The URL uses the https:// scheme, matching the other downloads in this notebook
# that fetch from the same NCBI host (the original cell was the only one using
# ftp://).
#
# index_col=1 takes the row labels from the file's second column.
# NOTE(review): unlike the GSE85212/GSE85217 cells, no identifier-to-gene mapping,
# kraft.group or kraft.tidy step is applied here, and the name `gene_x_sample` is
# rebound to this raw table — confirm that is intended.
gene_x_sample = pd.read_csv(
    kraft.download(
        "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE93nnn/GSE93646/suppl/GSE93646_non_normalized.txt.gz",
        directory_path,
        overwrite=overwrite,
    ),
    sep="\t",
    index_col=1,
)

# Bare last expression: the notebook renders the table as the cell output.
gene_x_sample

## [Multiple recurrent genetic events converge on control of histone lysine methylation in medulloblastoma](https://www.nature.com/articles/ng.336)

[GSE14437](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE14437)

In [None]:
# Accession passed to get_geo.get_gse below.
gse_id = "GSE14437"

# get_geo.get_gse is called with the accession, the target directory and the
# `overwrite` flag; it returns three objects, unpacked here. This rebinds the
# three names used by the previous section.
# NOTE(review): judging by the names, the three objects are a main data table plus
# continuous and binary sample annotations — confirm against get_geo.get_gse.
_x_sample, continuous_x_sample, binary_x_sample = get_geo.get_gse(
    gse_id, directory_path, overwrite=overwrite
)