In [None]:
# Install BiocManager if you haven't already
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Install GEOquery from Bioconductor
BiocManager::install("GEOquery")


Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

'getOption("repos")' replaces Bioconductor standard repositories, see
'help("repositories", package = "BiocManager")' for details.
Replacement repositories:
    CRAN: https://cran.rstudio.com

Bioconductor version 3.18 (BiocManager 1.30.22), R 4.3.2 (2023-10-31)

Installing package(s) 'BiocVersion', 'GEOquery'

also installing the dependencies ‘BiocGenerics’, ‘statmod’, ‘R.oo’, ‘R.methodsS3’, ‘Biobase’, ‘limma’, ‘R.utils’


Old packages: 'bit', 'curl', 'devtools', 'gargle', 'highr', 'isoband',
  'openssl', 'ragg', 'rvest', 'textshaping', 'whisker', 'xfun', 'zip', 'boot',
  'nlme', 'survival'



In [None]:
# Install if not already installed
if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
BiocManager::install(c("GEOquery", "limma"))


'getOption("repos")' replaces Bioconductor standard repositories, see
'help("repositories", package = "BiocManager")' for details.
Replacement repositories:
    CRAN: https://cran.rstudio.com

Bioconductor version 3.18 (BiocManager 1.30.22), R 4.3.2 (2023-10-31)

“package(s) not installed when version(s) same as or greater than current; use
  `force = TRUE` to re-install: 'GEOquery' 'limma'”
Old packages: 'bit', 'curl', 'devtools', 'gargle', 'highr', 'isoband',
  'openssl', 'ragg', 'rvest', 'textshaping', 'whisker', 'xfun', 'zip', 'boot',
  'nlme', 'survival'



In [None]:
# Install and load the GEOquery package if you haven't already
#install.packages("GEOquery")
library(GEOquery)

# Define the list of accession numbers
accession_numbers <- c("GSE25066", "GSE20685", "GSE19615", "GSE17907", "GSE16446",
                        "GSE17705", "GSE2603", "GSE11121", "GSE7390", "GSE6532")

# Fetch data for each accession number
for (accession in accession_numbers) {
  # Construct the URL for the GEO dataset
  url <- paste0("https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=", accession)

  # Print the URL
  cat("Fetching data for accession:", accession, "\n")
  cat("URL:", url, "\n")

  # Fetch the data using GEOquery
  gse <- getGEO(accession, destdir = "./", GSEMatrix = TRUE)

  # Print the dataset information
  cat("Dataset information for accession:", accession, "\n")
  print(attr(gse, "title"))
  print(attr(gse, "summary"))

  # Print the expression data matrix
  cat("Expression data matrix for accession:", accession, "\n")
  print(exprs(gse[[1]]))
}


In [None]:
# Install and load the necessary packages
install.packages(c("plyr", "dplyr"))
library(GEOquery)
library(plyr)
library(dplyr)

# Define the list of accession numbers
accession_numbers <- c("GSE25066", "GSE20685", "GSE19615", "GSE17907", "GSE16446",
                        "GSE17705", "GSE2603", "GSE11121", "GSE7390", "GSE6532")

# Function to preprocess gene expression data
preprocess_data <- function(gse) {
  # Keep only samples with metadata on metastasis-free survival
  gse <- gse[grep("metastasis", grepl("free survival", gse$series), ignore.case = TRUE)]

  # Extract expression data matrix
  expr_data <- exprs(gse[[1]])

  # Quantile normalization
  expr_data <- normalizeQuantiles(expr_data)

  # Keep only unique probe IDs with the highest average value
  expr_data <- expr_data[!duplicated(rownames(expr_data)), ]

  return(expr_data)
}

# Fetch data and preprocess for each accession number
processed_data <- lapply(accession_numbers, function(accession) {
  # Fetch the data using GEOquery
  gse <- getGEO(accession, destdir = "./", GSEMatrix = TRUE)

  # Print dataset information
  cat("Dataset information for accession:", accession, "\n")
  print(attr(gse, "title"))
  print(attr(gse, "summary"))

  # Preprocess the data
  processed_expr_data <- preprocess_data(gse)

  return(processed_expr_data)
})

# Combine processed data from all datasets
combined_data <- Reduce(function(x, y) merge(x, y, by = "row.names", all = TRUE), processed_data)

# Rename the row names (gene IDs)
row.names(combined_data) <- paste0("Gene_", seq_len(nrow(combined_data)))

# Print the dimensions of the combined data
cat("Combined data dimensions:", dim(combined_data), "\n")


In [None]:
# Load necessary libraries
library(GEOquery)
library(plyr)  # For data manipulation
library(dplyr)  # For data manipulation
library(limma)  # For normalizeQuantiles function

# Define the list of accession numbers
accession_numbers <- c("GSE25066", "GSE20685", "GSE19615", "GSE17907", "GSE16446",
                       "GSE17705", "GSE2603", "GSE11121", "GSE7390", "GSE6532")

# Function to preprocess gene expression data
preprocess_data <- function(gse) {
  # Filter based on metadata (adjust as needed for actual metadata structure)
  pheno_data <- pData(phenoData(gse[[1]]))
  survival_samples <- pheno_data[grepl("metastasis-free survival", pheno_data$characteristics_ch1, ignore.case = TRUE), ]
  expr_data <- exprs(gse[[1]])[, colnames(exprs(gse[[1]])) %in% rownames(survival_samples)]

  # Quantile normalization
  expr_data <- normalizeQuantiles(expr_data)

  # Handling duplicate probe IDs: keep the one with the highest mean expression
  expr_data <- expr_data[!duplicated(rownames(expr_data)), ]
  highest_mean <- tapply(rowMeans(expr_data), rownames(expr_data), max)
  expr_data <- expr_data[rownames(expr_data) %in% names(highest_mean), ]

  return(expr_data)
}

# Initialize list to store processed data
processed_data <- list()

# Fetch data and preprocess for each accession number
for (accession in accession_numbers) {
  # Fetch the data using GEOquery
  gse <- getGEO(accession, destdir = "./", GSEMatrix = TRUE)

  # Print dataset information
  cat("Dataset information for accession:", accession, "\n")
  print(attr(gse, "title"))
  print(attr(gse, "summary"))

  # Preprocess the data
  processed_expr_data <- tryCatch({
    preprocess_data(gse)
  }, error = function(e) {
    cat("Error in preprocessing", accession, ": ", e$message, "\n")
    NULL  # Return NULL in case of error
  })

  if (!is.null(processed_expr_data)) {
    processed_data[[accession]] = processed_expr_data
  }
}

# Combine processed data from all datasets
combined_data <- Reduce(function(x, y) merge(x, y, by = "row.names", all = TRUE), processed_data)

# Rename the row names (gene IDs)
row.names(combined_data) <- paste0("Gene_", seq_len(nrow(combined_data)))

# Print the dimensions of the combined data
cat("Combined data dimensions:", dim(combined_data), "\n")

# Note: This script assumes a certain structure in the metadata and expression data.
# Adjustments may be needed based on the actual content and format of the GEO datasets.



Attaching package: ‘limma’


The following object is masked from ‘package:BiocGenerics’:

    plotMA


Found 1 file(s)

GSE25066_series_matrix.txt.gz

Using locally cached version: .//GSE25066_series_matrix.txt.gz

Using locally cached version of GPL96 found here:
.//GPL96.soft.gz 



Dataset information for accession: GSE25066 
NULL
NULL
Error in preprocessing GSE25066 :  subscript out of bounds 


Found 1 file(s)

GSE20685_series_matrix.txt.gz

Using locally cached version: .//GSE20685_series_matrix.txt.gz

Using locally cached version of GPL570 found here:
.//GPL570.soft.gz 



Dataset information for accession: GSE20685 
NULL
NULL
Error in preprocessing GSE20685 :  subscript out of bounds 


Found 1 file(s)

GSE19615_series_matrix.txt.gz

Using locally cached version: .//GSE19615_series_matrix.txt.gz

Using locally cached version of GPL570 found here:
.//GPL570.soft.gz 



Dataset information for accession: GSE19615 
NULL
NULL
Error in preprocessing GSE19615 :  subscript out of bounds 


Found 2 file(s)

GSE17907-GPL570_series_matrix.txt.gz

Using locally cached version: .//GSE17907-GPL570_series_matrix.txt.gz

Using locally cached version of GPL570 found here:
.//GPL570.soft.gz 

GSE17907-GPL9128_series_matrix.txt.gz

Using locally cached version: .//GSE17907-GPL9128_series_matrix.txt.gz

Using locally cached version of GPL9128 found here:
.//GPL9128.soft.gz 



Dataset information for accession: GSE17907 
NULL
NULL
Error in preprocessing GSE17907 :  subscript out of bounds 


Found 1 file(s)

GSE16446_series_matrix.txt.gz

Using locally cached version: .//GSE16446_series_matrix.txt.gz

Using locally cached version of GPL570 found here:
.//GPL570.soft.gz 



Dataset information for accession: GSE16446 
NULL
NULL
Error in preprocessing GSE16446 :  subscript out of bounds 


Found 1 file(s)

GSE17705_series_matrix.txt.gz

Using locally cached version: .//GSE17705_series_matrix.txt.gz

Using locally cached version of GPL96 found here:
.//GPL96.soft.gz 



Dataset information for accession: GSE17705 
NULL
NULL
Error in preprocessing GSE17705 :  subscript out of bounds 


Found 1 file(s)

GSE2603_series_matrix.txt.gz

Using locally cached version: .//GSE2603_series_matrix.txt.gz

Using locally cached version of GPL96 found here:
.//GPL96.soft.gz 



Dataset information for accession: GSE2603 
NULL
NULL
Error in preprocessing GSE2603 :  subscript out of bounds 


Found 1 file(s)

GSE11121_series_matrix.txt.gz

Using locally cached version: .//GSE11121_series_matrix.txt.gz

Using locally cached version of GPL96 found here:
.//GPL96.soft.gz 



Dataset information for accession: GSE11121 
NULL
NULL
Error in preprocessing GSE11121 :  subscript out of bounds 


Found 1 file(s)

GSE7390_series_matrix.txt.gz

Using locally cached version: .//GSE7390_series_matrix.txt.gz

Using locally cached version of GPL96 found here:
.//GPL96.soft.gz 



Dataset information for accession: GSE7390 
NULL
NULL
Error in preprocessing GSE7390 :  subscript out of bounds 


Found 3 file(s)

GSE6532-GPL570_series_matrix.txt.gz

Using locally cached version: .//GSE6532-GPL570_series_matrix.txt.gz

Using locally cached version of GPL570 found here:
.//GPL570.soft.gz 

GSE6532-GPL96_series_matrix.txt.gz

Using locally cached version: .//GSE6532-GPL96_series_matrix.txt.gz

Using locally cached version of GPL96 found here:
.//GPL96.soft.gz 

GSE6532-GPL97_series_matrix.txt.gz

Using locally cached version: .//GSE6532-GPL97_series_matrix.txt.gz

Using locally cached version of GPL97 found here:
.//GPL97.soft.gz 



Dataset information for accession: GSE6532 
NULL
NULL
Error in preprocessing GSE6532 :  subscript out of bounds 


“first element used of 'length.out' argument”


ERROR: Error in seq_len(nrow(combined_data)): argument must be coercible to non-negative integer


In [None]:
# # Install and load the GEOquery package if you haven't already
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
# BiocManager::install("GEOquery")
# library(GEOquery)

# Define a function to preprocess the data
preprocess_data <- function(gse) {
  tryCatch({
    # Extract expression data
    expr_data <- exprs(gse[[1]])

    # Extract sample information
    sample_info <- pData(gse[[1]])

    # Combine expression data and sample information into a data frame
    expr_df <- cbind(sample_info, expr_data)

    return(expr_df)
  }, error = function(e) {
    cat("Error occurred during preprocessing:", conditionMessage(e), "\n")
    return(NULL)
  })
}

# Define the list of accession numbers
accession_numbers <- c("GSE25066", "GSE20685", "GSE19615", "GSE17907", "GSE16446",
                        "GSE17705", "GSE2603", "GSE11121", "GSE7390", "GSE6532")

# Fetch data for each accession number
for (accession in accession_numbers) {
  # Fetch the data using GEOquery
  gse <- getGEO(accession, destdir = "./", GSEMatrix = TRUE)

  # Print the dataset information
  cat("Dataset information for accession:", accession, "\n")
  print(attr(gse, "title"))
  print(attr(gse, "summary"))

  # Preprocess the data
  processed_expr_data <- preprocess_data(gse)

  if (!is.null(processed_expr_data)) {
    # Print the processed data
    cat("Processed expression data for accession:", accession, "\n")
    print(head(processed_expr_data))
  }
}


Found 1 file(s)

GSE25066_series_matrix.txt.gz

Using locally cached version: .//GSE25066_series_matrix.txt.gz

Using locally cached version of GPL96 found here:
.//GPL96.soft.gz 



Dataset information for accession: GSE25066 
NULL
NULL
Error occurred during preprocessing: arguments imply differing number of rows: 508, 22283 


Found 1 file(s)

GSE20685_series_matrix.txt.gz

Using locally cached version: .//GSE20685_series_matrix.txt.gz

Using locally cached version of GPL570 found here:
.//GPL570.soft.gz 



Dataset information for accession: GSE20685 
NULL
NULL
Error occurred during preprocessing: arguments imply differing number of rows: 327, 54627 


Found 1 file(s)

GSE19615_series_matrix.txt.gz

Using locally cached version: .//GSE19615_series_matrix.txt.gz

Using locally cached version of GPL570 found here:
.//GPL570.soft.gz 



Dataset information for accession: GSE19615 
NULL
NULL
Error occurred during preprocessing: arguments imply differing number of rows: 115, 54675 


Found 2 file(s)

GSE17907-GPL570_series_matrix.txt.gz

Using locally cached version: .//GSE17907-GPL570_series_matrix.txt.gz

Using locally cached version of GPL570 found here:
.//GPL570.soft.gz 

GSE17907-GPL9128_series_matrix.txt.gz

Using locally cached version: .//GSE17907-GPL9128_series_matrix.txt.gz

Using locally cached version of GPL9128 found here:
.//GPL9128.soft.gz 



Dataset information for accession: GSE17907 
NULL
NULL
Error occurred during preprocessing: arguments imply differing number of rows: 55, 24577 


Found 1 file(s)

GSE16446_series_matrix.txt.gz

Using locally cached version: .//GSE16446_series_matrix.txt.gz

Using locally cached version of GPL570 found here:
.//GPL570.soft.gz 



Dataset information for accession: GSE16446 
NULL
NULL
Error occurred during preprocessing: arguments imply differing number of rows: 120, 54675 


Found 1 file(s)

GSE17705_series_matrix.txt.gz

Using locally cached version: .//GSE17705_series_matrix.txt.gz

Using locally cached version of GPL96 found here:
.//GPL96.soft.gz 



Dataset information for accession: GSE17705 
NULL
NULL
Error occurred during preprocessing: arguments imply differing number of rows: 298, 22283 


Found 1 file(s)

GSE2603_series_matrix.txt.gz

Using locally cached version: .//GSE2603_series_matrix.txt.gz

Using locally cached version of GPL96 found here:
.//GPL96.soft.gz 



Dataset information for accession: GSE2603 
NULL
NULL
Error occurred during preprocessing: arguments imply differing number of rows: 121, 22283 


Found 1 file(s)

GSE11121_series_matrix.txt.gz

Using locally cached version: .//GSE11121_series_matrix.txt.gz

Using locally cached version of GPL96 found here:
.//GPL96.soft.gz 



Dataset information for accession: GSE11121 
NULL
NULL
Error occurred during preprocessing: arguments imply differing number of rows: 200, 22283 


Found 1 file(s)

GSE7390_series_matrix.txt.gz

Using locally cached version: .//GSE7390_series_matrix.txt.gz

Using locally cached version of GPL96 found here:
.//GPL96.soft.gz 



Dataset information for accession: GSE7390 
NULL
NULL
Error occurred during preprocessing: arguments imply differing number of rows: 198, 22283 


Found 3 file(s)

GSE6532-GPL570_series_matrix.txt.gz

Using locally cached version: .//GSE6532-GPL570_series_matrix.txt.gz

Using locally cached version of GPL570 found here:
.//GPL570.soft.gz 

GSE6532-GPL96_series_matrix.txt.gz

Using locally cached version: .//GSE6532-GPL96_series_matrix.txt.gz

Using locally cached version of GPL96 found here:
.//GPL96.soft.gz 

GSE6532-GPL97_series_matrix.txt.gz

Using locally cached version: .//GSE6532-GPL97_series_matrix.txt.gz

Using locally cached version of GPL97 found here:
.//GPL97.soft.gz 



Dataset information for accession: GSE6532 
NULL
NULL
Error occurred during preprocessing: arguments imply differing number of rows: 87, 54675 


In [None]:
# Assuming 'gse' is your GEOquery object for a dataset
expr_data <- exprs(gse[[1]])
sample_info <- pData(phenoData(gse[[1]]))

# Check dimensions
cat("Expression data dimensions:", dim(expr_data), "\n")
cat("Sample information dimensions:", dim(sample_info), "\n")

# Ensure alignment
if (ncol(expr_data) != nrow(sample_info)) {
  cat("Mismatch in sample size between expression data and sample information.\n")
  # Further code to handle the mismatch, such as filtering or reordering
}


Expression data dimensions: 54675 87 
Sample information dimensions: 87 61 


In [None]:
# Check if the column names of the expression data match the row names of the sample information
all(colnames(expr_data) == rownames(sample_info))


In [None]:
# Convert expression data to data frame
expr_data_df <- as.data.frame(expr_data)

# Add row names (probe IDs) as a column in the expression data frame
expr_data_df$Probe_ID <- rownames(expr_data_df)

# Combine sample information with expression data
combined_data <- cbind(sample_info, expr_data_df)


ERROR: Error in data.frame(..., check.names = FALSE): arguments imply differing number of rows: 87, 54675


In [None]:
# Subset sample_info for a specific group
specific_group_info <- sample_info[sample_info$Group == "desired_group", ]

# Subset expression data to include only columns (samples) corresponding to the specific group
specific_group_expr_data <- expr_data[, colnames(expr_data) %in% rownames(specific_group_info)]


In [None]:
specific_group_info

“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”


title,geo_accession,status,submission_date,last_update_date,type,channel_count,source_name_ch1,organism_ch1,characteristics_ch1,⋯,er:ch1,grade:ch1,ID:ch1,node:ch1,pgr:ch1,samplename:ch1,series:ch1,size:ch1,t.dmfs:ch1,t.rfs:ch1
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>


In [None]:
# List of sample IDs you're interested in
sample_ids <- c('GSM177885', 'GSM177887', 'GSM177894', 'GSM177895', 'GSM177899',
                'GSM177900', 'GSM177901', 'GSM177902', 'GSM177909', 'GSM177918',
                'GSM615702', 'GSM615703', 'GSM615704', 'GSM615705', 'GSM615761',
                'GSM615763', 'GSM615764', 'GSM615766', 'GSM615768', 'GSM615775')

# List of GEO accession numbers
accession_numbers <- c("GSE25066", "GSE20685", "GSE19615", "GSE17907", "GSE16446",
                       "GSE17705", "GSE2603", "GSE11121", "GSE7390", "GSE6532")


In [None]:
# Check current working directory
getwd()

# If necessary, change it to the directory where your file is located
# Replace '/path/to/your/file' with the actual path to your directory
setwd('/content')

# Try reading the file again with the correct path
data <- read.csv("GEO_HG_PPI.csv")  # Assuming the file is now in the current working directory

# Extract column names
column_names <- colnames(data)

# Remove "probe" from the list
column_names_filtered <- setdiff(column_names, "probe")

# Print the filtered list of column names
print(column_names_filtered)

# Print the column names
#print(column_names)




In [None]:
# List of sample IDs you're interested in
sample_ids <- column_names_filtered
# List of GEO accession numbers
accession_numbers <- c("GSE25066", "GSE20685", "GSE19615", "GSE17907", "GSE16446",
                       "GSE17705", "GSE2603", "GSE11121", "GSE7390", "GSE6532")


In [None]:
library(GEOquery)

# Initialize a list to store phenotype data
phenotype_data_list <- list()

# Loop through each accession number
for (accession in accession_numbers) {
    # Fetch the GEO dataset
    gse <- getGEO(accession, GSEMatrix = TRUE, getGPL = FALSE)

    # Extract the phenotype data for the dataset
    pheno_data <- pData(phenoData(gse[[1]]))

    # Check and extract data for specified sample IDs
    for (sample_id in sample_ids) {
        if (sample_id %in% rownames(pheno_data)) {
            phenotype_data_list[[sample_id]] <- pheno_data[sample_id, ]
        }
    }
}

# Check the extracted phenotype data
phenotype_data_list


Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Convert the list of data frames into a single data frame
phenotype_data_df <- do.call(rbind, phenotype_data_list)

# Check the structure of the new data frame
str(phenotype_data_df)


In [None]:
# Initialize a list to store the selected information
selected_phenotype_data <- list()

# Loop through each element in the phenotype data list
for (sample_id in names(phenotype_data_list)) {
    # Extract the current phenotype data
    current_pheno_data <- phenotype_data_list[[sample_id]]

    # Check if the required columns exist in the current data
    if ("geo_accession" %in% colnames(current_pheno_data) && "PAM50_class" %in% colnames(current_pheno_data)) {
        # Extract the geo_accession and PAM50_class columns
        selected_data <- current_pheno_data[, c("geo_accession", "PAM50_class")]

        # Store the selected data in the new list
        selected_phenotype_data[[sample_id]] <- selected_data
    }
}

# Check the selected phenotype data
selected_phenotype_data


In [None]:
library(GEOquery)

# Define the consistent set of columns you want to extract from each phenotype data
desired_columns <- c("pam50_class:ch1", "geo_accession")  # Add or remove column names as needed

# Initialize an empty data frame with the desired columns
phenotype_data_df <- data.frame(matrix(ncol = length(desired_columns), nrow = 0))
colnames(phenotype_data_df) <- desired_columns

# Loop through each accession number
for (accession in accession_numbers) {
    # Fetch the GEO dataset
    gse <- getGEO(accession, GSEMatrix = TRUE, getGPL = FALSE)

    # Extract the phenotype data for the dataset
    pheno_data <- pData(phenoData(gse[[1]]))

    # Check and extract data for specified sample IDs
    for (sample_id in sample_ids) {
        if (sample_id %in% rownames(pheno_data)) {
            # Extract the row corresponding to sample_id
            sample_row <- pheno_data[sample_id, , drop = FALSE]

            # Ensure the row has all the desired columns, filling in NA for missing columns
            sample_row <- sample_row[, colnames(sample_row) %in% desired_columns, drop = FALSE]
            missing_cols <- setdiff(desired_columns, colnames(sample_row))
            if (length(missing_cols) > 0) {
                sample_row[missing_cols] <- NA
            }

            # Reorder columns to match the desired order
            sample_row <- sample_row[, desired_columns]

            # Append the row to the phenotype_data_df
            phenotype_data_df <- rbind(phenotype_data_df, sample_row)
        }
    }
}

# Check the extracted phenotype data frame
phenotype_data_df


Found 1 file(s)

GSE25066_series_matrix.txt.gz

Using locally cached version: /tmp/Rtmp0BZbjQ/GSE25066_series_matrix.txt.gz

Found 1 file(s)

GSE20685_series_matrix.txt.gz

Using locally cached version: /tmp/Rtmp0BZbjQ/GSE20685_series_matrix.txt.gz

Found 1 file(s)

GSE19615_series_matrix.txt.gz

Using locally cached version: /tmp/Rtmp0BZbjQ/GSE19615_series_matrix.txt.gz

Found 2 file(s)

GSE17907-GPL570_series_matrix.txt.gz

Using locally cached version: /tmp/Rtmp0BZbjQ/GSE17907-GPL570_series_matrix.txt.gz

GSE17907-GPL9128_series_matrix.txt.gz

Using locally cached version: /tmp/Rtmp0BZbjQ/GSE17907-GPL9128_series_matrix.txt.gz

Found 1 file(s)

GSE16446_series_matrix.txt.gz

Using locally cached version: /tmp/Rtmp0BZbjQ/GSE16446_series_matrix.txt.gz

Found 1 file(s)

GSE17705_series_matrix.txt.gz

Using locally cached version: /tmp/Rtmp0BZbjQ/GSE17705_series_matrix.txt.gz

Found 1 file(s)

GSE2603_series_matrix.txt.gz

Using locally cached version: /tmp/Rtmp0BZbjQ/GSE2603_series_matr

Unnamed: 0_level_0,pam50_class:ch1,geo_accession
Unnamed: 0_level_1,<chr>,<chr>
GSM615096,LumA,GSM615096
GSM615100,LumA,GSM615100
GSM615102,Basal,GSM615102
GSM615104,Basal,GSM615104
GSM615105,LumA,GSM615105
GSM615109,Basal,GSM615109
GSM615110,Basal,GSM615110
GSM615111,Basal,GSM615111
GSM615112,Basal,GSM615112
GSM615113,Basal,GSM615113
