In [1]:

# Extend the module search path with a local project directory so that
# project-local packages can be imported by later cells (presumably this is
# where the `utils` package lives -- TODO confirm).
# NOTE(review): this is a machine-specific absolute path; the notebook will not
# import its helpers on another machine unless this is adjusted.
import sys
sys.path.append('/home/techt/Desktop/a4s')


### Step 1: Initial Data Loading

In [2]:
from utils.preprocess import *
# 1. Locate the SOFT file and the series-matrix file for this cohort
cohort_dir = '/media/techt/DATA/GEO/Kidney_stones/GSE123993'
soft_file, matrix_file = geo_get_relevant_filepaths(cohort_dir)

# 2. Pull the series-level background lines and the per-sample characteristic
#    lines out of the matrix file, selected by their line prefixes
background_prefixes = [
    '!Series_title',
    '!Series_summary',
    '!Series_overall_design',
]
clinical_prefixes = [
    '!Sample_geo_accession',
    '!Sample_characteristics_ch1',
]
background_info, clinical_data = get_background_and_clinical_data(
    matrix_file,
    background_prefixes,
    clinical_prefixes,
)

# 3. Summarise the clinical dataframe as a dictionary of unique values per row
sample_characteristics_dict = get_unique_values_by_row(clinical_data)

# 4. Show the background information and the sample characteristics dictionary,
#    each on the line after its heading
print("Background Information:", background_info, sep="\n")
print("Sample Characteristics Dictionary:", sample_characteristics_dict, sep="\n")


Background Information:
!Series_title	"No effect of calcifediol supplementation on skeletal muscle transcriptome in vitamin D deficient frail older adults."
!Series_summary	"Vitamin D deficiency is common among older adults and has been linked to muscle weakness. Vitamin D supplementation has been proposed as a strategy to improve muscle function in older adults. The aim of this study was to investigate the effect of calcifediol (25-hydroxycholecalciferol) on whole genome gene expression in skeletal muscle of vitamin D deficient frail older adults. A double-blind placebo controlled trial was conducted in vitamin D deficient frail older adults (aged above 65), characterized by blood 25-hydroxycholecalciferol concentrations between 20 and 50 nmol/L. Subjects were randomized across the placebo group (n=12) and the calcifediol group (n=10, 10 µg per day). Muscle biopsies were obtained before and after six months of calcifediol or placebo supplementation and subjected to whole genome gene e

### Step 2: Dataset Analysis and Clinical Feature Extraction

In [3]:
# Start from "nothing available" defaults; each is overridden below only when
# the cohort metadata supports it.
is_gene_available = False
trait_row = None
age_row = None
gender_row = None
convert_trait = None
convert_age = None
convert_gender = None

# Hard-coded copy of the series summary, keyed by its GEO prefix.
# NOTE: this rebinds `background_info` to a dict holding only the summary text.
background_info = {
    '!Series_summary': ["Vitamin D deficiency is common among older adults and has been linked to muscle weakness. Vitamin D supplementation has been proposed as a strategy to improve muscle function in older adults. The aim of this study was to investigate the effect of calcifediol (25-hydroxycholecalciferol) on whole genome gene expression in skeletal muscle of vitamin D deficient frail older adults. A double-blind placebo controlled trial was conducted in vitamin D deficient frail older adults (aged above 65), characterized by blood 25-hydroxycholecalciferol concentrations between 20 and 50 nmol/L. Subjects were randomized across the placebo group (n=12) and the calcifediol group (n=10, 10 µg per day). Muscle biopsies were obtained before and after six months of calcifediol or placebo supplementation and subjected to whole genome gene expression profiling using Affymetrix HuGene 2.1ST arrays. Expression of the vitamin D receptor gene was virtually undetectable in human skeletal muscle biopsies. Calcifediol supplementation led to a significant increase in blood 25-hydroxycholecalciferol levels compared to the placebo group. No difference between treatment groups was observed on strength outcomes. The whole transcriptome effects of calcifediol and placebo were very weak. Correcting for multiple testing using false discovery rate did not yield any differentially expressed genes using any sensible cut-offs. P-values were uniformly distributed across all genes, suggesting that low p-values are likely to be false positives. Partial least squares-discriminant analysis and principle component analysis was unable to separate treatment groups. Calcifediol supplementation did not affect the skeletal muscle transcriptome in frail older adults. Our findings indicate that vitamin D supplementation has no effects on skeletal muscle gene expression, suggesting that skeletal muscle may not be a direct target of vitamin D in older adults."]
}

# Gene expression data is treated as available when the (lower-cased) series
# summary mentions "transcriptome".
is_gene_available = background_info['!Series_summary'][0].lower().find("transcriptome") != -1

# Hard-coded copy of the sample characteristics, keyed by row index.
sample_characteristics = {
    0: ['tissue: muscle'],
    1: ['Sex: Male', 'Sex: Female'],
    2: [
        f'subject id: {subject}'
        for subject in (
            3087, 3088, 3090, 3106, 3178,
            3241, 3258, 3279, 3283, 3295,
            3322, 3341, 3360, 3361, 3375,
            3410, 3430, 3498, 3516, 3614,
            3695, 3731,
        )
    ],
    3: [
        'intervention group: 25-hydroxycholecalciferol (25(OH)D3)',
        'intervention group: Placebo',
    ],
    4: [
        'time of sampling: before intervention (baseline)',
        'time of sampling: after intervention',
    ],
}

# Gender is the only clinical variable explicitly present: row 1 is used when
# at least one of its entries carries the 'Sex:' label.
if any(entry.find('Sex:') >= 0 for entry in sample_characteristics[1]):
    gender_row = 1
else:
    gender_row = None

# Define conversion functions
def convert_trait(value):
    """Placeholder trait converter that maps every input to None.

    No trait row is identified for this cohort (`trait_row` is None), so
    there is nothing meaningful to convert.

    Args:
        value: Raw sample-characteristics cell; ignored.

    Returns:
        Always None.
    """
    return None

def convert_age(value):
    """Placeholder age converter that maps every input to None.

    No age row is identified for this cohort (`age_row` is None), so there
    is nothing meaningful to convert.

    Args:
        value: Raw sample-characteristics cell; ignored.

    Returns:
        Always None.
    """
    return None

def convert_gender(value):
    """Convert a sample-characteristics gender cell to a binary code.

    The cell is expected to look like ``'Sex: Male'`` / ``'Sex: Female'``.
    The text after the first colon is stripped and compared
    case-insensitively. A cell with no colon is compared as a whole instead
    of raising ``IndexError``.

    Args:
        value: Raw cell content. Non-string cells (e.g. ``None`` or a float
            NaN for a missing entry) are treated as unknown.

    Returns:
        0 for female, 1 for male, or None when the gender cannot be
        determined.
    """
    # Missing cells are not strings; report them as unknown rather than
    # failing on `.split`.
    if not isinstance(value, str):
        return None

    parts = value.split(":")
    # Keep the original "field after the first colon" semantics when a colon
    # is present; otherwise fall back to the whole string.
    label = (parts[1] if len(parts) > 1 else parts[0]).strip().lower()

    return {'female': 0, 'male': 1}.get(label)

# Record this cohort's metadata in the shared JSON file. The last two
# arguments are the gene-availability flag and whether a trait row was found.
save_cohort_info(
    'GSE123993',
    './preprocessed/Kidney_stones/cohort_info.json',
    is_gene_available,
    trait_row is not None,
)


A new JSON file was created at: ./preprocessed/Kidney_stones/cohort_info.json
