In [1]:

import sys

# Extend the module search path with the project checkout (presumably where the
# `utils` package imported by later cells lives -- confirm against the repo layout).
# The membership guard keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
if '/home/techt/Desktop/a4s' not in sys.path:
    sys.path.append('/home/techt/Desktop/a4s')


### Step 1: Initial Data Loading

In [2]:
from utils.preprocess import *
# --- 1. Locate the SOFT file and the series matrix file for this cohort ---
cohort_dir = '/media/techt/DATA/GEO/Longevity_and_Aging/GSE155177'
soft_file, matrix_file = geo_get_relevant_filepaths(cohort_dir)

# --- 2. Pull background info and per-sample characteristics from the matrix file ---
# The two prefix lists are handed to the helper together with the matrix path.
background_prefixes = ['!Series_title', '!Series_summary', '!Series_overall_design']
clinical_prefixes = ['!Sample_geo_accession', '!Sample_characteristics_ch1']
background_info, clinical_data = get_background_and_clinical_data(
    matrix_file,
    background_prefixes,
    clinical_prefixes,
)

# --- 3. Build the sample characteristics dictionary from the clinical dataframe ---
sample_characteristics_dict = get_unique_values_by_row(clinical_data)

# --- 4. Show both results, each under its own heading line ---
print("Background Information:", background_info, sep="\n")
print("Sample Characteristics Dictionary:", sample_characteristics_dict, sep="\n")


Background Information:
!Series_title	"IgG4-ROD and orbital MALT lymphoma"
!Series_summary	"The molecular pathogenesis of orbital lymphoproliferative disorders, such as immunoglobulin G4-related ophthalmic disease (IgG4-ROD) and orbital mucosa-associated lymphoid tissue (MALT) lymphoma, remains essentially unknown. Differentiation between the two disorders, which is important since work-up and treatment can vary greatly, is often challenging due to the lack of specific biomarkers. Although miRNAs play an important role in the regulation of carcinogenesis and inflammation, the relationship between miRNA and orbital lymphoproliferative diseases remains unknown. A comprehensive analysis of 2,565 miRNAs was performed in biopsied specimens and serum of 17 cases with IgG4-ROD and 21 cases with orbital MALT lymphoma. We identified specific miRNA signatures, their miRNA target pathways, and network analysis associated with IgG4-ROD and orbital MALT lymphoma. Machine-learning analysis identifie

### Step 2: Dataset Analysis and Clinical Feature Extraction

In [3]:
# Conversion hooks start out unset; they are bound to real functions when applicable.
convert_trait, convert_age, convert_gender = None, None, None

# This dataset contains miRNA data, so no gene expression data is available.
is_gene_available = False

# Unique values found in each sample-characteristics row (row index -> values).
sample_characteristics_dict = {
    0: [
        'diagnosis: IgG4 related ophthalmic disease',
        'diagnosis: orbital MALT lymphoma',
        'diagnosis: orbital MALT lympoma',
        'diagnosis: healthy individual',
    ],
    1: [
        'tissue: biopsied specimen',
        'tissue: serum',
    ],
    # Same 30 'age: <n>' strings, generated from the bare numbers in their original order.
    2: ['age: {}'.format(years) for years in (
        75, 43, 73, 60, 86, 78, 67, 79, 85, 59,
        69, 89, 62, 31, 77, 52, 46, 65, 88, 41,
        80, 87, 84, 68, 76, 56, 57, 81, 38, 61,
    )],
    3: [
        'Sex: F',
        'Sex: M',
    ],
    4: [
        'swollen eyelid: P',
        'swollen eyelid: N',
    ],
    5: [
        'diplopia: N',
        'diplopia: P',
    ],
    6: [
        'decrease of vision: N',
        'decrease of vision: P',
    ],
    7: [
        'other organ lesion: submandibular\xa0lymph nodes',
        'other organ lesion: N',
        'other organ lesion: abdomen\xa0lymph\xa0node',
        'other organ lesion: parotid\xa0gland',
        'other organ lesion: submandibular\xa0lymph nodes, bile duct dilation',
        'other organ lesion: mediastinal\xa0lymph\xa0nodes',
    ],
}

# Row indices into the dictionary above for each variable of interest.
trait_row = None  # no row directly annotates 'Longevity_and_Aging'
age_row = 2       # key 2 holds the 'age: <n>' entries, with multiple unique values
gender_row = 3    # key 3 holds 'Sex: F' / 'Sex: M'
# Data Type Conversion Functions
def convert_trait(value):
    """Placeholder trait converter that maps every input to None.

    No trait data was found for this cohort, so there is nothing to convert;
    `value` is accepted only to keep the converter signature uniform.
    """
    return None

def convert_age(value):
    """Parse an 'age: <number>' sample-characteristics string into a float.

    Args:
        value: Raw characteristic string, e.g. 'age: 75'.

    Returns:
        The number after the first colon as a float, or None when `value`
        is not a string, contains no colon, or the text after the colon is
        not a valid number.
    """
    if not isinstance(value, str):
        return None

    # Split on the bare colon and strip afterwards, so 'age:75' and 'age:  75 '
    # parse the same way as the canonical 'age: 75'.
    _, separator, raw_age = value.partition(':')
    if not separator:
        return None

    try:
        return float(raw_age.strip())
    except ValueError:
        # Only a malformed number is expected here; anything else (for example
        # KeyboardInterrupt) must propagate instead of being swallowed.
        return None

def convert_gender(value):
    """Encode a 'Sex: <label>' sample-characteristics string as a binary code.

    Args:
        value: Raw characteristic string, e.g. 'Sex: F' or 'Sex: M'.

    Returns:
        0 for female ('F' / 'female'), 1 for male ('M' / 'male'), matched
        case-insensitively; None when `value` is not a string, contains no
        colon, or carries any other label.
    """
    if not isinstance(value, str):
        return None

    # Split on the bare colon and strip afterwards, so a missing space
    # ('Sex:M') does not turn a valid label into None.
    _, separator, label = value.partition(':')
    if not separator:
        return None

    # Lookup table instead of an if/elif chain; .get() yields None for
    # unrecognised labels, matching the original fallback.
    codes = {'f': 0, 'female': 0, 'm': 1, 'male': 1}
    return codes.get(label.strip().lower())

# Save cohort information: cohort id, output JSON path, gene-data flag,
# and whether a trait row was identified.
save_cohort_info(
    'GSE155177',
    './preprocessed/Longevity_and_Aging/cohort_info.json',
    is_gene_available,
    trait_row is not None,
)
