In [1]:

import sys
# Extend the module search path so project-local packages can be imported by
# later cells (presumably this is where `utils.preprocess` lives -- TODO confirm).
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run on another machine unless the same directory exists.
sys.path.append('/home/techt/Desktop/a4s')


### Step 1: Initial Data Loading

In [2]:
from utils.preprocess import *
# 1. Locate the SOFT file and the series-matrix file inside the cohort directory.
# NOTE(review): `cohort_dir` is an absolute, machine-specific path.
cohort_dir = '/media/techt/DATA/GEO/Psoriasis/GSE162998'
soft_file, matrix_file = geo_get_relevant_filepaths(cohort_dir)

# 2. Read the matrix file, keeping only lines that start with the given prefixes:
#    series-level title/summary/design as background text, and per-sample
#    accession/characteristics lines as the clinical data.
background_prefixes = ['!Series_title', '!Series_summary', '!Series_overall_design']
clinical_prefixes = ['!Sample_geo_accession', '!Sample_characteristics_ch1']
background_info, clinical_data = get_background_and_clinical_data(matrix_file, background_prefixes, clinical_prefixes)

# 3. Summarise the clinical data as the unique values found in each row
#    (presumably a dict keyed by row index -- TODO confirm against the helper).
sample_characteristics_dict = get_unique_values_by_row(clinical_data)

# 4. Print both artefacts so the available trait/age/gender rows can be
#    inspected by eye before Step 2 sets the row indices.
print("Background Information:")
print(background_info)
print("Sample Characteristics Dictionary:")
print(sample_characteristics_dict)


Background Information:
!Series_title	"Differential regulation of apoptotic and key canonical pathways in psoriasis by therapeutic wavelengths of ultraviolet B radiation"
!Series_summary	"Phototherapy is an effective therapy and may induce remission of psoriasis.  Previous studies have established the action spectrum of clearance and that apoptosis is differentially induced in psoriasis plaques by clinically effective wavelengths of ultraviolet B (UVB).  The aim of this study was to investigate the molecular mechanisms regulating psoriasis plaque resolution by studying the transcriptomic response to clinically effective (311nm, narrow band) UVB compared to a clinically ineffective (290nm) wavelength.  We irradiated lesional psoriatic skin in vivo with a single 3 MED (minimal erythemal dose) of 311nm or 290nm wavelength of UVB and performed skin biopsies at 4h or 18h post irradiation and from un-irradiated lesional skin.  Forty-eight micro-dissected epidermal samples were analysed using

### Step 2: Dataset Analysis and Clinical Feature Extraction

In [3]:
# Flag recorded in the cohort metadata by the save_cohort_info call at the end of this cell.
is_gene_available = True

# Placeholders for the converters; the real functions are defined further down
# in this cell and replace these None values.
convert_trait = convert_age = convert_gender = None

# Row indices into the sample characteristics data; None marks a variable as
# unavailable for this cohort.
# Trait: not listed in the characteristics dictionary (psoriasis status would
# have to be taken from the background information instead).
trait_row = None
# Age: not available in the sample characteristics.
age_row = None
# Gender: not available in the sample characteristics.
gender_row = None

# Define conversion functions
def convert_trait(value):
    """Map a "key: value" characteristics string to a psoriasis label.

    The text between the first and second colon is stripped and compared
    case-insensitively with "psoriasis".

    Args:
        value: Raw cell content, expected to look like "<field>: <label>".

    Returns:
        1 when the label is "psoriasis"; None for any other label, for
        non-string input (e.g. None or NaN), or when the string has no colon.
    """
    if not isinstance(value, str):
        # Missing cells (None / float NaN) have no .split(); treat as unknown.
        return None
    parts = value.split(":")
    if len(parts) < 2:
        # No "key: value" separator; indexing [1] would raise IndexError.
        return None
    return 1 if parts[1].strip().lower() == "psoriasis" else None

def convert_age(value):
    """Extract an integer age from a "key: value" characteristics string.

    The text between the first and second colon is stripped and parsed
    with int().

    Args:
        value: Raw cell content, expected to look like "<field>: <integer>".

    Returns:
        The age as an int, or None when the input is not a string, has no
        colon, or the value part is not a valid integer literal.
    """
    if not isinstance(value, str):
        # Missing cells (None / float NaN) have no .split(); treat as unknown.
        return None
    parts = value.split(":")
    if len(parts) < 2:
        # No "key: value" separator; indexing [1] would raise IndexError,
        # which the original ValueError handler did not catch.
        return None
    try:
        return int(parts[1].strip())
    except ValueError:
        return None

def convert_gender(value):
    """Map a "key: value" characteristics string to a binary gender code.

    The text between the first and second colon is stripped, lower-cased
    and matched against "male" / "female".

    Args:
        value: Raw cell content, expected to look like "<field>: <label>".

    Returns:
        1 for "male", 0 for "female"; None for any other label, for
        non-string input (e.g. None or NaN), or when the string has no colon.
    """
    if not isinstance(value, str):
        # Missing cells (None / float NaN) have no .split(); treat as unknown.
        return None
    parts = value.split(":")
    if len(parts) < 2:
        # No "key: value" separator; indexing [1] would raise IndexError.
        return None
    label = parts[1].strip().lower()
    if label == 'male':
        return 1
    if label == 'female':
        return 0
    return None

# Save metadata: record for cohort GSE162998 whether gene data is available
# and whether a trait row was identified (`trait_row is not None`, which is
# False here because trait_row was set to None above).
# NOTE(review): the cohort ID is repeated as a literal rather than derived
# from `cohort_dir`; keep the two in sync if the cohort changes.
save_cohort_info('GSE162998', './preprocessed/Psoriasis/cohort_info.json', is_gene_available, trait_row is not None)
