#### Converting the table content of one particular GSM data to FOL(first order logic) predicate format.
##### 1. Install and import GEOparse python library which helps to download the standard GSE data from GEO(gene expression omnibus) dataset.

In [2]:
!pip install GEOparse
import GEOparse



##### 2. Select 2 different GSM tables which belong to distinct GSEs. Those tables will be used to verify the compatablity of the function( which is going to be defined to convert table to FOL) with different GSM tables.

In [3]:
gse1 = GEOparse.get_GEO(geo="GSE1564", destdir="./")
gse2 = GEOparse.get_GEO(geo="GSE1565", destdir="./")


27-Mar-2025 23:15:31 DEBUG utils - Directory ./ already exists. Skipping.
27-Mar-2025 23:15:31 INFO GEOparse - File already exist: using local version.
27-Mar-2025 23:15:31 INFO GEOparse - Parsing ./GSE1564_family.soft.gz: 
27-Mar-2025 23:15:31 DEBUG GEOparse - DATABASE: GeoMiame
27-Mar-2025 23:15:31 DEBUG GEOparse - SERIES: GSE1564
27-Mar-2025 23:15:31 DEBUG GEOparse - PLATFORM: GPL1345
27-Mar-2025 23:15:31 DEBUG GEOparse - SAMPLE: GSM26919
27-Mar-2025 23:15:31 DEBUG GEOparse - SAMPLE: GSM26920
27-Mar-2025 23:15:31 DEBUG GEOparse - SAMPLE: GSM26921
27-Mar-2025 23:15:31 DEBUG GEOparse - SAMPLE: GSM26922
27-Mar-2025 23:15:31 DEBUG GEOparse - SAMPLE: GSM26923
27-Mar-2025 23:15:31 DEBUG GEOparse - SAMPLE: GSM26924
27-Mar-2025 23:15:31 DEBUG utils - Directory ./ already exists. Skipping.
27-Mar-2025 23:15:31 INFO GEOparse - File already exist: using local version.
27-Mar-2025 23:15:31 INFO GEOparse - Parsing ./GSE1565_family.soft.gz: 
27-Mar-2025 23:15:31 DEBUG GEOparse - DATABASE: GeoMiam

In [4]:
print(gse1.gsms)
print(gse2.gsms)

{'GSM26919': <SAMPLE: GSM26919>, 'GSM26920': <SAMPLE: GSM26920>, 'GSM26921': <SAMPLE: GSM26921>, 'GSM26922': <SAMPLE: GSM26922>, 'GSM26923': <SAMPLE: GSM26923>, 'GSM26924': <SAMPLE: GSM26924>}
{'GSM26954': <SAMPLE: GSM26954>, 'GSM26955': <SAMPLE: GSM26955>, 'GSM26957': <SAMPLE: GSM26957>, 'GSM26958': <SAMPLE: GSM26958>, 'GSM26959': <SAMPLE: GSM26959>, 'GSM26961': <SAMPLE: GSM26961>}


In [5]:
gsm1 = gse1.gsms["GSM26919"]
gsm2 = gse2.gsms["GSM26954"]

print(f"The table entries of GSE1564 - GSM {gsm1} \n")
print(gsm1.table)

print(f"The table entries of GSE1565 - GSM {gse2} \n")
print(gsm2.table)

The table entries of GSE1564 - GSM <SAMPLE: GSM26919> 

       ID_REF     VALUE
0           1  0.018495
1           2  0.095050
2           3  0.044537
3           4  0.092727
4           5 -0.164470
...       ...       ...
13867   13868  0.061160
13868   13869       NaN
13869   13870       NaN
13870   13871       NaN
13871   13872       NaN

[13872 rows x 2 columns]
The table entries of GSE1565 - GSM <SERIES: GSE1565 - 6 SAMPLES, 1 d(s)> 

                ID_REF   VALUE ABS_CALL  DETECTION P-VALUE
0       AFFX-MurIL2_at   126.0        A           0.455413
1      AFFX-MurIL10_at    71.4        A           0.340661
2       AFFX-MurIL4_at    47.6        A           0.095667
3       AFFX-MurFAS_at   263.6        P           0.000754
4       AFFX-BioB-5_at  1749.3        P           0.000081
...                ...     ...      ...                ...
12483      162502_f_at   152.2        A           0.073830
12484      162503_f_at   153.9        A           0.098054
12485        162504_at  

##### 3. Define a function that converts table data to FOL predicate format.

In [6]:
def gsm_table_to_fol_predicates(gsm):
    """
    Converts a GSM table to a list of FOL predicates
    
    Args:
         gsm: A GSM object 
    Returns:
         A list of FOL predicates
    """
    predicates = []
    for index, row in gsm.table.iterrows():
        # dynamically include column names in the predicate
        predicate_parts = []
        for column in gsm.table.columns:
            value = row.get(column, 'unknown')
            predicate_parts.append(f'{column}=({value})')

        #combine all the parts into a single predicate
        predicate = f"data_entry({', '.join(predicate_parts)})."
        predicates.append(predicate)

    return "\n".join(predicates)




##### 4. Print the results

In [7]:
# Example usage 1
fol_predicates = gsm_table_to_fol_predicates(gsm1)
print("FOL Predicates for GSM1:")
print(fol_predicates)

# example usage2
fol_predicates = gsm_table_to_fol_predicates(gsm2)
print("FOL Predicates for GSM2:")
print(fol_predicates)

FOL Predicates for GSM1:
data_entry(ID_REF=(1.0), VALUE=(0.018494772)).
data_entry(ID_REF=(2.0), VALUE=(0.095050194)).
data_entry(ID_REF=(3.0), VALUE=(0.044536682)).
data_entry(ID_REF=(4.0), VALUE=(0.092726881)).
data_entry(ID_REF=(5.0), VALUE=(-0.164469752)).
data_entry(ID_REF=(6.0), VALUE=(-0.227312817)).
data_entry(ID_REF=(7.0), VALUE=(0.082340225)).
data_entry(ID_REF=(8.0), VALUE=(-0.131935833)).
data_entry(ID_REF=(9.0), VALUE=(0.102288797)).
data_entry(ID_REF=(10.0), VALUE=(0.100073638)).
data_entry(ID_REF=(11.0), VALUE=(-0.040899227)).
data_entry(ID_REF=(12.0), VALUE=(-0.052824315)).
data_entry(ID_REF=(13.0), VALUE=(-0.09404235)).
data_entry(ID_REF=(14.0), VALUE=(0.009399024)).
data_entry(ID_REF=(15.0), VALUE=(0.006173221)).
data_entry(ID_REF=(16.0), VALUE=(-0.009387438)).
data_entry(ID_REF=(17.0), VALUE=(0.020996581)).
data_entry(ID_REF=(18.0), VALUE=(-0.085943613)).
data_entry(ID_REF=(19.0), VALUE=(-0.195176735)).
data_entry(ID_REF=(20.0), VALUE=(-0.018025154)).
data_entry(ID_R