In [None]:
import sqlite3 
import pandas as pd
import glob
import os

In [None]:
# Path to the SQLite database that the queries in this notebook read from.
DB_PATH = '../pubchem_gcm.db'

# sqlite3.connect() creates a new, empty database file when the path does not
# exist, which would only show up later as "no such table" errors in the
# queries. Fail early with an explicit message instead.
if not os.path.isfile(DB_PATH):
    raise FileNotFoundError(
        'SQLite database not found: {}'.format(os.path.abspath(DB_PATH))
    )

conn = sqlite3.connect(DB_PATH)

### Download GCM cluster summaries 

In [None]:
# Select every column of gcm_cluster_summary for the clusters whose
# gcm_conclusion is 'yes'.
cluster_summary_sql = """

    SELECT gcs.*
    FROM gcm_cluster_summary gcs 
    WHERE gcs.gcm_conclusion = 'yes'

;"""
df = pd.read_sql(sql=cluster_summary_sql, con=conn)

In [None]:
# Display (number of rows, number of columns) of the cluster-summary result.
df.shape

In [None]:
# Write the cluster summaries to CSV using the same 'gcm.' file prefix as the
# other exports in this notebook; index=False keeps the DataFrame index out of
# the file.
df.to_csv('gcm.gcm_cluster_summaries.csv', index=False)

### Download GCM cluster assay stats 

In [None]:
# Per-assay statistics of each cluster, extended with the assay name and gene
# ids from assay_meta. Only rows with assay_qualified_for_profile = 'yes' that
# belong to clusters with gcm_conclusion = 'yes' are returned.
cluster_assay_stat_sql = """

    SELECT a.*, m.aidname, m.geneids
    FROM gcm_cluster_assay_stat a
    JOIN assay_meta m USING (aid)
    JOIN gcm_cluster_summary gcs USING (gcm_cluster) 
    WHERE a.assay_qualified_for_profile = 'yes'
    AND gcs.gcm_conclusion = 'yes'
    
;"""
df2 = pd.read_sql(sql=cluster_assay_stat_sql, con=conn)

In [None]:
# Display (number of rows, number of columns) of the cluster assay-stat result.
df2.shape

In [None]:
# Write the cluster assay statistics to CSV without the DataFrame index.
assay_stat_csv = 'gcm.gcm_cluster_assay_stat.csv'
df2.to_csv(assay_stat_csv, index=False)

### Download compounds (cpds) and their profile scores from GCM clusters

In [None]:
# All columns of gcm_cpds_scores for clusters with gcm_conclusion = 'yes'.
# The rank filter inside the SQL is commented out, so every row of a
# qualifying cluster is returned.
cpd_scores_sql = """

    SELECT s.*
    FROM gcm_cpds_scores s
    JOIN gcm_cluster_summary gcs USING (gcm_cluster)
    WHERE gcs.gcm_conclusion = 'yes'
    --AND s.cpd_profile_score_rank = 1

;"""
df3 = pd.read_sql(sql=cpd_scores_sql, con=conn)

In [None]:
# Display (number of rows, number of columns) of the compound-score result.
df3.shape

In [None]:
# Replace missing cpd_profile_score values with 0 before the export; the
# column is rebuilt on a new frame that is bound back to df3.
df3 = df3.assign(cpd_profile_score=df3['cpd_profile_score'].fillna(0))

In [None]:
# Write the compound profile scores to CSV without the DataFrame index.
cpd_scores_csv = 'gcm.gcm_cpd_profile_scores.csv'
df3.to_csv(cpd_scores_csv, index=False)

### Download assay metadata 

In [None]:
# One row per distinct (aid, aidname, aiddesc, geneids) for every assay that
# appears in gcm_cluster_assay_stat for a cluster with gcm_conclusion = 'yes'.
# Unlike the assay-stat export, this query does not filter on
# assay_qualified_for_profile.
assay_meta_sql = """

    SELECT DISTINCT m.aid, m.aidname, m.aiddesc, m.geneids
    FROM assay_meta m
    JOIN gcm_cluster_assay_stat USING (aid)
    JOIN gcm_cluster_summary gcs USING (gcm_cluster)
    WHERE gcs.gcm_conclusion = 'yes'

;"""
df4 = pd.read_sql(sql=assay_meta_sql, con=conn)

In [None]:
# Display (number of rows, number of columns) of the assay-metadata result.
df4.shape

In [None]:
# Write the assay metadata to CSV without the DataFrame index.
assay_meta_csv = 'gcm.assay_meta.csv'
df4.to_csv(assay_meta_csv, index=False)

### Download assay data for the compounds (cpds) in GCM clusters

In [None]:
# For each compound in gcm_cpds_scores and each assay of its cluster with
# assay_qualified_for_profile = 'yes', fetch rscore and pubchem_activity_score
# from the `assays` row with the same aid and cid. The joins are inner joins,
# so compound/assay pairs without an `assays` row are not returned. Only
# clusters with gcm_conclusion = 'yes' are included.
cpd_assay_activity_sql = """

    SELECT gcs.gcm_cluster, s.cid, ast.aid, a.rscore, a.pubchem_activity_score
    FROM gcm_cpds_scores s
    JOIN gcm_cluster_summary gcs USING (gcm_cluster)
    JOIN gcm_cluster_assay_stat ast USING (gcm_cluster)
    JOIN assays a ON(a.aid=ast.aid AND a.cid = s.cid)
    WHERE ast.assay_qualified_for_profile = 'yes'
    AND gcs.gcm_conclusion = 'yes'

;"""
df5 = pd.read_sql(sql=cpd_assay_activity_sql, con=conn)

In [None]:
# Display (number of rows, number of columns) of the compound assay-activity result.
df5.shape

In [None]:
# Write the per-compound assay activity to CSV without the DataFrame index.
cpd_assay_activity_csv = 'gcm.gcm_cpds_assay_activity.csv'
df5.to_csv(cpd_assay_activity_csv, index=False)