# SNP Grid

Access the SNP data stored in [BigQuery](https://cloud.google.com/bigquery/docs/).

## Import the libraries used in this tutorial

In [1]:
from google.cloud import bigquery

## Initialize a client

In [2]:
# BigQuery client bound explicitly to the project that hosts the SNP tables;
# every query cell in this notebook goes through this object.
client = bigquery.Client(project="jax-gedi-sandbox-nc-01")

The client above is constructed with the `project` parameter set explicitly, so queries run against that project instead of one inferred from the environment.

## Query Mouse Strains

In [5]:
# List up to 10 rows of the mouse_strain table. The bq_sample_id column is the
# value matched against call.sample_id in the genotype queries further down.
query = """
    SELECT 
      id, aname, longname, bq_sample_id
    FROM
      `jax-gedi-sandbox-nc-01.snp_v1.mouse_strain`
    LIMIT 10
"""
query_job = client.query(query)
# result() waits for the job to finish and yields the result rows.
for row in query_job.result():
    print(f'{row["id"]} {row["aname"]} {row["longname"]} {row["bq_sample_id"]}')

3381 RIX:CC(003 x 062) (CC003/Unc x CC062/Unc)F1 8146409811145403136
3390 RIX:CC(018 x 009) (CC018/Unc x CC009/Unc)F1 3847618352557362945
3392 RIX:CC(026 x 034) (CC026/GeniUnc x CC034/Unc)F1 1499438695757769217


In [12]:
# Genotype calls for the strain 'RIX:CC(018 x 009)' within one region table
# (chr1__03_23000021_33000021). Each row of the region table carries a repeated
# `call` field; UNNEST flattens it so the entry whose sample_id equals the
# strain's bq_sample_id can be selected. At most 1000 rows are returned.
# NOTE(review): there is no ORDER BY, so row order should not be relied on.
query = """
    SELECT
      m.id, m.aname, m.longname,
      s.reference_name chr, s.start_position bp38, s.reference_bases major, s.names minor,
      call.genotype, call.GQ, call.GN 
    FROM
      `jax-gedi-sandbox-nc-01.snp_v1.mouse_strain` m,
      `jax-gedi-sandbox-nc-01.snp_v1.chr1__03_23000021_33000021` s, UNNEST(call) as call
    WHERE m.aname = 'RIX:CC(018 x 009)'
        AND m.bq_sample_id = call.sample_id
    LIMIT 1000
"""
query_job = client.query(query)
# Only a subset of the selected columns is printed: strain name, chromosome,
# position, reference base(s) and genotype.
for row in query_job.result():
    print(f'{row["aname"]} {row["chr"]} {row["bp38"]} {row["major"]} {row["genotype"]}')

RIX:CC(018 x 009) 1 23000295 A [5]
RIX:CC(018 x 009) 1 23000411 T [5]
RIX:CC(018 x 009) 1 23000315 T [5]
RIX:CC(018 x 009) 1 23000336 A [5]
RIX:CC(018 x 009) 1 23000192 T [5]
RIX:CC(018 x 009) 1 23000371 C [5]
RIX:CC(018 x 009) 1 23000426 C [5]
RIX:CC(018 x 009) 1 23000286 G [5]
RIX:CC(018 x 009) 1 23000358 C [5]
RIX:CC(018 x 009) 1 23000312 A [5]
RIX:CC(018 x 009) 1 23000404 A [5]
RIX:CC(018 x 009) 1 23000376 G [5]


In [10]:
# Look up which per-region tables fall strictly inside the window
# (13000019, 53000023): region_table_name maps a position range to the names
# of the corresponding chr and strain tables.
query = """
    SELECT 
      chr, start_position, end_position, chr_table_name, strain_table_name
    FROM `jax-gedi-sandbox-nc-01.snp_v1.region_table_name`
    WHERE start_position > 13000019 and end_position < 53000023 
"""
query_job = client.query(query)
# result() waits for the job to finish and yields the result rows.
for row in query_job.result():
    print(f'{row["chr"]} {row["start_position"]} {row["end_position"]} {row["chr_table_name"]} {row["strain_table_name"]}')

1 13000020 23000020 chr1__02_13000020_23000020 strain__02_13000020_23000020
1 23000021 33000021 chr1__03_23000021_33000021 strain__03_23000021_33000021
1 33000022 43000022 chr1__04_33000022_43000022 strain__04_33000022_43000022


In [16]:
# Peek at two start positions stored in the chr1__02 region table.
query = "SELECT start_position FROM `jax-gedi-sandbox-nc-01.snp_v1.chr1__02_13000020_23000020` LIMIT 2"
query_job = client.query(query)
# result() waits for the job to finish and yields the result rows.
for row in query_job.result():
    print(f'{row["start_position"]}')

13000190
13000048


In [22]:
# Peek at two start positions stored in the chr1__03 region table.
query = "SELECT start_position FROM `jax-gedi-sandbox-nc-01.snp_v1.chr1__03_23000021_33000021` LIMIT 2"
query_job = client.query(query)
# result() waits for the job to finish and yields the result rows.
for row in query_job.result():
    print(f'{row["start_position"]}')

23000295
23000312


In [17]:
# Peek at two start positions stored in the chr1__04 region table.
query = "SELECT start_position FROM `jax-gedi-sandbox-nc-01.snp_v1.chr1__04_33000022_43000022` LIMIT 2"
query_job = client.query(query)
# result() waits for the job to finish and yields the result rows.
for row in query_job.result():
    print(f'{row["start_position"]}')

33000203
33000471


In [25]:
# Genotype calls for the strain 'RIX:CC(018 x 009)' across three adjacent chr1
# region tables. The tables are stacked with UNION ALL into one derived table
# `s`, whose repeated `call` field is flattened with UNNEST so the entry for
# the strain's bq_sample_id can be picked out.
#
# The three positions in the IN list are one sample position from each region
# table (they appear in the outputs of the per-table peek cells above), so the
# result demonstrates that all three tables contribute rows.
#
# start_position is qualified with `s.` in WHERE and ORDER BY, matching every
# other column reference in the query; an unqualified name would become
# ambiguous if another source in the FROM clause exposed the same column name.
query = """
    SELECT
      m.id, m.aname, m.longname,
      s.reference_name chr, s.start_position bp38, s.reference_bases major, s.names minor,
      call.genotype, call.GQ, call.GN
    FROM
      `jax-gedi-sandbox-nc-01.snp_v1.mouse_strain` m,
      (SELECT * FROM `jax-gedi-sandbox-nc-01.snp_v1.chr1__02_13000020_23000020` UNION ALL
      SELECT * FROM `jax-gedi-sandbox-nc-01.snp_v1.chr1__03_23000021_33000021` UNION ALL
      SELECT * FROM `jax-gedi-sandbox-nc-01.snp_v1.chr1__04_33000022_43000022`) s,
      UNNEST(call) as call
    WHERE m.aname = 'RIX:CC(018 x 009)'
        AND m.bq_sample_id = call.sample_id
        AND s.start_position IN (13000190, 23000295, 33000203)
    ORDER BY s.start_position
    LIMIT 1000
"""
query_job = client.query(query)
# Only a subset of the selected columns is printed: strain name, chromosome,
# position, reference base(s) and genotype.
for row in query_job:
    print(f'{row["aname"]} {row["chr"]} {row["bp38"]} {row["major"]} {row["genotype"]}')

RIX:CC(018 x 009) 1 13000190 A [5]
RIX:CC(018 x 009) 1 23000295 A [5]
RIX:CC(018 x 009) 1 33000203 A [5]
