From 776561a123cdb0075a5b63332152bd6f5aa84f6a Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Wed, 12 Dec 2018 16:47:48 -0800 Subject: [PATCH 1/7] adds datalab migration guide samples --- bigquery/datalab-migration/README.md | 4 + bigquery/datalab-migration/requirements.txt | 3 + bigquery/datalab-migration/samples_test.py | 378 ++++++++++++++++++++ 3 files changed, 385 insertions(+) create mode 100644 bigquery/datalab-migration/README.md create mode 100644 bigquery/datalab-migration/requirements.txt create mode 100644 bigquery/datalab-migration/samples_test.py diff --git a/bigquery/datalab-migration/README.md b/bigquery/datalab-migration/README.md new file mode 100644 index 00000000000..bfe697e4f18 --- /dev/null +++ b/bigquery/datalab-migration/README.md @@ -0,0 +1,4 @@ +# Datalab Migration Guide + +This directory contains samples used in the `datalab` to +`google-cloud-bigquery` migration guide. diff --git a/bigquery/datalab-migration/requirements.txt b/bigquery/datalab-migration/requirements.txt new file mode 100644 index 00000000000..a31b9c80c08 --- /dev/null +++ b/bigquery/datalab-migration/requirements.txt @@ -0,0 +1,3 @@ +google-cloud-bigquery[pandas,pyarrow]==1.7.0 +datalab==1.1.4 +ipython==7.2.0 diff --git a/bigquery/datalab-migration/samples_test.py b/bigquery/datalab-migration/samples_test.py new file mode 100644 index 00000000000..d649c9455f5 --- /dev/null +++ b/bigquery/datalab-migration/samples_test.py @@ -0,0 +1,378 @@ +# Copyright 2018 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import time + +from google.cloud import bigquery +import pytest +try: + import IPython + from IPython.testing import tools + from IPython.terminal import interactiveshell +except ImportError: # pragma: NO COVER + IPython = None + + +@pytest.fixture +def temp_dataset(): + client = bigquery.Client() + dataset_id = "temp_dataset_{}".format(int(time.time() * 1000)) + dataset_ref = bigquery.DatasetReference(client.project, dataset_id) + dataset = client.create_dataset(bigquery.Dataset(dataset_ref)) + yield dataset + client.delete_dataset(dataset, delete_contents=True) + + +@pytest.fixture(scope='session') +def ipython(): + config = tools.default_config() + config.TerminalInteractiveShell.simple_prompt = True + shell = interactiveshell.TerminalInteractiveShell.instance(config=config) + return shell + + +@pytest.fixture() +def ipython_interactive(request, ipython): + """Activate IPython's builtin hooks + + for the duration of the test scope. + """ + with ipython.builtin_trap: + yield ipython + + +@pytest.fixture +def to_delete(): + from google.cloud import bigquery + client = bigquery.Client() + doomed = [] + yield doomed + for dataset_id in doomed: + dataset = bigquery.Dataset.from_string( + '{}.{}'.format(client.project, dataset_id)) + client.delete_dataset(dataset, delete_contents=True) + + +def _set_up_ipython(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension('google.cloud.bigquery') + return ip + + +def _strip_region_tags(sample_text): + """Remove blank lines and region tags from sample text""" + magic_lines = [line for line in sample_text.split('\n') + if len(line) > 0 and '# [' not in line] + return '\n'.join(magic_lines) + + +def _run_magic_sample(sample, ip): + result = ip.run_cell(_strip_region_tags(sample)) + result.raise_error() # Throws an exception if the cell failed. + + +@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") +def test_query_magic(ipython): + ip = _set_up_ipython() + + # Datalab sample + """ + # [START bigquery_migration_datalab_query_magic] + %%bq + SELECT word, SUM(word_count) as count + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY word + ORDER BY count ASC + # [END bigquery_migration_datalab_query_magic] + """ + + sample = """ + # [START bigquery_migration_client_library_query_magic] + %%bigquery + SELECT word, SUM(word_count) as count + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY word + ORDER BY count ASC + # [END bigquery_migration_client_library_query_magic] + """ + _run_magic_sample(sample, ip) + + +@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") +def test_query_magic_results_variable(ipython): + ip = _set_up_ipython() + + # Datalab sample + """ + # [START bigquery_migration_datalab_query_magic_results_variable] + %%bq --name my_variable + SELECT word, SUM(word_count) as count + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY word + ORDER BY count ASC + # [END bigquery_migration_datalab_query_magic_results_variable] + """ + + sample = """ + # [START bigquery_migration_client_library_query_magic_results_variable] + %%bigquery my_variable + SELECT word, SUM(word_count) as count + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY word + ORDER BY count ASC + # [END bigquery_migration_client_library_query_magic_results_variable] + """ + _run_magic_sample(sample, ip) + + +@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") +def test_query_magic_parameterized_query(ipython): + ip = _set_up_ipython() + + # Datalab samples + """ + # [START bigquery_migration_datalab_magic_parameterized_query_define] + %%bq query -n my_variable + SELECT word, SUM(word_count) as count + FROM `bigquery-public-data.samples.shakespeare` + WHERE corpus = @corpus_name + GROUP BY word + ORDER BY count ASC + # [END bigquery_migration_datalab_magic_parameterized_query_define] + + # [START bigquery_migration_datalab_magic_parameterized_query_execute] + %%bq execute -q endpoint_stats + parameters: + - name: corpus_name + type: STRING + value: hamlet + # [END bigquery_migration_datalab_magic_parameterized_query_execute] + """ + + sample = """ + # [START bigquery_migration_client_library_magic_parameterized_query_define_parameter] + params = {"corpus_name": "hamlet"} + # [END bigquery_migration_client_library_magic_parameterized_query_define_parameter] + """ + _run_magic_sample(sample, ip) + + sample = """ + # [START bigquery_migration_client_library_magic_parameterized_query] + %%bigquery my_variable --params $params + SELECT word, SUM(word_count) as count + FROM `bigquery-public-data.samples.shakespeare` + WHERE corpus = @corpus_name + GROUP BY word + ORDER BY count ASC + # [END bigquery_migration_client_library_magic_parameterized_query] + """ + _run_magic_sample(sample, ip) + + +@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") +def test_command_line_interface(ipython): + ip = IPython.get_ipython() + + # Datalab sample + """ + # [START bigquery_migration_datalab_list_tables_magic] + %bq tables list --dataset bigquery-public-data.samples + # [END bigquery_migration_datalab_list_tables_magic] + """ + + sample = """ + # [START bigquery_migration_datalab_list_tables_magic] + !bq ls bigquery-public-data:samples + # [END bigquery_migration_datalab_list_tables_magic] + """ + _run_magic_sample(sample, ip) + + sample = """ + # [START bigquery_migration_command_line_help] + !bq help + # [END bigquery_migration_command_line_help] + """ + _run_magic_sample(sample, ip) + + +def test_datalab_query(): + # [START bigquery_migration_datalab_query] + import google.datalab.bigquery as bq + sql = """ + SELECT name FROM `bigquery-public-data.usa_names.usa_1910_current` + WHERE state = "TX" + LIMIT 100 + """ + df = bq.Query(sql).execute().result().to_dataframe() + # [END bigquery_migration_datalab_query] + + assert len(df) == 100 + + +def test_client_library_query(): + # [START bigquery_migration_client_library_query] + from google.cloud import bigquery + client = bigquery.Client() + sql = """ + SELECT name FROM `bigquery-public-data.usa_names.usa_1910_current` + WHERE state = "TX" + LIMIT 100 + """ + df = client.query(sql).to_dataframe() + # [END bigquery_migration_client_library_query] + + assert len(df) == 100 + + +def test_datalab_load_table_from_gcs_csv(to_delete): + # [START bigquery_migration_datalab_load_table_from_gcs_csv] + import google.datalab.bigquery as bq + + # Create the dataset + dataset_id = 'import_sample' + # [END bigquery_migration_datalab_load_table_from_gcs_csv] + # Use unique dataset ID to avoid collisions when running tests + dataset_id = 'test_dataset_{}'.format(int(time.time() * 1000)) + to_delete.append(dataset_id) + # [START bigquery_migration_datalab_load_table_from_gcs_csv] + bq.Dataset(dataset_id).create() + + # Create the table + schema = [ + {'name': 'name', 'type': 'STRING'}, + {'name': 'post_abbr', 'type': 'STRING'}, + ] + table = bq.Table( + '{}.us_states'.format(dataset_id)).create(schema=schema) + table.load( + 'gs://cloud-samples-data/bigquery/us-states/us-states.csv', + mode='append', + source_format='csv', + csv_options=bq.CSVOptions(skip_leading_rows = 1) + ) # Waits for the job to complete + # [END bigquery_migration_datalab_load_table_from_gcs_csv] + + assert table.length == 50 + + +def test_client_library_load_table_from_gcs_csv(to_delete): + # [START bigquery_migration_client_library_load_table_from_gcs_csv] + from google.cloud import bigquery + client = bigquery.Client() + + # Create the dataset + dataset_id = 'import_sample' + # [END bigquery_migration_client_library_load_table_from_gcs_csv] + # Use unique dataset ID to avoid collisions when running tests + dataset_id = 'test_dataset_{}'.format(int(time.time() * 1000)) + to_delete.append(dataset_id) + # [START bigquery_migration_client_library_load_table_from_gcs_csv] + dataset = bigquery.Dataset(client.dataset(dataset_id)) + dataset.location = 'US' + client.create_dataset(dataset) + + # Create the table + job_config = bigquery.LoadJobConfig( + schema = [ + bigquery.SchemaField('name', 'STRING'), + bigquery.SchemaField('post_abbr', 'STRING') + ], + skip_leading_rows = 1, + # The source format defaults to CSV, so the line below is optional. + source_format = bigquery.SourceFormat.CSV + ) + load_job = client.load_table_from_uri( + 'gs://cloud-samples-data/bigquery/us-states/us-states.csv', + dataset.table('us_states'), + job_config=job_config + ) + load_job.result() # Waits for table load to complete. + # [END bigquery_migration_client_library_load_table_from_gcs_csv] + + table = client.get_table(dataset.table('us_states')) + assert table.num_rows == 50 + + +def test_datalab_load_table_from_dataframe(to_delete): + # [START bigquery_migration_datalab_load_table_from_dataframe] + import google.datalab.bigquery as bq + import pandas + + # Create the dataset + dataset_id = 'import_sample' + # [END bigquery_migration_datalab_load_table_from_dataframe] + # Use unique dataset ID to avoid collisions when running tests + dataset_id = 'test_dataset_{}'.format(int(time.time() * 1000)) + to_delete.append(dataset_id) + # [START bigquery_migration_datalab_load_table_from_dataframe] + bq.Dataset(dataset_id).create() + + # Create the table and load the data + dataframe = pandas.DataFrame([ + {'title': 'The Meaning of Life', 'release_year': 1983}, + {'title': 'Monty Python and the Holy Grail', 'release_year': 1975}, + {'title': 'Life of Brian', 'release_year': 1979}, + { + 'title': 'And Now for Something Completely Different', + 'release_year': 1971 + }, + ]) + schema = bq.Schema.from_data(dataframe) + table = bq.Table('{}.monty_python'.format(dataset_id)).create(schema=schema) + table.insert(dataframe) # Starts steaming insert of data + # [END bigquery_migration_datalab_load_table_from_dataframe] + # The Datalab library uses tabledata().insertAll() to load data from + # pandas DataFrames to tables. Because it can take a long time for the rows + # to be available in the table, this test does not assert on the number of + # rows in the destination table after the job is run. If errors are + # encountered during the insertion, this test will fail. + # See https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability + + +def test_client_library_load_table_from_dataframe(to_delete): + # [START bigquery_migration_client_library_load_table_from_dataframe] + from google.cloud import bigquery + import pandas + + client = bigquery.Client() + + dataset_id = 'import_sample' + # [END bigquery_migration_client_library_load_table_from_dataframe] + # Use unique dataset ID to avoid collisions when running tests + dataset_id = 'test_dataset_{}'.format(int(time.time() * 1000)) + to_delete.append(dataset_id) + # [START bigquery_migration_client_library_load_table_from_dataframe] + dataset = bigquery.Dataset(client.dataset(dataset_id)) + dataset.location = 'US' + client.create_dataset(dataset) + + # Create the table and load the data + dataframe = pandas.DataFrame([ + {'title': 'The Meaning of Life', 'release_year': 1983}, + {'title': 'Monty Python and the Holy Grail', 'release_year': 1975}, + {'title': 'Life of Brian', 'release_year': 1979}, + { + 'title': 'And Now for Something Completely Different', + 'release_year': 1971 + }, + ]) + load_job = client.load_table_from_dataframe( + dataframe, dataset.table('monty_python'), location='US') + load_job.result() # Waits for table load to complete. + # [END bigquery_migration_client_library_load_table_from_dataframe] + + table = client.get_table(dataset.table('monty_python')) + assert table.num_rows == 4 + From a45536eaa1229aa9d6d18de40925219c0f9fe48b Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Thu, 13 Dec 2018 11:04:04 -0800 Subject: [PATCH 2/7] test datalab magics and fix lint --- bigquery/datalab-migration/requirements.txt | 1 + bigquery/datalab-migration/samples_test.py | 111 ++++++++++---------- 2 files changed, 57 insertions(+), 55 deletions(-) diff --git a/bigquery/datalab-migration/requirements.txt b/bigquery/datalab-migration/requirements.txt index a31b9c80c08..dc6a580b0df 100644 --- a/bigquery/datalab-migration/requirements.txt +++ b/bigquery/datalab-migration/requirements.txt @@ -1,3 +1,4 @@ google-cloud-bigquery[pandas,pyarrow]==1.7.0 datalab==1.1.4 ipython==7.2.0 +google-cloud-monitoring==0.28.1 diff --git a/bigquery/datalab-migration/samples_test.py b/bigquery/datalab-migration/samples_test.py index d649c9455f5..52fb811395c 100644 --- a/bigquery/datalab-migration/samples_test.py +++ b/bigquery/datalab-migration/samples_test.py @@ -12,10 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import time -from google.cloud import bigquery import pytest try: import IPython @@ -25,16 +23,6 @@ IPython = None -@pytest.fixture -def temp_dataset(): - client = bigquery.Client() - dataset_id = "temp_dataset_{}".format(int(time.time() * 1000)) - dataset_ref = bigquery.DatasetReference(client.project, dataset_id) - dataset = client.create_dataset(bigquery.Dataset(dataset_ref)) - yield dataset - client.delete_dataset(dataset, delete_contents=True) - - @pytest.fixture(scope='session') def ipython(): config = tools.default_config() @@ -43,16 +31,6 @@ def ipython(): return shell -@pytest.fixture() -def ipython_interactive(request, ipython): - """Activate IPython's builtin hooks - - for the duration of the test scope. - """ - with ipython.builtin_trap: - yield ipython - - @pytest.fixture def to_delete(): from google.cloud import bigquery @@ -65,9 +43,9 @@ def to_delete(): client.delete_dataset(dataset, delete_contents=True) -def _set_up_ipython(): +def _set_up_ipython(extension): ip = IPython.get_ipython() - ip.extension_manager.load_extension('google.cloud.bigquery') + ip.extension_manager.load_extension(extension) return ip @@ -84,11 +62,10 @@ def _run_magic_sample(sample, ip): @pytest.mark.skipif(IPython is None, reason="Requires `ipython`") -def test_query_magic(ipython): - ip = _set_up_ipython() +def test_datalab_query_magic(ipython): + ip = _set_up_ipython('google.datalab.kernel') - # Datalab sample - """ + sample = """ # [START bigquery_migration_datalab_query_magic] %%bq SELECT word, SUM(word_count) as count @@ -97,6 +74,12 @@ def test_query_magic(ipython): ORDER BY count ASC # [END bigquery_migration_datalab_query_magic] """ + _run_magic_sample(sample, ip) + + +@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") +def test_client_library_query_magic(ipython): + ip = _set_up_ipython('google.cloud.bigquery') sample = """ # [START bigquery_migration_client_library_query_magic] @@ -111,11 +94,10 @@ def test_query_magic(ipython): @pytest.mark.skipif(IPython is None, reason="Requires `ipython`") -def test_query_magic_results_variable(ipython): - ip = _set_up_ipython() +def test_datalab_query_magic_results_variable(ipython): + ip = _set_up_ipython('google.datalab.kernel') - # Datalab sample - """ + sample = """ # [START bigquery_migration_datalab_query_magic_results_variable] %%bq --name my_variable SELECT word, SUM(word_count) as count @@ -124,6 +106,12 @@ def test_query_magic_results_variable(ipython): ORDER BY count ASC # [END bigquery_migration_datalab_query_magic_results_variable] """ + _run_magic_sample(sample, ip) + + +@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") +def test_client_library_query_magic_results_variable(ipython): + ip = _set_up_ipython('google.cloud.bigquery') sample = """ # [START bigquery_migration_client_library_query_magic_results_variable] @@ -138,33 +126,41 @@ def test_query_magic_results_variable(ipython): @pytest.mark.skipif(IPython is None, reason="Requires `ipython`") -def test_query_magic_parameterized_query(ipython): - ip = _set_up_ipython() +def test_datalab_magic_parameterized_query(ipython): + ip = _set_up_ipython('google.datalab.kernel') - # Datalab samples - """ - # [START bigquery_migration_datalab_magic_parameterized_query_define] + sample = """ + # [START bigquery_migration_datalab_magic_define_parameterized_query] %%bq query -n my_variable SELECT word, SUM(word_count) as count FROM `bigquery-public-data.samples.shakespeare` WHERE corpus = @corpus_name GROUP BY word ORDER BY count ASC - # [END bigquery_migration_datalab_magic_parameterized_query_define] + # [END bigquery_migration_datalab_magic_define_parameterized_query] + """ + _run_magic_sample(sample, ip) - # [START bigquery_migration_datalab_magic_parameterized_query_execute] + sample = """ + # [START bigquery_migration_datalab_magic_execute_parameterized_query] %%bq execute -q endpoint_stats parameters: - name: corpus_name type: STRING value: hamlet - # [END bigquery_migration_datalab_magic_parameterized_query_execute] + # [END bigquery_migration_datalab_magic_execute_parameterized_query] """ + _run_magic_sample(sample, ip) + + +@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") +def test_query_magic_parameterized_query(ipython): + ip = _set_up_ipython('google.cloud.bigquery') sample = """ - # [START bigquery_migration_client_library_magic_parameterized_query_define_parameter] + # [START bigquery_migration_client_library_magic_query_params] params = {"corpus_name": "hamlet"} - # [END bigquery_migration_client_library_magic_parameterized_query_define_parameter] + # [END bigquery_migration_client_library_magic_query_params] """ _run_magic_sample(sample, ip) @@ -182,20 +178,25 @@ def test_query_magic_parameterized_query(ipython): @pytest.mark.skipif(IPython is None, reason="Requires `ipython`") -def test_command_line_interface(ipython): - ip = IPython.get_ipython() +def test_datalab_list_tables_magic(ipython): + ip = _set_up_ipython('google.datalab.kernel') - # Datalab sample - """ + sample = """ # [START bigquery_migration_datalab_list_tables_magic] %bq tables list --dataset bigquery-public-data.samples # [END bigquery_migration_datalab_list_tables_magic] """ + _run_magic_sample(sample, ip) + + +@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") +def test_command_line_interface(ipython): + ip = IPython.get_ipython() sample = """ - # [START bigquery_migration_datalab_list_tables_magic] + # [START bigquery_migration_command_line_list_tables] !bq ls bigquery-public-data:samples - # [END bigquery_migration_datalab_list_tables_magic] + # [END bigquery_migration_command_line_list_tables] """ _run_magic_sample(sample, ip) @@ -260,7 +261,7 @@ def test_datalab_load_table_from_gcs_csv(to_delete): 'gs://cloud-samples-data/bigquery/us-states/us-states.csv', mode='append', source_format='csv', - csv_options=bq.CSVOptions(skip_leading_rows = 1) + csv_options=bq.CSVOptions(skip_leading_rows=1) ) # Waits for the job to complete # [END bigquery_migration_datalab_load_table_from_gcs_csv] @@ -285,13 +286,13 @@ def test_client_library_load_table_from_gcs_csv(to_delete): # Create the table job_config = bigquery.LoadJobConfig( - schema = [ + schema=[ bigquery.SchemaField('name', 'STRING'), bigquery.SchemaField('post_abbr', 'STRING') ], - skip_leading_rows = 1, + skip_leading_rows=1, # The source format defaults to CSV, so the line below is optional. - source_format = bigquery.SourceFormat.CSV + source_format=bigquery.SourceFormat.CSV ) load_job = client.load_table_from_uri( 'gs://cloud-samples-data/bigquery/us-states/us-states.csv', @@ -330,7 +331,8 @@ def test_datalab_load_table_from_dataframe(to_delete): }, ]) schema = bq.Schema.from_data(dataframe) - table = bq.Table('{}.monty_python'.format(dataset_id)).create(schema=schema) + table = bq.Table( + '{}.monty_python'.format(dataset_id)).create(schema=schema) table.insert(dataframe) # Starts steaming insert of data # [END bigquery_migration_datalab_load_table_from_dataframe] # The Datalab library uses tabledata().insertAll() to load data from @@ -338,7 +340,7 @@ def test_datalab_load_table_from_dataframe(to_delete): # to be available in the table, this test does not assert on the number of # rows in the destination table after the job is run. If errors are # encountered during the insertion, this test will fail. - # See https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability + # See https://cloud.google.com/bigquery/streaming-data-into-bigquery def test_client_library_load_table_from_dataframe(to_delete): @@ -375,4 +377,3 @@ def test_client_library_load_table_from_dataframe(to_delete): table = client.get_table(dataset.table('monty_python')) assert table.num_rows == 4 - From e843ee491492ed2021619e2c20a0b782c51a8e20 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Thu, 13 Dec 2018 16:21:00 -0800 Subject: [PATCH 3/7] asserts for all tests --- bigquery/datalab-migration/requirements.txt | 3 +- bigquery/datalab-migration/samples_test.py | 140 +++++++++++--------- 2 files changed, 78 insertions(+), 65 deletions(-) diff --git a/bigquery/datalab-migration/requirements.txt b/bigquery/datalab-migration/requirements.txt index dc6a580b0df..7159c1c3a6c 100644 --- a/bigquery/datalab-migration/requirements.txt +++ b/bigquery/datalab-migration/requirements.txt @@ -1,4 +1,5 @@ google-cloud-bigquery[pandas,pyarrow]==1.7.0 datalab==1.1.4 -ipython==7.2.0 +ipython==7.2.0; python_version > '2.7' +ipython==5.5; python_version == '2.7' google-cloud-monitoring==0.28.1 diff --git a/bigquery/datalab-migration/samples_test.py b/bigquery/datalab-migration/samples_test.py index 52fb811395c..296878e186e 100644 --- a/bigquery/datalab-migration/samples_test.py +++ b/bigquery/datalab-migration/samples_test.py @@ -12,15 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import time - import pytest -try: - import IPython - from IPython.testing import tools - from IPython.terminal import interactiveshell -except ImportError: # pragma: NO COVER - IPython = None +import IPython +from IPython.testing import tools +from IPython.terminal import interactiveshell +import time @pytest.fixture(scope='session') @@ -56,12 +52,6 @@ def _strip_region_tags(sample_text): return '\n'.join(magic_lines) -def _run_magic_sample(sample, ip): - result = ip.run_cell(_strip_region_tags(sample)) - result.raise_error() # Throws an exception if the cell failed. - - -@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") def test_datalab_query_magic(ipython): ip = _set_up_ipython('google.datalab.kernel') @@ -74,10 +64,9 @@ def test_datalab_query_magic(ipython): ORDER BY count ASC # [END bigquery_migration_datalab_query_magic] """ - _run_magic_sample(sample, ip) + ip.run_cell(_strip_region_tags(sample)) -@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") def test_client_library_query_magic(ipython): ip = _set_up_ipython('google.cloud.bigquery') @@ -90,94 +79,132 @@ def test_client_library_query_magic(ipython): ORDER BY count ASC # [END bigquery_migration_client_library_query_magic] """ - _run_magic_sample(sample, ip) + ip.run_cell(_strip_region_tags(sample)) -@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") def test_datalab_query_magic_results_variable(ipython): ip = _set_up_ipython('google.datalab.kernel') sample = """ - # [START bigquery_migration_datalab_query_magic_results_variable] - %%bq --name my_variable - SELECT word, SUM(word_count) as count - FROM `bigquery-public-data.samples.shakespeare` - GROUP BY word - ORDER BY count ASC - # [END bigquery_migration_datalab_query_magic_results_variable] + # [START bigquery_migration_datalab_query_magic_define_query] + %%bq query -n my_query + SELECT name FROM `bigquery-public-data.usa_names.usa_1910_current` + WHERE state = "TX" + LIMIT 100 + # [END bigquery_migration_datalab_query_magic_define_query] """ - _run_magic_sample(sample, ip) + ip.run_cell(_strip_region_tags(sample)) + + sample = """ + # [START bigquery_migration_datalab_execute_query] + import google.datalab.bigquery as bq + + my_variable = my_query.execute().result().to_dataframe() + # [END bigquery_migration_datalab_execute_query] + """ + ip.run_cell(_strip_region_tags(sample)) + + variable_name = "my_variable" + assert variable_name in ip.user_ns # verify that variable exists + my_variable = ip.user_ns[variable_name] + assert len(my_variable) == 100 + ip.user_ns.pop(variable_name) # clean up variable -@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") def test_client_library_query_magic_results_variable(ipython): ip = _set_up_ipython('google.cloud.bigquery') sample = """ # [START bigquery_migration_client_library_query_magic_results_variable] %%bigquery my_variable - SELECT word, SUM(word_count) as count - FROM `bigquery-public-data.samples.shakespeare` - GROUP BY word - ORDER BY count ASC + SELECT name FROM `bigquery-public-data.usa_names.usa_1910_current` + WHERE state = "TX" + LIMIT 100 # [END bigquery_migration_client_library_query_magic_results_variable] """ - _run_magic_sample(sample, ip) + ip.run_cell(_strip_region_tags(sample)) + + variable_name = "my_variable" + assert variable_name in ip.user_ns # verify that variable exists + my_variable = ip.user_ns[variable_name] + assert len(my_variable) == 100 + ip.user_ns.pop(variable_name) # clean up variable -@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") def test_datalab_magic_parameterized_query(ipython): + import pandas + ip = _set_up_ipython('google.datalab.kernel') sample = """ # [START bigquery_migration_datalab_magic_define_parameterized_query] - %%bq query -n my_variable + %%bq query -n my_query SELECT word, SUM(word_count) as count FROM `bigquery-public-data.samples.shakespeare` WHERE corpus = @corpus_name GROUP BY word ORDER BY count ASC + LIMIT @limit # [END bigquery_migration_datalab_magic_define_parameterized_query] """ - _run_magic_sample(sample, ip) + ip.run_cell(_strip_region_tags(sample)) + + sample = """ + # [START bigquery_migration_datalab_magic_query_params] + corpus_name = "hamlet" + limit = 10 + # [END bigquery_migration_datalab_magic_query_params] + """ + ip.run_cell(_strip_region_tags(sample)) sample = """ # [START bigquery_migration_datalab_magic_execute_parameterized_query] - %%bq execute -q endpoint_stats + %%bq execute -q my_query --to-dataframe parameters: - name: corpus_name type: STRING - value: hamlet + value: $corpus_name + - name: limit + type: INTEGER + value: $limit # [END bigquery_migration_datalab_magic_execute_parameterized_query] """ - _run_magic_sample(sample, ip) + ip.run_cell(_strip_region_tags(sample)) + df = ip.user_ns["_"] # Retrieves last returned object in notebook session + assert isinstance(df, pandas.DataFrame) + assert len(df) == 10 -@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") -def test_query_magic_parameterized_query(ipython): +def test_client_library_magic_parameterized_query(ipython): + import pandas + ip = _set_up_ipython('google.cloud.bigquery') sample = """ # [START bigquery_migration_client_library_magic_query_params] - params = {"corpus_name": "hamlet"} + params = {"corpus_name": "hamlet", "limit": 10} # [END bigquery_migration_client_library_magic_query_params] """ - _run_magic_sample(sample, ip) + ip.run_cell(_strip_region_tags(sample)) sample = """ # [START bigquery_migration_client_library_magic_parameterized_query] - %%bigquery my_variable --params $params + %%bigquery --params $params SELECT word, SUM(word_count) as count FROM `bigquery-public-data.samples.shakespeare` WHERE corpus = @corpus_name GROUP BY word ORDER BY count ASC + LIMIT @limit # [END bigquery_migration_client_library_magic_parameterized_query] """ - _run_magic_sample(sample, ip) + ip.run_cell(_strip_region_tags(sample)) + + df = ip.user_ns["_"] # Retrieves last returned object in notebook session + assert isinstance(df, pandas.DataFrame) + assert len(df) == 10 -@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") def test_datalab_list_tables_magic(ipython): ip = _set_up_ipython('google.datalab.kernel') @@ -186,26 +213,11 @@ def test_datalab_list_tables_magic(ipython): %bq tables list --dataset bigquery-public-data.samples # [END bigquery_migration_datalab_list_tables_magic] """ - _run_magic_sample(sample, ip) - + ip.run_cell(_strip_region_tags(sample)) -@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") -def test_command_line_interface(ipython): - ip = IPython.get_ipython() - - sample = """ - # [START bigquery_migration_command_line_list_tables] - !bq ls bigquery-public-data:samples - # [END bigquery_migration_command_line_list_tables] - """ - _run_magic_sample(sample, ip) - - sample = """ - # [START bigquery_migration_command_line_help] - !bq help - # [END bigquery_migration_command_line_help] - """ - _run_magic_sample(sample, ip) + # Retrieves last returned object in notebook session + html_element = ip.user_ns["_"] + assert "shakespeare" in html_element.data def test_datalab_query(): From c8a253085606035d337f9bf5a6711e2cb70a0466 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 14 Dec 2018 10:15:58 -0800 Subject: [PATCH 4/7] fix query magics and add asserts --- bigquery/datalab-migration/samples_test.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/bigquery/datalab-migration/samples_test.py b/bigquery/datalab-migration/samples_test.py index 296878e186e..8769539d107 100644 --- a/bigquery/datalab-migration/samples_test.py +++ b/bigquery/datalab-migration/samples_test.py @@ -53,21 +53,31 @@ def _strip_region_tags(sample_text): def test_datalab_query_magic(ipython): + import google.datalab.bigquery as bq + ip = _set_up_ipython('google.datalab.kernel') sample = """ # [START bigquery_migration_datalab_query_magic] - %%bq + %%bq query SELECT word, SUM(word_count) as count FROM `bigquery-public-data.samples.shakespeare` GROUP BY word ORDER BY count ASC + LIMIT 100 # [END bigquery_migration_datalab_query_magic] """ ip.run_cell(_strip_region_tags(sample)) + results = ip.user_ns["_"] # Last returned object in notebook session + assert isinstance(results, bq.QueryResultsTable) + df = results.to_dataframe() + assert len(df) == 100 + def test_client_library_query_magic(ipython): + import pandas + ip = _set_up_ipython('google.cloud.bigquery') sample = """ @@ -77,10 +87,15 @@ def test_client_library_query_magic(ipython): FROM `bigquery-public-data.samples.shakespeare` GROUP BY word ORDER BY count ASC + LIMIT 100 # [END bigquery_migration_client_library_query_magic] """ ip.run_cell(_strip_region_tags(sample)) + df = ip.user_ns["_"] # Last returned object in notebook session + assert isinstance(df, pandas.DataFrame) + assert len(df) == 100 + def test_datalab_query_magic_results_variable(ipython): ip = _set_up_ipython('google.datalab.kernel') From 02ced538784871358bae745cd3a5e27247bb6185 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Mon, 4 Feb 2019 16:20:09 -0800 Subject: [PATCH 5/7] update dataset creation and fix lint --- bigquery/datalab-migration/requirements.txt | 2 +- bigquery/datalab-migration/samples_test.py | 43 ++++++++++----------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/bigquery/datalab-migration/requirements.txt b/bigquery/datalab-migration/requirements.txt index 7159c1c3a6c..89881693f85 100644 --- a/bigquery/datalab-migration/requirements.txt +++ b/bigquery/datalab-migration/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery[pandas,pyarrow]==1.7.0 +google-cloud-bigquery[pandas,pyarrow]==1.8.1 datalab==1.1.4 ipython==7.2.0; python_version > '2.7' ipython==5.5; python_version == '2.7' diff --git a/bigquery/datalab-migration/samples_test.py b/bigquery/datalab-migration/samples_test.py index 8769539d107..c4a9ff97a60 100644 --- a/bigquery/datalab-migration/samples_test.py +++ b/bigquery/datalab-migration/samples_test.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import time + import pytest import IPython from IPython.testing import tools from IPython.terminal import interactiveshell -import time @pytest.fixture(scope='session') -def ipython(): +def ipython_interactive(): config = tools.default_config() config.TerminalInteractiveShell.simple_prompt = True shell = interactiveshell.TerminalInteractiveShell.instance(config=config) @@ -34,8 +35,7 @@ def to_delete(): doomed = [] yield doomed for dataset_id in doomed: - dataset = bigquery.Dataset.from_string( - '{}.{}'.format(client.project, dataset_id)) + dataset = client.get_dataset(dataset_id) client.delete_dataset(dataset, delete_contents=True) @@ -52,7 +52,7 @@ def _strip_region_tags(sample_text): return '\n'.join(magic_lines) -def test_datalab_query_magic(ipython): +def test_datalab_query_magic(ipython_interactive): import google.datalab.bigquery as bq ip = _set_up_ipython('google.datalab.kernel') @@ -75,7 +75,7 @@ def test_datalab_query_magic(ipython): assert len(df) == 100 -def test_client_library_query_magic(ipython): +def test_client_library_query_magic(ipython_interactive): import pandas ip = _set_up_ipython('google.cloud.bigquery') @@ -97,7 +97,7 @@ def test_client_library_query_magic(ipython): assert len(df) == 100 -def test_datalab_query_magic_results_variable(ipython): +def test_datalab_query_magic_results_variable(ipython_interactive): ip = _set_up_ipython('google.datalab.kernel') sample = """ @@ -126,7 +126,7 @@ def test_datalab_query_magic_results_variable(ipython): ip.user_ns.pop(variable_name) # clean up variable -def test_client_library_query_magic_results_variable(ipython): +def test_client_library_query_magic_results_variable(ipython_interactive): ip = _set_up_ipython('google.cloud.bigquery') sample = """ @@ -146,7 +146,7 @@ def test_client_library_query_magic_results_variable(ipython): ip.user_ns.pop(variable_name) # clean up variable -def test_datalab_magic_parameterized_query(ipython): +def test_datalab_magic_parameterized_query(ipython_interactive): import pandas ip = _set_up_ipython('google.datalab.kernel') @@ -190,7 +190,7 @@ def test_datalab_magic_parameterized_query(ipython): assert len(df) == 10 -def test_client_library_magic_parameterized_query(ipython): +def test_client_library_magic_parameterized_query(ipython_interactive): import pandas ip = _set_up_ipython('google.cloud.bigquery') @@ -220,7 +220,7 @@ def test_client_library_magic_parameterized_query(ipython): assert len(df) == 10 -def test_datalab_list_tables_magic(ipython): +def test_datalab_list_tables_magic(ipython_interactive): ip = _set_up_ipython('google.datalab.kernel') sample = """ @@ -238,6 +238,7 @@ def test_datalab_list_tables_magic(ipython): def test_datalab_query(): # [START bigquery_migration_datalab_query] import google.datalab.bigquery as bq + sql = """ SELECT name FROM `bigquery-public-data.usa_names.usa_1910_current` WHERE state = "TX" @@ -252,6 +253,7 @@ def test_datalab_query(): def test_client_library_query(): # [START bigquery_migration_client_library_query] from google.cloud import bigquery + client = bigquery.Client() sql = """ SELECT name FROM `bigquery-public-data.usa_names.usa_1910_current` @@ -298,7 +300,8 @@ def test_datalab_load_table_from_gcs_csv(to_delete): def test_client_library_load_table_from_gcs_csv(to_delete): # [START bigquery_migration_client_library_load_table_from_gcs_csv] from google.cloud import bigquery - client = bigquery.Client() + + client = bigquery.Client(location='US') # Create the dataset dataset_id = 'import_sample' @@ -307,9 +310,7 @@ def test_client_library_load_table_from_gcs_csv(to_delete): dataset_id = 'test_dataset_{}'.format(int(time.time() * 1000)) to_delete.append(dataset_id) # [START bigquery_migration_client_library_load_table_from_gcs_csv] - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset.location = 'US' - client.create_dataset(dataset) + dataset = client.create_dataset(dataset_id) # Create the table job_config = bigquery.LoadJobConfig( @@ -375,7 +376,7 @@ def test_client_library_load_table_from_dataframe(to_delete): from google.cloud import bigquery import pandas - client = bigquery.Client() + client = bigquery.Client(location='US') dataset_id = 'import_sample' # [END bigquery_migration_client_library_load_table_from_dataframe] @@ -383,9 +384,7 @@ def test_client_library_load_table_from_dataframe(to_delete): dataset_id = 'test_dataset_{}'.format(int(time.time() * 1000)) to_delete.append(dataset_id) # [START bigquery_migration_client_library_load_table_from_dataframe] - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset.location = 'US' - client.create_dataset(dataset) + dataset = client.create_dataset(dataset_id) # Create the table and load the data dataframe = pandas.DataFrame([ @@ -397,10 +396,10 @@ def test_client_library_load_table_from_dataframe(to_delete): 'release_year': 1971 }, ]) - load_job = client.load_table_from_dataframe( - dataframe, dataset.table('monty_python'), location='US') + table_ref = dataset.table('monty_python') + load_job = client.load_table_from_dataframe(dataframe, table_ref) load_job.result() # Waits for table load to complete. # [END bigquery_migration_client_library_load_table_from_dataframe] - table = client.get_table(dataset.table('monty_python')) + table = client.get_table(table_ref) assert table.num_rows == 4 From 79a791e62dd24188e83506630a11c0caad8ae162 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 8 Feb 2019 10:53:12 -0800 Subject: [PATCH 6/7] set datalab project --- bigquery/datalab-migration/samples_test.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/bigquery/datalab-migration/samples_test.py b/bigquery/datalab-migration/samples_test.py index c4a9ff97a60..7d37d2eb80b 100644 --- a/bigquery/datalab-migration/samples_test.py +++ b/bigquery/datalab-migration/samples_test.py @@ -14,12 +14,20 @@ import time +import google.auth +import google.datalab import pytest import IPython from IPython.testing import tools from IPython.terminal import interactiveshell +# Get default project +_, PROJECT_ID = google.auth.default() +# Set Datalab project ID +context = google.datalab.Context.default() +context.set_project_id(PROJECT_ID) + @pytest.fixture(scope='session') def ipython_interactive(): config = tools.default_config() From 0850c005f360fdd77a9b0284b42743679911e832 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Fri, 8 Feb 2019 18:02:17 -0800 Subject: [PATCH 7/7] fixes lint --- bigquery/datalab-migration/samples_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bigquery/datalab-migration/samples_test.py b/bigquery/datalab-migration/samples_test.py index 7d37d2eb80b..04cef49ddb0 100644 --- a/bigquery/datalab-migration/samples_test.py +++ b/bigquery/datalab-migration/samples_test.py @@ -28,6 +28,7 @@ context = google.datalab.Context.default() context.set_project_id(PROJECT_ID) + @pytest.fixture(scope='session') def ipython_interactive(): config = tools.default_config()