Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add MATERIALIZED_VIEW as table type #192

Merged
merged 37 commits into from
Jun 23, 2020
Merged
Show file tree
Hide file tree
Changes from 36 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
8aab4c1
move SchemaConverters to scala
Gaurangi94 May 20, 2020
4efed2d
move SchemaConverters to java
Gaurangi94 May 20, 2020
46cdcac
import required libraries
Gaurangi94 May 20, 2020
7790700
import required libraries
Gaurangi94 May 20, 2020
2325d92
resolve conflicts on schemaconverter
Gaurangi94 May 20, 2020
32ebdef
resolve conflicts on schemaconverter
Gaurangi94 May 20, 2020
126c807
Merge branch 'master' of github.com:Gaurangi94/spark-bigquery-connector
Gaurangi94 May 21, 2020
234a2f0
port avro and arrow binary iterators to java
Gaurangi94 May 21, 2020
f6a00c0
merge with upstream
Gaurangi94 May 21, 2020
5a3ee65
add unchecked IO exception
Gaurangi94 May 21, 2020
62abeb3
add unchecked IO exception
Gaurangi94 May 21, 2020
e9b1ae8
refactoring
Gaurangi94 May 21, 2020
7ef8cd9
refactor code
Gaurangi94 May 21, 2020
d742f5c
Merge branch 'master' of github.com:Gaurangi94/spark-bigquery-connector
Gaurangi94 May 26, 2020
03f1262
Merge branch 'master' of https://github.com/GoogleCloudDataproc/spark…
Gaurangi94 May 27, 2020
59bd9ea
fix readme file for partition type
Gaurangi94 May 28, 2020
e136422
fix readme file for partition type
Gaurangi94 May 28, 2020
45208d8
fix readme file
Gaurangi94 May 29, 2020
26ffeeb
Merge remote-tracking branch 'direct/master'
Gaurangi94 Jun 2, 2020
2cf3515
change default read format to avro
Gaurangi94 Jun 3, 2020
3e836d3
fix hasNext method of arrowBinaryIterator
Gaurangi94 Jun 4, 2020
9776d9a
Merge branch 'master' of https://github.com/GoogleCloudDataproc/spark…
Gaurangi94 Jun 4, 2020
e1ddb6d
Merge branch 'master' of https://github.com/GoogleCloudDataproc/spark…
Gaurangi94 Jun 4, 2020
df777bc
Merge branch 'master' of github.com:Gaurangi94/spark-bigquery-connector
Gaurangi94 Jun 4, 2020
c34f1f8
add integration test for caching
Gaurangi94 Jun 4, 2020
8c23e4f
modify test to read from cache
Gaurangi94 Jun 4, 2020
beefd81
add benchmarking script
Gaurangi94 Jun 16, 2020
cf08c55
Merge branch 'master' of https://github.com/GoogleCloudDataproc/spark…
Gaurangi94 Jun 19, 2020
11fe64d
add materialized view as table type
Gaurangi94 Jun 19, 2020
0a1d5e3
add materialized view as table type
Gaurangi94 Jun 19, 2020
3d52374
Merge branch 'master' of github.com:Gaurangi94/spark-bigquery-connector
Gaurangi94 Jun 19, 2020
7694dd4
Merge branch 'master' of github.com:Gaurangi94/spark-bigquery-connector
Gaurangi94 Jun 19, 2020
e2bbe46
Merge branch 'master' of github.com:Gaurangi94/spark-bigquery-connector
Gaurangi94 Jun 19, 2020
6c23842
Merge branch 'master' of github.com:Gaurangi94/spark-bigquery-connector
Gaurangi94 Jun 19, 2020
c0e1fb5
Merge branch 'master' of github.com:Gaurangi94/spark-bigquery-connector
Gaurangi94 Jun 19, 2020
c59ca1f
add support for materialized view
Gaurangi94 Jun 22, 2020
b7505d6
add conditions for Materialized View
Gaurangi94 Jun 23, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public TableInfo getSupportedTable(TableId tableId, boolean viewsEnabled, String
if (TableDefinition.Type.TABLE == tableType) {
return table;
}
if (TableDefinition.Type.VIEW == tableType) {
if (TableDefinition.Type.VIEW == tableType || TableDefinition.Type.MATERIALIZED_VIEW == tableType) {
if (viewsEnabled) {
return table;
} else {
Expand Down Expand Up @@ -167,7 +167,7 @@ public long calculateTableSize(TableInfo tableInfo, Optional<String> filter) {
TableDefinition.Type type = tableInfo.getDefinition().getType();
if (type == TableDefinition.Type.TABLE && !filter.isPresent()) {
return tableInfo.getNumRows().longValue();
} else if (type == TableDefinition.Type.VIEW ||
} else if (type == TableDefinition.Type.VIEW || type == TableDefinition.Type.MATERIALIZED_VIEW ||
(type == TableDefinition.Type.TABLE && filter.isPresent())) {
// run a query
String table = fullTableName(tableInfo.getTableId());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ TableInfo getActualTable(
if (TableDefinition.Type.TABLE == tableType) {
return table;
}
if (TableDefinition.Type.VIEW == tableType) {
if (TableDefinition.Type.VIEW == tableType || TableDefinition.Type.MATERIALIZED_VIEW == tableType) {
if (!config.viewsEnabled) {
throw new BigQueryConnectorException(UNSUPPORTED, format(
"Views are not enabled. You can enable views by setting '%s' to true. Notice additional cost may occur.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
package com.google.cloud.spark.bigquery

import com.google.auth.Credentials
import com.google.cloud.bigquery.TableDefinition.Type.{TABLE, VIEW}
import com.google.cloud.bigquery.TableDefinition.Type.{MATERIALIZED_VIEW, TABLE, VIEW}
import com.google.cloud.bigquery.{BigQuery, BigQueryOptions, TableDefinition}
import com.google.cloud.spark.bigquery.direct.DirectBigQueryRelation
import org.apache.spark.sql.sources._
Expand Down Expand Up @@ -57,7 +57,7 @@ class BigQueryRelationProvider(
.getOrElse(sys.error(s"Table $tableName not found"))
table.getDefinition[TableDefinition].getType match {
case TABLE => new DirectBigQueryRelation(opts, table)(sqlContext)
case VIEW => if (opts.viewsEnabled) {
case VIEW | MATERIALIZED_VIEW => if (opts.viewsEnabled) {
new DirectBigQueryRelation(opts, table)(sqlContext)
} else {
sys.error(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ class SparkBigQueryEndToEndITSuite extends FunSuite
countResults should equal(countAfterCollect)
}
*/

test("read data types. DataSource %s".format(dataSourceFormat)) {
val allTypesTable = readAllTypesTable(dataSourceFormat)
val expectedRow = spark.range(1).select(TestConstants.ALL_TYPES_TABLE_COLS: _*).head.toSeq
Expand Down Expand Up @@ -542,6 +542,15 @@ class SparkBigQueryEndToEndITSuite extends FunSuite
assert(df.schema == allTypesTable.schema)
}

// Loads `bigquery-public-data:ethereum_blockchain.live_logs` through the
// "bigquery" data source with views enabled (presumably a materialized view,
// per the test name — confirm against the dataset).
// NOTE(review): no action or assertion is run on the resulting DataFrame, so
// the test only passes or fails on whether `load()` itself throws; consider
// asserting on the schema or a row count.
test("query materialized view") {
  // `val` rather than `var`: the DataFrame reference is never reassigned.
  val df = spark.read.format("bigquery")
    .option("table", "bigquery-public-data:ethereum_blockchain.live_logs")
    .option("viewsEnabled", "true")
    .option("viewMaterializationProject", "bigquery-public-data")
    .option("viewMaterializationDataset", "ethereum_blockchain")
    .load()
}

test("write to bq - adding the settings to spark.conf" ) {
spark.conf.set("temporaryGcsBucket", temporaryGcsBucket)
val df = initialData
Expand Down