Skip to content
Branch: master
Find file History
Fetching latest commit…
Cannot retrieve the latest commit at this time.
Permalink
Type Name Latest commit message Commit time
..
Failed to load latest commit information.
README.md
csv_to_struct.sql
find_in_set.sql Adds community udfs for dealing with csv and url (#32) Jan 7, 2020
get_array_value.sql
get_value.sql
int.sql
median.sql
multiply_full_scale.sql
nlp_compromise_number.sql
nlp_compromise_people.sql
radians.sql Initial commit Aug 12, 2019
random_int.sql
random_value.sql
translate.sql Changed repo description and added new functions (#18) Nov 14, 2019
url_keys.sql
url_param.sql
url_parse.sql
y4md_to_date.sql
zeronorm.sql Zero-norm a variable (#25) Nov 18, 2019

README.md

Community UDFs

This directory contains community contributed user-defined functions to extend BigQuery for more specialized usage patterns. Each UDF within this directory will be automatically synchronized to the bqutil project within the fn dataset for reference in queries.

For example, if you'd like to reference the int function within your query, you can reference it like the following:

SELECT bqutil.fn.int(1.684)

UDFs

Documentation

csv_to_struct(strList STRING)

Take a list of comma separated key-value pairs and creates a struct. Input: strList: string that has map in the format a:b,c:d.... Output: struct for the above map.

WITH test_cases AS (
  SELECT NULL as s
  UNION ALL
  SELECT '' as s
  UNION ALL
  SELECT ',' as s
  UNION ALL
  SELECT ':' as s
  UNION ALL
  SELECT 'a:b' as s
  UNION ALL
  SELECT 'a:b,c:d' as s
  UNION ALL
  SELECT 'a:b' as s
)
SELECT key, value from test_cases as t, UNNEST(bqutil.fn.csv_to_struct(t.s)) s;

results:

key value
a b
a b
c d
a b

find_in_set(str STRING, strList STRING)

Returns the first occurance of str in strList where strList is a comma-delimited string. Returns null if either argument is null. Returns 0 if the first argument contains any commas. For example, find_in_set('ab', 'abc,b,ab,c,def') returns 3. Input: str: string to search for. strList: string in which to search. Output: Position of str in strList

WITH test_cases AS (
  SELECT 'ab' as str, 'abc,b,ab,c,def' as strList
  UNION ALL
  SELECT 'ab' as str, 'mobile,tablet,mobile/tablet,phone,text' as strList
  UNION ALL
  SELECT 'mobile' as str, 'mobile,tablet,mobile/tablet,phone,text' as strList
  UNION ALL
  SELECT 'mobile,' as str, 'mobile,tablet,mobile/tablet,phone,text' as strList
)
SELECT bqutil.fn.find_in_set(str, strList) from test_cases

results:

f0_
3
NULL
1
0

get_array_value(k STRING, arr ANY TYPE)

Given a key and a map, returns the ARRAY type value. This is same as get_value except it returns an ARRAY type. This can be used when the map has multiple values for a given key.

WITH test AS (
  SELECT ARRAY(
    SELECT STRUCT('a' AS key, 'aaa' AS value) AS s
    UNION ALL
    SELECT STRUCT('b' AS key, 'bbb' AS value) AS s
    UNION ALL
    SELECT STRUCT('a' AS key, 'AAA' AS value) AS s
    UNION ALL
    SELECT STRUCT('c' AS key, 'ccc' AS value) AS s
  ) AS a
)
SELECT bqutil.fn.get_array_value('b', a), bqutil.fn.get_array_value('a', a), bqutil.fn.get_array_value('c', a) from test;

results:

f0_ f1_ f2_
["bbb"] ["aaa","AAA"] ["ccc"]

get_value(k STRING, arr ANY TYPE)

Given a key and a list of key-value maps in the form [{'key': 'a', 'value': 'aaa'}], returns the SCALAR type value.

WITH test AS (
  SELECT ARRAY(
    SELECT STRUCT('a' AS key, 'aaa' AS value) AS s
    UNION ALL
    SELECT STRUCT('b' AS key, 'bbb' AS value) AS s
    UNION ALL
    SELECT STRUCT('c' AS key, 'ccc' AS value) AS s
  ) AS a
)
SELECT bqutil.fn.get_value('b', a), bqutil.fn.get_value('a', a), bqutil.fn.get_value('c', a) from test;

results:

f0_ f1_ f2_
bbb aaa ccc

int(v ANY TYPE)

Convience wrapper which can be used to convert values to integers in place of the native CAST(x AS INT64).

SELECT bqutil.fn.int(1) int1
  , bqutil.fn.int(2.5) int2
  , bqutil.fn.int('7') int3
  , bqutil.fn.int('7.8') int4

1, 2, 7, 7

Note that CAST(x AS INT64) rounds the number, while this function truncates it. In many cases, that's the behavior users expect.

median(arr ANY TYPE)

Get the median of an array of numbers.

SELECT bqutil.fn.median([1,1,1,2,3,4,5,100,1000]) median_1
  , bqutil.fn.median([1,2,3]) median_2
  , bqutil.fn.median([1,2,3,4]) median_3

3.0, 2.0, 2.5

nlp_compromise_number(str STRING)

Parse numbers from text.

SELECT bqutil.fn.nlp_compromise_number('one hundred fifty seven')
  , bqutil.fn.nlp_compromise_number('three point 5')
  , bqutil.fn.nlp_compromise_number('2 hundred')
  , bqutil.fn.nlp_compromise_number('minus 8')
  , bqutil.fn.nlp_compromise_number('5 million 3 hundred 25 point zero 1')

157, 3.5, 200, -8, 5000325.01

nlp_compromise_people(str STRING)

Extract names out of text.

SELECT bqutil.fn.nlp_compromise_people(
  "hello, I'm Felipe Hoffa and I work with Elliott Brossard - who thinks Jordan Tigani will like this post?"
) names

["felipe hoffa", "elliott brossard", "jordan tigani"]

radians(x ANY TYPE)

Convert values into radian.

SELECT bqutil.fn.radians(180) is_this_pi

3.141592653589793

random_int(min ANY TYPE, max ANY TYPE)

Generate random integers between the min and max values.

SELECT bqutil.fn.random_int(0,10) randint, COUNT(*) c
FROM UNNEST(GENERATE_ARRAY(1,1000))
GROUP BY 1
ORDER BY 1

random_value(arr ANY TYPE)

Returns a random value from an array.

SELECT
  bqutil.fn.random_value(['tino', 'jordan', 'julie', 'elliott', 'felipe']),
  bqutil.fn.random_value(['tino', 'jordan', 'julie', 'elliott', 'felipe']),
  bqutil.fn.random_value(['tino', 'jordan', 'julie', 'elliott', 'felipe'])

'tino', 'julie', 'jordan'

translate(expression STRING, characters_to_replace STRING, characters_to_substitute STRING)

For a given expression, replaces all occurrences of specified characters with specified substitutes. Existing characters are mapped to replacement characters by their positions in the characters_to_replace and characters_to_substitute arguments. If more characters are specified in the characters_to_replace argument than in the characters_to_substitute argument, the extra characters from the characters_to_replace argument are omitted in the return value.

SELECT bqutil.fn.translate('mint tea', 'inea', 'osin')

most tin

url_keys(query STRING)

Get an array of url param keys.

SELECT bqutil.fn.url_keys(
  'https://www.google.com/search?q=bigquery+udf&client=chrome')

["q", "client"]

url_param(query STRING, p STRING)

Get the value of a url param key.

SELECT bqutil.fn.url_param(
  'https://www.google.com/search?q=bigquery+udf&client=chrome', 'client')

"chrome"

url_parse(urlString STRING, partToExtract STRING)

Returns the specified part from the URL. Valid values for partToExtract include HOST, PATH, QUERY, REF, PROTOCOL For example, url_parse('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST') returns 'facebook.com'.

WITH urls AS (
  SELECT 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' as url
  UNION ALL
  SELECT 'rpc://facebook.com/' as url
)
SELECT bqutil.fn.url_parse(url, 'HOST'), bqutil.fn.url_parse(url, 'PATH'), bqutil.fn.url_parse(url, 'QUERY'), bqutil.fn.url_parse(url, 'REF'), bqutil.fn.url_parse(url, 'PROTOCOL') from urls

results:

f0_ f1_ f2_ f3_ f4_
facebook.com path1/p.php k1=v1&k2=v2#Ref1 Ref1 http
facebook.com NULL NULL NULL rpc

y4md_to_date(y4md STRING)

Convert a STRING formatted as a YYYYMMDD to a DATE

SELECT bqutil.fn.y4md_to_date('20201220')

"2020-12-20"

zeronorm(x ANY TYPE, meanx FLOAT64, stddevx FLOAT64)

Normalize a variable so that it has zero mean and unit variance.

with r AS (
  SELECT 10 AS x
  UNION ALL SELECT 20
  UNION ALL SELECT 30
  UNION ALL SELECT 40
  UNION ALL SELECT 50
),
stats AS (
  SELECT AVG(x) AS meanx, STDDEV(x) AS stddevx
  FROM r
)
SELECT x, bqutil.fn.zeronorm(x, meanx, stddevx) AS zeronorm
FROM r, stats;

returns:

Row x zeronorm
1 10 -12.649110640673518
2 20 -6.324555320336759
3 30 0.0
4 40 6.324555320336759
5 50 12.649110640673518
You can’t perform that action at this time.