In [1]:
!pip install bigquery-ml-utils

Collecting bigquery-ml-utils
  Downloading bigquery_ml_utils-1.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.1/8.1 MB[0m [31m48.7 MB/s[0m eta [36m0:00:00[0m
Collecting tensorflow~=2.11.0 (from bigquery-ml-utils)
  Downloading tensorflow-2.11.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (588.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m588.3/588.3 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting tensorflow-text (from bigquery-ml-utils)
  Downloading tensorflow_text-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.5/6.5 MB[0m [31m98.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tzdata (from bigquery-ml-utils)
  Downloading tzdata-2023.3-py2.py3-none-any.whl (341 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m341.8/341.8 kB[0m 

In [2]:
from bigquery_ml_utils import model_generator

text_embedding_model_generator = model_generator.TextEmbeddingModelGenerator()
text_embedding_model_generator.generate_text_embedding_model('swivel', '/content/model_output_swivel')

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [3]:
!gsutil cp -r /content/model_output_swivel gs://bq_models_experiments/swivel

ServiceException: 401 Anonymous caller does not have storage.objects.list access to the Google Cloud Storage bucket. Permission 'storage.objects.list' denied on resource (or it may not exist).


## Carregando os modelo no Bigquery

In [4]:
from google.colab import auth
from google.cloud import bigquery
from google.colab import data_table

In [5]:
project = 'gee-bq-experiments'
location = 'US'
client = bigquery.Client(project=project, location=location)
data_table.enable_dataframe_formatter()

In [6]:
auth.authenticate_user()

In [None]:
%%bigquery --project gee-bq-experiments

CREATE OR REPLACE MODEL `gee-bq-experiments.ml_example.stack_overflow_swivel` OPTIONS(
  model_type='tensorflow',
  model_path='gs://bq_models_experiments/swivel/*'
)

Query is running:   0%|          |

## Reduzindo os dados para o ano de 2022

In [None]:
%%bigquery --project gee-bq-experiments

create table `ml_example.posts_questions_2022` as
select
title,
regexp_replace(
    regexp_replace(
      regexp_replace(body,
        r'<img [^<>]*>',   r'(Image) '),
        r'(&)([^&;]*)(;)', r'<\2>'
      ),r'\<[^<>]*\>',    ''
    ) as text,
tags
from `gee-bq-experiments.stack_overflow.posts_questions`
WHERE EXTRACT(YEAR from creation_date) = 2022

Query is running:   0%|          |

In [7]:
%%bigquery --project gee-bq-experiments

select
  title,
  text,
  tags
from `ml_example.posts_questions_2022` limit 10

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,title,text,tags
0,CCS --Passing Ports to Functions,I am starting to work on CCS and to program on...,c
1,Is this union legal?,I'm reworking and cleaning up an ex-coworker's...,c
2,Why am I receiving a floating point exception ...,Basically it's a code to input the scores and ...,c
3,"why do I get ""Segmentation fault"" when assigni...",I have this peace of C Programming code to tak...,c
4,I need to convert a char array to ULONGLONG (C...,I have an array of char and I need to convert ...,c
5,Can I put a function inside of a function?,Im currently taking a COP 3223 class and im le...,c
6,Are these lines of code in C programming the same,Are these 2 lines of code the same ??\nline 1:...,c
7,How to print the contents of an array in C?,I wanted to know how to print the contents of ...,c
8,Create Colored Text in c,I want to make a text in color in C. I tried u...,c
9,Should a C program release memory upon termina...,"In a C program, if I try to use malloc and for...",c


In [None]:
%%bigquery --project gee-bq-experiments

SELECT * FROM
ML.PREDICT(MODEL `gee-bq-experiments.ml_example.stack_overflow_swivel`,(
SELECT "Long years ago, we made a tryst with destiny; and now the time comes when we shall redeem our pledge, not wholly or in full measure, but very substantially." AS embedding_input ))

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,embedding,embedding_input
0,"[-0.09961678087711334, -1.1282159090042114, 2....","Long years ago, we made a tryst with destiny; ..."


## Definindo o calculo de distância euclidiana quadrada

In [None]:
%%bigquery --project gee-bq-experiments

CREATE FUNCTION `ml_example.td`(a ARRAY<FLOAT64>, b ARRAY<FLOAT64>, idx INT64) AS (
   (a[OFFSET(idx)] - b[OFFSET(idx)]) * (a[OFFSET(idx)] - b[OFFSET(idx)])
);
CREATE FUNCTION `ml_example.term_distance`(a ARRAY<FLOAT64>, b ARRAY<FLOAT64>) AS ((
   SELECT SQRT(SUM( `ml_example.td`(a, b, idx))) FROM UNNEST(GENERATE_ARRAY(0, 19)) idx
));

## Busca semantica com o uso do Swivel

In [None]:
%%bigquery --project gee-bq-experiments

WITH search_term AS (
  SELECT embedding as term_embedding FROM ML.PREDICT(MODEL`gee-bq-experiments.ml_example.stack_overflow_swivel`,
  (SELECT  "\"if\" statement?" AS embedding_input))
)
SELECT

  `gee-bq-experiments.ml_example.term_distance`(term_embedding, embedding) AS termdist,
  embedding_input
FROM ML.PREDICT(MODEL `gee-bq-experiments.ml_example.stack_overflow_swivel`,(
  SELECT LOWER(title) AS embedding_input
  FROM `gee-bq-experiments.ml_example.posts_questions_2022`
)), search_term
ORDER By termdist ASC
LIMIT 10

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,termdist,embedding_input
0,0.616308,shopify if statement
1,0.616308,shortening if statement
2,0.790438,python selenium if statement issue
3,0.892857,if then statement issue
4,0.959237,issue with if/elif statement
5,0.959237,issue with nested if statement
6,0.983701,python if statement optimisation
7,0.983701,sudoku solver if statement
8,1.031533,if statement invalid syntax message
9,1.045714,the if statement wont read vairables


In [10]:
%%bigquery --project gee-bq-experiments
CREATE FUNCTION `ml_example.arr_to_input_10`(arr ARRAY<FLOAT64>)
RETURNS
STRUCT<p1 FLOAT64, p2 FLOAT64, p3 FLOAT64, p4 FLOAT64,
       p5 FLOAT64, p6 FLOAT64, p7 FLOAT64, p8 FLOAT64,
       p9 FLOAT64, p10 FLOAT64>
AS (
STRUCT(
    arr[OFFSET(0)]
    , arr[OFFSET(1)]
    , arr[OFFSET(2)]
    , arr[OFFSET(3)]
    , arr[OFFSET(4)]
    , arr[OFFSET(5)]
    , arr[OFFSET(6)]
    , arr[OFFSET(7)]
    , arr[OFFSET(8)]
    , arr[OFFSET(9)]
));

Query is running:   0%|          |

In [14]:
%%bigquery --project gee-bq-experiments

create or replace model `gee-bq-experiments.ml_example.text_clustering` options(
  model_type='kmeans', num_clusters=5
) as
select
  ml_example.arr_to_input_10(embedding) as text_embend,
  title
from ml.predict(MODEL `gee-bq-experiments.ml_example.stack_overflow_swivel`,(
SELECT title ,lower(title) AS embedding_input FROM `gee-bq-experiments.ml_example.posts_questions_2022` ))

Query is running:   0%|          |

In [19]:
%%bigquery --project gee-bq-experiments

SELECT title FROM ML.PREDICT(MODEL `gee-bq-experiments.ml_example.text_clustering`, (
  select
    ml_example.arr_to_input_10(embedding) as text_embend,
    title
  from ml.predict(MODEL `gee-bq-experiments.ml_example.stack_overflow_swivel`,(
  SELECT title ,lower(title) AS embedding_input FROM `gee-bq-experiments.ml_example.posts_questions_2022` ))
)) where centroid_id = 2

Query is running:   0%|          |

Downloading:   0%|          |



Unnamed: 0,title
0,Is there already a Delphi timer that executes ...
1,Jenkins - Prefix all logs with node name?
2,Changing The Format Of Json File Using String ...
3,Json to excel from devops to storage account w...
4,How to configure CSP in nginx using nonce appr...
...,...
262232,Is there a way of listening/proxying a variabl...
262233,GraphicExtraLarge complication Apple watchOS a...
262234,element in a array contains double quotation
262235,Creating meme command with discord.py
