# <b>Smooke Test</b>

Running queries in created tables from:

* Generated temporal data
* S3 data
* GS data

## <b>Init Context</b>

In [1]:
import dask

In [2]:
from dask.distributed import Client

In [3]:
ip_port_dask_scheduller = '3.89.185.184:8786'
network_interface = 'ens5' 

In [4]:
client = Client(ip_port_dask_scheduller)

In [23]:
client

0,1
Client  Scheduler: tcp://3.89.185.184:8786  Dashboard: http://3.89.185.184:8787/status,Cluster  Workers: 1  Cores: 4  Memory: 16.48 GB


In [6]:
from blazingsql import BlazingContext

In [7]:
bc = BlazingContext(dask_client = client, network_interface='ens5')

BlazingContext ready


## <b>Generated temporal data</b>

In [8]:
import cudf

In [9]:
num_partitions=2
data_size=1000

In [10]:
df2 = cudf.DataFrame()

In [11]:
df2['col1'] = cudf.Series(list(range(data_size)))

In [12]:
df2['col2'] = cudf.Series(list(range(data_size)))

In [13]:
import dask_cudf

In [14]:
ds2 = dask_cudf.from_cudf(df2,npartitions=num_partitions)

In [15]:
df = cudf.DataFrame()

In [16]:
df['col1'] = cudf.Series(list(range(data_size)))

In [17]:
df['col2'] = cudf.Series(list(range(data_size)))

In [18]:
ds = dask_cudf.from_cudf(df,npartitions=num_partitions)

In [19]:
t = bc.create_table("dask_table",ds)

In [20]:
t = bc.create_table("dask_table2",ds2)

In [21]:
test = bc.sql('select * from dask_table inner join dask_table2 on dask_table2.col1 = dask_table.col1')

In [22]:
print(test.head(10))

   col1  col2  col10  col20
0   992   992    992    992
1   993   993    993    993
2   994   994    994    994
3   995   995    995    995
4   996   996    996    996
5   997   997    997    997
6   998   998    998    998
7   999   999    999    999
8   864   864    864    864
9   865   865    865    865


## <b>Data from S3</b>

In [24]:
from blazingsql import S3EncryptionType

In [25]:
authority = "tpch_s3"

In [26]:
bc.s3(authority, bucket_name='blazingsql-bucket', encryption_type=S3EncryptionType.NONE,
	access_key_id='AKIAJGB3SR3IXU3TE5WA', secret_key='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')

(True,
 '',
 OrderedDict([('type', 's3'),
              ('bucket_name', 'blazingsql-bucket'),
              ('access_key_id', 'AKIAJGB3SR3IXU3TE5WA'),
              ('secret_key', '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'),
              ('session_token', ''),
              ('encryption_type', <S3EncryptionType.NONE: 1>),
              ('kms_key_amazon_resource_name', ''),
              ('endpoint_override', ''),
              ('region', '')]))

In [27]:
dir_data_lc = "s3://" + authority + "/" + "DataSet100Mb2part/" + "tpch/"

In [28]:
ext = "parquet"

In [29]:
table_files_nation = ("%s/%s_[0-9]*.%s") % (dir_data_lc, "nation", ext)

In [30]:
table_files_region = ("%s/%s_[0-9]*.%s") % (dir_data_lc, "region", ext)

In [31]:
print("nation files: " + table_files_nation)

nation files: s3://tpch_s3/DataSet100Mb2part/tpch//nation_[0-9]*.parquet


In [32]:
bc.create_table("nation", table_files_nation)
print("nation table created!")

nation table created!


In [33]:
bc.create_table("region", table_files_region)
print("region table created!")

region table created!


In [34]:
query = """select n1.n_nationkey as n1key, n2.n_nationkey as n2key, n1.n_nationkey + n2.n_nationkey 
from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 
where n1.n_nationkey < 10 and n1.n_nationkey > 5"""

In [35]:
test = bc.sql(query)

In [36]:
print(test.compute())

   n1key  n2key  EXPR$2
0      6      0       6
1      7      1       8
2      8      2      10
3      9      3      12


## <b>Data from GS</b>

In [37]:
authority = "tpch_gs"

In [39]:
bc.gs(authority,
        project_id="blazingdb-jenkins",
        bucket_name="blazingsql-test",
        use_default_adc_json_file=True,
        adc_json_file='')

Couldn't create gcs::ClientOptions for Project ID blazingdb-jenkins status=Could not automatically determine credentials. For more information, please see https://developers.google.com/identity/protocols/application-default-credentials with dask worker
tcp://172.31.28.252:32783


(False,
 "Couldn't create gcs::ClientOptions for Project ID blazingdb-jenkins status=Could not automatically determine credentials. For more information, please see https://developers.google.com/identity/protocols/application-default-credentials",
 OrderedDict([('type', 'gs'),
              ('project_id', 'blazingdb-jenkins'),
              ('bucket_name', 'blazingsql-test'),
              ('use_default_adc_json_file', True),
              ('adc_json_file', '')]))