# Big Data Cluster の環境準備

## 1. BDC への接続
1. IP の確認
```
# List the services in the mssql-cluster namespace and keep only the lines containing "31433".
# NOTE(review): 31433 is presumably the SQL Server master endpoint port - confirm.
kubectl get service -n mssql-cluster | findstr "31433"
```
2. Azure Data Studio で接続  
3. デモ用ディレクトリを開く

## 2. HDFS の環境準備
1. HDFS に clickstream_data ディレクトリを作成  
  
```
# Knox gateway connection settings; endpoint and password are read from environment variables.
$KNOX_ENDPOINT = $ENV:KNOX_ENDPOINT # KNOX Endpoint (xxx.xxx.xxx.xxx:30443)
$KNOX_USER = "root"
$KNOX_PASSWORD = $ENV:KNOX_PASSWORD
# PSCredential takes the password as a SecureString, so convert the plain-text value.
$KNOX_PASSWORD = ConvertTo-SecureString -String $KNOX_PASSWORD -AsPlainText -Force

# Compile a certificate policy whose CheckValidationResult always returns true,
# i.e. every server certificate is accepted.
# NOTE(review): this disables certificate validation for the session - demo use only.
Add-Type -TypeDefinition @"
    using System.Net;
    using System.Security.Cryptography.X509Certificates;
    public class TrustAllCertsPolicy : ICertificatePolicy {
        public bool CheckValidationResult(
            ServicePoint srvPoint, X509Certificate certificate,
            WebRequest request, int certificateProblem) {
            return true;
        }
    }
"@
[System.Net.ServicePointManager]::CertificatePolicy = New-Object -TypeName TrustAllCertsPolicy

# Credential object used for this request; $cred stays defined in the session afterwards.
$cred = New-Object -TypeName System.Management.Automation.PSCredential -ArgumentList $KNOX_USER, $KNOX_PASSWORD

# Create the /clickstream_data directory through the WebHDFS MKDIRS operation.
$mkdirRequest = @{
    Credential = $cred
    Method     = 'Put'
    Uri        = "https://${KNOX_ENDPOINT}/gateway/default/webhdfs/v1/clickstream_data?op=MKDIRS"
}
Invoke-WebRequest @mkdirRequest

```
  
2. ファイルをアップロード  
```
# Local folder that holds the sample CSV files to upload.
$FilePath = "C:\Users\decodeadmin\Desktop\Demo\SampleData"

# Upload web_clickstream_01.csv through web_clickstream_03.csv into /clickstream_data,
# replacing any existing file (overwrite=true).
# Uses $cred and $KNOX_ENDPOINT, which must already be defined in the session.
foreach ($fileNo in 1..3) {
    # Two-digit, zero-padded file number (1 -> "01").
    $csvName = "web_clickstream_$($fileNo.ToString("00")).csv"
    $uploadRequest = @{
        Credential = $cred
        Method     = 'Put'
        Uri        = "https://${KNOX_ENDPOINT}/gateway/default/webhdfs/v1/clickstream_data/${csvName}?op=create&overwrite=true"
        InFile     = Join-Path "${FilePath}" $csvName
    }
    Invoke-WebRequest @uploadRequest
}
```
3. HDFS 階層化の準備
```
# Log in to the mssql-cluster cluster; the user name comes from the MGMTPROXY_LOGIN environment variable.
mssqlctl login -n mssql-cluster -u $ENV:MGMTPROXY_LOGIN

# Mount the abfs:// remote store (account name from ADLGen2_ACCOUNT) at /mounts/azureblob.
mssqlctl cluster storage-pool mount create --remote-uri "abfs://azureblob@$($ENV:ADLGen2_ACCOUNT).dfs.core.windows.net/" --mount-path /mounts/azureblob --credential-file "C:\Users\decodeadmin\Desktop\Demo\00.Setup\01.環境構築\files.creds"

# Adjust permissions on the mounted path.
# Open an interactive shell in the hadoop container of master-0; the hdfs commands
# below are typed inside that shell, and "exit" leaves it again.
# "--" separates kubectl's own options from the command to run in the container.
kubectl exec -n mssql-cluster -it master-0 -c hadoop -- /bin/bash
# -R lists recursively (lower-case -r would only reverse the sort order).
hdfs dfs -ls -R /mounts/azureblob
# Give "other" users read and execute permission on everything under the mount.
hdfs dfs -chmod -R o+rx /mounts/azureblob
exit

# Mount the s3a:// remote store (bucket name from S3_ACCOUNT) at /mounts/aws.
mssqlctl cluster storage-pool mount create --remote-uri "s3a://$($ENV:S3_ACCOUNT)/" --mount-path /mounts/aws --credential-file "C:\Users\decodeadmin\Desktop\Demo\00.Setup\01.環境構築\s3files.creds"

# Show the state of the mounts.
mssqlctl cluster storage-pool mount status
```


## 3. Storage Pool の外部データソースの作成  
https://docs.microsoft.com/en-us/sql/relational-databases/polybase/data-virtualization-csv?view=sqlallproducts-allversions

In [1]:
-- Run in the model database.
-- NOTE(review): presumably chosen so that databases created afterwards start with this data source - confirm.
USE [model];

-- Create the SqlStoragePool external data source only when it is not registered yet.
IF NOT EXISTS (
    SELECT 1
    FROM sys.external_data_sources
    WHERE name = 'SqlStoragePool'
)
BEGIN
    CREATE EXTERNAL DATA SOURCE SqlStoragePool
    WITH (LOCATION = 'sqlhdfs://controller-svc:8080/default');
END
GO

## 4. SQL Data Pool の外部データソースの作成  
https://docs.microsoft.com/en-us/sql/big-data-cluster/tutorial-data-pool-ingest-sql?view=sqlallproducts-allversions

In [2]:
-- Run in the model database.
-- NOTE(review): presumably chosen so that databases created afterwards start with this data source - confirm.
USE [model];

-- Create the SqlDataPool external data source only when it is not registered yet.
IF NOT EXISTS (
    SELECT 1
    FROM sys.external_data_sources
    WHERE name = 'SqlDataPool'
)
BEGIN
    CREATE EXTERNAL DATA SOURCE SqlDataPool
    WITH (LOCATION = 'sqldatapool://controller-svc:8080/datapools/default');
END
GO

## 5. デモ用データベースの作成
データベースの作成と、作成したデータベースに外部データソース作成のためのマスターキーの作成

In [3]:
-- Recreates the four demo databases from scratch: each one is dropped if it exists and created again.
-- DataVirtualization, StoragePool and DataPool also get a database master key.
-- NOTE(review): the master key password is hard-coded and shared by all three databases;
-- replace it for anything beyond a demo environment.
USE [master];

-- Database for data virtualization
DROP DATABASE IF EXISTS [DataVirtualization];
GO
CREATE DATABASE [DataVirtualization];
GO
USE [DataVirtualization];
CREATE MASTER KEY ENCRYPTION BY PASSWORD = '23987hxJ#KL95234nl0zBe';
GO

-- Database for the storage pool
DROP DATABASE IF EXISTS [StoragePool];
GO
CREATE DATABASE [StoragePool];
GO
USE [StoragePool];
CREATE MASTER KEY ENCRYPTION BY PASSWORD = '23987hxJ#KL95234nl0zBe';
GO

-- Database for the data pool
DROP DATABASE IF EXISTS [DataPool];
GO
CREATE DATABASE [DataPool];
GO
USE [DataPool];
CREATE MASTER KEY ENCRYPTION BY PASSWORD = '23987hxJ#KL95234nl0zBe';
GO

-- Database for ML Services (no master key is created here)
DROP DATABASE IF EXISTS [MLService];
GO
CREATE DATABASE [MLService];
GO

## 6. リンクサーバーの作成

In [4]:
USE [master];

-- Register DATA-0-0 and DATA-0-1 as linked servers, each with a login mapping that
-- uses the caller's own credentials (@useself = 'True').
-- Each registration is guarded by a sys.servers lookup so the cell can be re-run:
-- sp_addlinkedserver raises an error when the server name is already registered.
IF NOT EXISTS (SELECT 1 FROM sys.servers WHERE name = N'DATA-0-0.DATA-0-SVC.MSSQL-CLUSTER.SVC.CLUSTER.LOCAL')
BEGIN
    EXEC master.dbo.sp_addlinkedserver @server = N'DATA-0-0.DATA-0-SVC.MSSQL-CLUSTER.SVC.CLUSTER.LOCAL', @srvproduct=N'SQL Server';
    EXEC master.dbo.sp_addlinkedsrvlogin @rmtsrvname=N'DATA-0-0.DATA-0-SVC.MSSQL-CLUSTER.SVC.CLUSTER.LOCAL',@useself=N'True',@locallogin=NULL,@rmtuser=NULL,@rmtpassword=NULL;
END
GO

IF NOT EXISTS (SELECT 1 FROM sys.servers WHERE name = N'DATA-0-1.DATA-0-SVC.MSSQL-CLUSTER.SVC.CLUSTER.LOCAL')
BEGIN
    EXEC master.dbo.sp_addlinkedserver @server = N'DATA-0-1.DATA-0-SVC.MSSQL-CLUSTER.SVC.CLUSTER.LOCAL', @srvproduct=N'SQL Server';
    EXEC master.dbo.sp_addlinkedsrvlogin @rmtsrvname=N'DATA-0-1.DATA-0-SVC.MSSQL-CLUSTER.SVC.CLUSTER.LOCAL',@useself=N'True',@locallogin=NULL,@rmtuser=NULL,@rmtpassword=NULL;
END
GO

In [5]:
-- Check both linked servers by reading sys.servers from master on each remote instance.
SELECT *
FROM [DATA-0-0.DATA-0-SVC.MSSQL-CLUSTER.SVC.CLUSTER.LOCAL].master.sys.servers;

SELECT *
FROM [DATA-0-1.DATA-0-SVC.MSSQL-CLUSTER.SVC.CLUSTER.LOCAL].master.sys.servers;

server_id,name,product,provider,data_source,location,provider_string,catalog,connect_timeout,query_timeout,is_linked,is_remote_login_enabled,is_rpc_out_enabled,is_data_access_enabled,is_collation_compatible,uses_remote_collation,collation_name,lazy_schema_validation,is_system,is_publisher,is_subscriber,is_distributor,is_nonsql_subscriber,is_remote_proc_transaction_promotion_enabled,modify_date,is_rda_server
0,data-0-0,SQL Server,SQLNCLI,data-0-0,,,,0,0,0,1,1,0,0,1,,0,0,0,0,0,0,0,2019-05-22 12:19:40.697,0


server_id,name,product,provider,data_source,location,provider_string,catalog,connect_timeout,query_timeout,is_linked,is_remote_login_enabled,is_rpc_out_enabled,is_data_access_enabled,is_collation_compatible,uses_remote_collation,collation_name,lazy_schema_validation,is_system,is_publisher,is_subscriber,is_distributor,is_nonsql_subscriber,is_remote_proc_transaction_promotion_enabled,modify_date,is_rda_server
0,data-0-1,SQL Server,SQLNCLI,data-0-1,,,,0,0,0,1,1,0,0,1,,0,0,0,0,0,0,0,2019-05-22 12:19:39.737,0
