Creating a new database

In [None]:
-- Create the demo database only when it is missing so this cell can be re-run.
-- DB_ID() returns NULL for an unknown database name; it replaces the lookup
-- against master.dbo.sysdatabases, which is a deprecated compatibility view.
IF DB_ID(N'externalDB') IS NULL
BEGIN
    CREATE DATABASE externalDB;
END;

-- List the databases on the instance to confirm that externalDB is present.
SELECT
    name,
    database_id,
    create_date
FROM sys.databases;
GO

-- Switch context so that the following cells create their objects in externalDB.
USE externalDB;
GO

Create a master key in the database

In [None]:
-- Create the database master key, which protects the secrets of the database
-- scoped credentials created later. The existence check makes the cell
-- re-runnable: a second CREATE MASTER KEY in the same database raises an error.
-- NOTE(review): the password is a hard-coded demo value; replace it before
-- using this notebook outside a throwaway environment.
IF NOT EXISTS (SELECT * FROM sys.symmetric_keys WHERE name = N'##MS_DatabaseMasterKey##')
BEGIN
    CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'Password1234';
END;

Create a scoped credential for MongoDB (with login credentials)

In [None]:
-- Store the MongoDB login (IDENTITY = user name, SECRET = password) as a
-- database scoped credential named MongoDB. The existence check lets the cell
-- be re-run without an "already exists" error.
-- NOTE(review): the login values are hard-coded demo credentials.
IF NOT EXISTS (SELECT * FROM sys.database_scoped_credentials WHERE name = N'MongoDB')
BEGIN
    CREATE DATABASE SCOPED CREDENTIAL MongoDB
    WITH IDENTITY = 'root', SECRET = 'password123';
END;

Create external data source using MongoDB login credentials

In [None]:
-- Register the MongoDB server as an external data source that authenticates
-- with the MongoDB scoped credential. The existence check lets the cell be
-- re-run without an "already exists" error.
IF NOT EXISTS (SELECT * FROM sys.external_data_sources WHERE name = N'MongoDataSource')
BEGIN
    CREATE EXTERNAL DATA SOURCE MongoDataSource
    WITH (
        LOCATION = 'mongodb://10.10.2.218:27017',
        CREDENTIAL = MongoDB,
        -- Passed through to the driver as-is; presumably disables its default
        -- encryption settings for this connection -- confirm against the
        -- PolyBase MongoDB connector documentation.
        CONNECTION_OPTIONS = 'UseDefaultEncryptionOptions=false'
    );
END;

(FOR DEBUGGING)

Verify that the database scoped credentials and external data sources were created

In [None]:
-- Debug helper: show every database scoped credential and every external
-- data source registered in externalDB.
USE [externalDB];

SELECT *
FROM sys.database_scoped_credentials;

SELECT *
FROM sys.external_data_sources;

Create external table for MongoDB

In [4]:
-- Create the external table [hdb_database_1].[resalePrices] in externalDB,
-- mapped to the MongoDB location '[hdb_database_1].[resalePrices]' through
-- the MongoDataSource external data source.
-- The DDL runs inside a named transaction; on any error the transaction is
-- rolled back and the original error is re-raised to the caller.
BEGIN TRY
    BEGIN TRANSACTION T8c2ce5a2ebc24717ba31a9e954a480c
        USE [externalDB];
        CREATE EXTERNAL TABLE [hdb_database_1].[resalePrices]
        (
            [_id] NVARCHAR(24) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL,
            [id] INT,
            [year] INT,
            [month] INT,
            [date] NVARCHAR(4000) COLLATE SQL_Latin1_General_CP1_CI_AS,
            [resale_price] INT
        )
        WITH (LOCATION = N'[hdb_database_1].[resalePrices]', DATA_SOURCE = [MongoDataSource]);
    COMMIT TRANSACTION T8c2ce5a2ebc24717ba31a9e954a480c
END TRY
BEGIN CATCH
    -- Only roll back when a transaction is still open.
    IF @@TRANCOUNT > 0
        ROLLBACK TRANSACTION T8c2ce5a2ebc24717ba31a9e954a480c;
    -- THROW re-raises the caught error with its original number, severity,
    -- state and message. The previous RAISERROR(@ErrorMessage, ...) used the
    -- message as a format string, so any '%' in it was misinterpreted, and it
    -- replaced the original error number with 50000.
    THROW;
END CATCH;


Query data from the MongoDB external table

Create external table for SQL Server (the CREATE EXTERNAL TABLE cell follows the MongoDB query and its output below)

In [6]:
-- Preview up to 1000 rows of the MongoDB-backed external table.
SELECT TOP (1000)
    rp.[_id],
    rp.[id],
    rp.[year],
    rp.[month],
    rp.[date],
    rp.[resale_price]
FROM [externalDB].[hdb_database_1].[resalePrices] AS rp;

_id,id,year,month,date,resale_price
600e6949299b9c1730d2e5aa,17,2017,1,2017-01,300000
600e6949299b9c1730d2e5ab,18,2017,1,2017-01,301000
600e6949299b9c1730d2e5ac,19,2017,1,2017-01,306000
600e6949299b9c1730d2e5ad,20,2017,1,2017-01,312000
600e6949299b9c1730d2e5c6,45,2017,1,2017-01,518000
600e6949299b9c1730d2e5c7,46,2017,1,2017-01,560000
600e6949299b9c1730d2e5c8,47,2017,1,2017-01,688000
600e6949299b9c1730d2e5c9,48,2017,1,2017-01,730000
600e6949299b9c1730d2e5e6,77,2017,1,2017-01,310000
600e6949299b9c1730d2e5e7,78,2017,1,2017-01,310000


In [5]:
-- Create the external table [dbo].[remainingLease] in externalDB, mapped to
-- the remote table '[hdb_database_2].[dbo].[remainingLease]' through the
-- [SQL_External] external data source.
-- NOTE(review): [SQL_External] is not created anywhere in the visible part of
-- this notebook; it presumably already exists on the instance -- confirm.
-- The DDL runs inside a named transaction; on any error the transaction is
-- rolled back and the original error is re-raised to the caller.
BEGIN TRY
    BEGIN TRANSACTION T7edfcf36b59240cea61e2bf10c4132d
        USE [externalDB];
        CREATE EXTERNAL TABLE [dbo].[remainingLease]
        (
            [id] INT NOT NULL,
            [year] INT NOT NULL,
            [month] INT NOT NULL,
            [date] DATETIME2(7) NOT NULL,
            [lease_commence_date] INT NOT NULL,
            [remaining_lease] NVARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL,
            [remaining_lease_months] INT NOT NULL
        )
        WITH (LOCATION = N'[hdb_database_2].[dbo].[remainingLease]', DATA_SOURCE = [SQL_External]);
    COMMIT TRANSACTION T7edfcf36b59240cea61e2bf10c4132d
END TRY
BEGIN CATCH
    -- Only roll back when a transaction is still open.
    IF @@TRANCOUNT > 0
        ROLLBACK TRANSACTION T7edfcf36b59240cea61e2bf10c4132d;
    -- THROW re-raises the caught error with its original number, severity,
    -- state and message. The previous RAISERROR(@ErrorMessage, ...) used the
    -- message as a format string, so any '%' in it was misinterpreted, and it
    -- replaced the original error number with 50000.
    THROW;
END CATCH;


Query data from external SQL Server table

In [7]:
-- Preview up to 1000 rows of the SQL Server-backed external table.
SELECT TOP (1000)
    rl.[id],
    rl.[year],
    rl.[month],
    rl.[date],
    rl.[lease_commence_date],
    rl.[remaining_lease],
    rl.[remaining_lease_months]
FROM [externalDB].[dbo].[remainingLease] AS rl;

id,year,month,date,lease_commence_date,remaining_lease,remaining_lease_months
1,2017,1,2017-01-01 00:00:00.0000000,1979,61 years 04 months,732
2,2017,1,2017-01-01 00:00:00.0000000,1978,60 years 07 months,720
3,2017,1,2017-01-01 00:00:00.0000000,1980,62 years 05 months,744
4,2017,1,2017-01-01 00:00:00.0000000,1980,62 years 01 month,744
5,2017,1,2017-01-01 00:00:00.0000000,1980,62 years 05 months,744
6,2017,1,2017-01-01 00:00:00.0000000,1981,63 years,756
7,2017,1,2017-01-01 00:00:00.0000000,1979,61 years 06 months,732
8,2017,1,2017-01-01 00:00:00.0000000,1976,58 years 04 months,696
9,2017,1,2017-01-01 00:00:00.0000000,1979,61 years 06 months,732
10,2017,1,2017-01-01 00:00:00.0000000,1979,61 years 04 months,732


Create external table for CSV file (HDFS)

In [15]:
-- Create the delimited-text file format and the external table
-- [dbo].[hdbAddress] in externalDB, mapped to the CSV file
-- '/hdb_csv_files/resale-prices-3.csv' through the [SqlStoragePool] external
-- data source.
-- NOTE(review): [SqlStoragePool] is not created anywhere in the visible part
-- of this notebook; it presumably already exists on the instance -- confirm.
-- The DDL runs inside a named transaction; on any error the transaction is
-- rolled back and the original error is re-raised to the caller.
BEGIN TRY
    BEGIN TRANSACTION Ta20dc7c52cbc4b168f681e59a1f645b
        USE [externalDB];
        -- The file format must carry the same name that the table references
        -- below. The original created [FileFormat_resale-prices-2] while the
        -- table used [FileFormat_resale-prices-3], which only works when the
        -- latter happens to exist already. The existence check keeps the cell
        -- re-runnable in that case.
        IF NOT EXISTS (SELECT * FROM sys.external_file_formats WHERE name = N'FileFormat_resale-prices-3')
        BEGIN
            -- Comma-separated fields, double-quoted strings; FIRST_ROW = 2
            -- starts reading at the second line of the file.
            CREATE EXTERNAL FILE FORMAT [FileFormat_resale-prices-3]
                WITH (FORMAT_TYPE = DELIMITEDTEXT, FORMAT_OPTIONS (FIELD_TERMINATOR = N',', STRING_DELIMITER = N'"', FIRST_ROW = 2));
        END;
        CREATE EXTERNAL TABLE [dbo].[hdbAddress]
        (
            [id] INT NOT NULL,
            [year] smallint NOT NULL,
            [month] tinyint NOT NULL,
            [date] nvarchar(50) NOT NULL,
            [town] nvarchar(50) NOT NULL,
            [flat_type] nvarchar(50) NOT NULL,
            [block] nvarchar(50) NOT NULL,
            [street_name] nvarchar(50) NOT NULL,
            [storey_range] nvarchar(50) NOT NULL,
            [floor_area_sqm] float NOT NULL,
            [flat_model] nvarchar(50) NOT NULL
        )
        WITH (LOCATION = N'/hdb_csv_files/resale-prices-3.csv', DATA_SOURCE = [SqlStoragePool], FILE_FORMAT = [FileFormat_resale-prices-3]);
    COMMIT TRANSACTION Ta20dc7c52cbc4b168f681e59a1f645b
END TRY
BEGIN CATCH
    -- Only roll back when a transaction is still open.
    IF @@TRANCOUNT > 0
        ROLLBACK TRANSACTION Ta20dc7c52cbc4b168f681e59a1f645b;
    -- THROW re-raises the caught error with its original number, severity,
    -- state and message. The previous RAISERROR(@ErrorMessage, ...) used the
    -- message as a format string, so any '%' in it was misinterpreted, and it
    -- replaced the original error number with 50000.
    THROW;
END CATCH;


Query data from external table (HDFS CSV)

In [16]:
-- Preview up to 1000 rows of the CSV-backed (HDFS) external table.
SELECT TOP (1000)
    addr.[id],
    addr.[year],
    addr.[month],
    addr.[date],
    addr.[town],
    addr.[flat_type],
    addr.[block],
    addr.[street_name],
    addr.[storey_range],
    addr.[floor_area_sqm],
    addr.[flat_model]
FROM [externalDB].[dbo].[hdbAddress] AS addr;

id,year,month,date,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model
83,2017,1,2017-01,BEDOK,3 ROOM,709,BEDOK RESERVOIR RD,07 TO 09,68,New Generation
84,2017,1,2017-01,BEDOK,3 ROOM,425,BEDOK NTH RD,10 TO 12,67,New Generation
85,2017,1,2017-01,BEDOK,3 ROOM,117,BEDOK NTH RD,04 TO 06,73,New Generation
86,2017,1,2017-01,BEDOK,3 ROOM,423,BEDOK NTH AVE 1,10 TO 12,82,New Generation
87,2017,1,2017-01,BEDOK,3 ROOM,42,CHAI CHEE ST,01 TO 03,82,New Generation
88,2017,1,2017-01,BEDOK,3 ROOM,533,BEDOK NTH ST 3,04 TO 06,82,New Generation
89,2017,1,2017-01,BEDOK,4 ROOM,149,BEDOK RESERVOIR RD,04 TO 06,84,Simplified
90,2017,1,2017-01,BEDOK,4 ROOM,35,BEDOK STH AVE 2,13 TO 15,90,Improved
91,2017,1,2017-01,BEDOK,4 ROOM,42,BEDOK STH RD,04 TO 06,84,Improved
92,2017,1,2017-01,BEDOK,4 ROOM,34,BEDOK STH AVE 2,10 TO 12,90,Improved


Joining data from the three tables

In [19]:
-- Combine the three external tables on their shared [id] column:
-- address details (CSV), lease details (SQL Server) and resale price (MongoDB).
-- Returns the first 1000 matching rows ordered by id.
SELECT TOP (1000)
    addr.[id],
    addr.[year],
    addr.[month],
    addr.[date],
    addr.[town],
    addr.[flat_type],
    addr.[block],
    addr.[street_name],
    addr.[storey_range],
    addr.[floor_area_sqm],
    addr.[flat_model],
    lease.[lease_commence_date],
    lease.[remaining_lease],
    lease.[remaining_lease_months],
    price.[resale_price]
FROM [dbo].[hdbAddress] AS addr
INNER JOIN [dbo].[remainingLease] AS lease
    ON addr.[id] = lease.[id]
INNER JOIN [hdb_database_1].[resalePrices] AS price
    ON addr.[id] = price.[id]
ORDER BY addr.[id] ASC;

id,year,month,date,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,remaining_lease_months,resale_price
1,2017,1,2017-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,10 TO 12,44,Improved,1979,61 years 04 months,732,232000
2,2017,1,2017-01,ANG MO KIO,3 ROOM,108,ANG MO KIO AVE 4,01 TO 03,67,New Generation,1978,60 years 07 months,720,250000
3,2017,1,2017-01,ANG MO KIO,3 ROOM,602,ANG MO KIO AVE 5,01 TO 03,67,New Generation,1980,62 years 05 months,744,262000
4,2017,1,2017-01,ANG MO KIO,3 ROOM,465,ANG MO KIO AVE 10,04 TO 06,68,New Generation,1980,62 years 01 month,744,265000
5,2017,1,2017-01,ANG MO KIO,3 ROOM,601,ANG MO KIO AVE 5,01 TO 03,67,New Generation,1980,62 years 05 months,744,265000
6,2017,1,2017-01,ANG MO KIO,3 ROOM,150,ANG MO KIO AVE 5,01 TO 03,68,New Generation,1981,63 years,756,275000
7,2017,1,2017-01,ANG MO KIO,3 ROOM,447,ANG MO KIO AVE 10,04 TO 06,68,New Generation,1979,61 years 06 months,732,280000
8,2017,1,2017-01,ANG MO KIO,3 ROOM,218,ANG MO KIO AVE 1,04 TO 06,67,New Generation,1976,58 years 04 months,696,285000
9,2017,1,2017-01,ANG MO KIO,3 ROOM,447,ANG MO KIO AVE 10,04 TO 06,68,New Generation,1979,61 years 06 months,732,285000
10,2017,1,2017-01,ANG MO KIO,3 ROOM,571,ANG MO KIO AVE 3,01 TO 03,67,New Generation,1979,61 years 04 months,732,285000


Dropping the tables

In [None]:
-- Drop the three external tables created by this notebook, each only when it
-- exists. The previous version dropped [airportData].[mailVolume] and
-- [dbo].[MonthlyArrivals], which this notebook never creates, and left its
-- own tables in place. sys.external_tables lists external tables only, so the
-- check cannot match a regular table of the same name.
USE [externalDB];
GO
IF EXISTS (SELECT * FROM sys.external_tables WHERE object_id = OBJECT_ID(N'[hdb_database_1].[resalePrices]'))
    DROP EXTERNAL TABLE [hdb_database_1].[resalePrices];
GO
IF EXISTS (SELECT * FROM sys.external_tables WHERE object_id = OBJECT_ID(N'[dbo].[remainingLease]'))
    DROP EXTERNAL TABLE [dbo].[remainingLease];
GO
IF EXISTS (SELECT * FROM sys.external_tables WHERE object_id = OBJECT_ID(N'[dbo].[hdbAddress]'))
    DROP EXTERNAL TABLE [dbo].[hdbAddress];
GO

Dropping the database

In [None]:
-- Remove the demo database. Run from master because a database cannot be
-- dropped while it is the current database of the session.
USE master;
GO
-- Guard on DB_ID() so the cell is a no-op when externalDB is already gone,
-- instead of failing on ALTER DATABASE.
IF DB_ID(N'externalDB') IS NOT NULL
BEGIN
    -- SINGLE_USER WITH ROLLBACK IMMEDIATE rolls back open transactions and
    -- disconnects other sessions so the DROP is not blocked by them.
    ALTER DATABASE externalDB SET SINGLE_USER WITH ROLLBACK IMMEDIATE;
    DROP DATABASE externalDB;
END;
GO