Creating a new database

In [None]:
-- Create the externalDB database when it does not exist yet, list all
-- databases for inspection, then switch the session to externalDB.
-- DB_ID() returns NULL for an unknown database name; it replaces the lookup
-- against master.dbo.sysdatabases, which is only a backward-compatibility view.
IF DB_ID(N'externalDB') IS NULL
BEGIN
    CREATE DATABASE externalDB;
END;
SELECT * FROM sys.databases;
GO
USE externalDB;
GO

Create a master key in the database

In [None]:
-- Create the database master key in the current database.
-- The guard makes the cell re-runnable: CREATE MASTER KEY raises an error when
-- a master key already exists, and the key is exposed in sys.symmetric_keys
-- under the fixed name ##MS_DatabaseMasterKey##.
-- NOTE(review): the password is hard-coded in the notebook; treat it as a
-- placeholder and replace it before running outside a demo environment.
IF NOT EXISTS (SELECT * FROM sys.symmetric_keys WHERE name = N'##MS_DatabaseMasterKey##')
BEGIN
    CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'Password1234';
END;

Create a scoped credential for MongoDB (with login credentials)

In [None]:
-- Database scoped credential named MongoDB; it is referenced by the
-- MongoDataSource external data source.
-- NOTE(review): identity and secret are stored in plain text in this notebook.
CREATE DATABASE SCOPED CREDENTIAL [MongoDB]
WITH
    IDENTITY = 'root',
    SECRET = 'password123';

Create external data source using MongoDB login credentials

In [None]:
-- External data source pointing at the MongoDB endpoint 10.10.2.218:27017.
-- It authenticates with the MongoDB database scoped credential.
CREATE EXTERNAL DATA SOURCE [MongoDataSource]
WITH
(
    LOCATION = 'mongodb://10.10.2.218:27017',
    CREDENTIAL = [MongoDB],
    CONNECTION_OPTIONS = 'UseDefaultEncryptionOptions=false'
);

(FOR DEBUGGING)

Verify that the scoped credentials and data sources were created

In [None]:
-- Debugging aid: switch to externalDB and list every database scoped
-- credential and every external data source defined in it.
USE [externalDB];
SELECT * FROM sys.database_scoped_credentials;
SELECT * FROM sys.external_data_sources;

Create external table for MongoDB

In [None]:
-- Create the external table [hdb_database_1].[resalePrices] on top of the
-- MongoDataSource external data source.
-- The DDL runs inside a named transaction; if anything fails, the CATCH block
-- rolls back any open transaction and re-raises the captured error.
-- NOTE(review): the [hdb_database_1] schema is not created anywhere in this
-- notebook - presumably it already exists in externalDB; confirm.
BEGIN TRY
    BEGIN TRANSACTION T8c2ce5a2ebc24717ba31a9e954a480c;

    USE [externalDB];

    CREATE EXTERNAL TABLE [hdb_database_1].[resalePrices]
    (
        [_id]          NVARCHAR(24)   COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL,
        [id]           INT,
        [year]         INT,
        [month]        INT,
        [date]         NVARCHAR(4000) COLLATE SQL_Latin1_General_CP1_CI_AS,
        [resale_price] INT
    )
    WITH
    (
        LOCATION = N'[hdb_database_1].[resalePrices]',
        DATA_SOURCE = [MongoDataSource]
    );

    COMMIT TRANSACTION T8c2ce5a2ebc24717ba31a9e954a480c;
END TRY
BEGIN CATCH
    -- Only roll back when a transaction is still open.
    IF @@TRANCOUNT > 0
    BEGIN
        ROLLBACK TRANSACTION T8c2ce5a2ebc24717ba31a9e954a480c;
    END;

    -- Capture the original error details so they can be surfaced to the caller.
    DECLARE
        @msg      NVARCHAR(MAX) = ERROR_MESSAGE(),
        @severity INT           = ERROR_SEVERITY(),
        @state    INT           = ERROR_STATE();

    RAISERROR(@msg, @severity, @state);
END CATCH;


Data querying from MongoDB external table

In [None]:
-- Preview the first 1000 rows of the MongoDB-backed external table,
-- ordered by ascending id.
SELECT TOP (1000)
    rp.[_id],
    rp.[id],
    rp.[year],
    rp.[month],
    rp.[date],
    rp.[resale_price]
FROM [externalDB].[hdb_database_1].[resalePrices] AS rp
ORDER BY rp.[id] ASC;

Create scoped credential for SQL Server

In [None]:
-- Database scoped credential named Ext_SQL_Cred holding a SQL Server login.
-- NOTE(review): identity and secret are stored in plain text in this notebook.
CREATE DATABASE SCOPED CREDENTIAL [Ext_SQL_Cred]
WITH
    IDENTITY = 'SA',
    SECRET = 'Password1234';

Create data source for SQL Server

In [None]:
-- External data source for the remote SQL Server instance at 10.10.0.178,
-- port 1430.
-- Fixes relative to the previous version of this cell:
--   * LOCATION carries the sqlserver:// prefix that identifies the connector
--     for a SQL Server external data source.
--   * CREDENTIAL points at Ext_SQL_Cred, the database scoped credential this
--     notebook actually creates (SQL_External is never defined here).
--   * The trailing comma before the closing parenthesis, a syntax error, is gone.
CREATE EXTERNAL DATA SOURCE Ext_SQL_Source
WITH (
    LOCATION = 'sqlserver://10.10.0.178:1430',
    CREDENTIAL = Ext_SQL_Cred
);

Create external table for SQL Server

In [None]:
-- Create the external table [dbo].[remainingLease] on top of the
-- Ext_SQL_Source external data source; LOCATION names the remote object
-- [hdb_database_2].[dbo].[remainingLease].
-- The DDL runs inside a named transaction; if anything fails, the CATCH block
-- rolls back any open transaction and re-raises the captured error.
BEGIN TRY
    BEGIN TRANSACTION T7edfcf36b59240cea61e2bf10c4132d;

    USE [externalDB];

    CREATE EXTERNAL TABLE [dbo].[remainingLease]
    (
        [id]                     INT          NOT NULL,
        [year]                   INT          NOT NULL,
        [month]                  INT          NOT NULL,
        [date]                   DATETIME2(7) NOT NULL,
        [lease_commence_date]    INT          NOT NULL,
        [remaining_lease]        NVARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL,
        [remaining_lease_months] INT          NOT NULL
    )
    WITH
    (
        LOCATION = N'[hdb_database_2].[dbo].[remainingLease]',
        DATA_SOURCE = [Ext_SQL_Source]
    );

    COMMIT TRANSACTION T7edfcf36b59240cea61e2bf10c4132d;
END TRY
BEGIN CATCH
    -- Only roll back when a transaction is still open.
    IF @@TRANCOUNT > 0
    BEGIN
        ROLLBACK TRANSACTION T7edfcf36b59240cea61e2bf10c4132d;
    END;

    -- Capture the original error details so they can be surfaced to the caller.
    DECLARE
        @msg      NVARCHAR(MAX) = ERROR_MESSAGE(),
        @severity INT           = ERROR_SEVERITY(),
        @state    INT           = ERROR_STATE();

    RAISERROR(@msg, @severity, @state);
END CATCH;


Query data from external SQL Server table

In [None]:
-- Preview the first 1000 rows of the SQL Server-backed external table,
-- ordered by ascending id.
SELECT TOP (1000)
    rl.[id],
    rl.[year],
    rl.[month],
    rl.[date],
    rl.[lease_commence_date],
    rl.[remaining_lease],
    rl.[remaining_lease_months]
FROM [externalDB].[dbo].[remainingLease] AS rl
ORDER BY rl.[id] ASC;

Create external table for CSV file (HDFS)

In [None]:
-- Create a delimited-text external file format and the external table
-- [dbo].[hdbAddress] over the CSV file /hdb_csv_files/resale-prices-3.csv.
-- Fix: the file format was created as [FileFormat_resale-prices-4] while the
-- table referenced [FileFormat_resale-prices-3], so the table creation could
-- not resolve the format this cell had just defined. Both now use the "-3"
-- name, which also matches the CSV file name.
-- The DDL runs inside a named transaction; if anything fails, the CATCH block
-- rolls back any open transaction and re-raises the captured error.
-- NOTE(review): the [SqlStoragePool] data source is not created anywhere in
-- this notebook - presumably it already exists in externalDB; confirm.
BEGIN TRY
    BEGIN TRANSACTION Ta20dc7c52cbc4b168f681e59a1f645b
        USE [externalDB];
        -- Comma-separated fields, double-quoted strings; FIRST_ROW = 2 starts
        -- reading at the second line of the file.
        CREATE EXTERNAL FILE FORMAT [FileFormat_resale-prices-3]
            WITH (FORMAT_TYPE = DELIMITEDTEXT, FORMAT_OPTIONS (FIELD_TERMINATOR = N',', STRING_DELIMITER = N'"', FIRST_ROW = 2));
        CREATE EXTERNAL TABLE [dbo].[hdbAddress]
        (
            [id] INT NOT NULL,
            [year] smallint NOT NULL,
            [month] tinyint NOT NULL,
            [date] nvarchar(50) NOT NULL,
            [town] nvarchar(50) NOT NULL,
            [flat_type] nvarchar(50) NOT NULL,
            [block] nvarchar(50) NOT NULL,
            [street_name] nvarchar(50) NOT NULL,
            [storey_range] nvarchar(50) NOT NULL,
            [floor_area_sqm] float NOT NULL,
            [flat_model] nvarchar(50) NOT NULL
        )
        WITH (LOCATION = N'/hdb_csv_files/resale-prices-3.csv', DATA_SOURCE = [SqlStoragePool], FILE_FORMAT = [FileFormat_resale-prices-3]);
    COMMIT TRANSACTION Ta20dc7c52cbc4b168f681e59a1f645b
END TRY
BEGIN CATCH
    -- Only roll back when a transaction is still open.
    IF @@TRANCOUNT > 0
        ROLLBACK TRANSACTION Ta20dc7c52cbc4b168f681e59a1f645b
    -- Capture the original error details so they can be surfaced to the caller.
    DECLARE @ErrorMessage NVARCHAR(max) = ERROR_MESSAGE();
    DECLARE @ErrorSeverity INT = ERROR_SEVERITY();
    DECLARE @ErrorState INT = ERROR_STATE();
    RAISERROR(@ErrorMessage, @ErrorSeverity, @ErrorState);
END CATCH;


Query data from external table (HDFS CSV)

In [None]:
-- Preview the first 1000 rows of the CSV-backed external table,
-- ordered by ascending id.
SELECT TOP (1000)
    addr.[id],
    addr.[year],
    addr.[month],
    addr.[date],
    addr.[town],
    addr.[flat_type],
    addr.[block],
    addr.[street_name],
    addr.[storey_range],
    addr.[floor_area_sqm],
    addr.[flat_model]
FROM [externalDB].[dbo].[hdbAddress] AS addr
ORDER BY addr.[id] ASC;

Joining data from the three tables

In [None]:
-- Join the three external tables on id and preview the first 1000 combined
-- rows: address columns from hdbAddress, lease columns from remainingLease
-- and the price from resalePrices, ordered by ascending id.
USE externalDB
GO

SELECT TOP (1000)
    addr.[id],
    addr.[year],
    addr.[month],
    addr.[date],
    addr.[town],
    addr.[flat_type],
    addr.[block],
    addr.[street_name],
    addr.[storey_range],
    addr.[floor_area_sqm],
    addr.[flat_model],
    lease.[lease_commence_date],
    lease.[remaining_lease],
    lease.[remaining_lease_months],
    price.[resale_price]
FROM [dbo].[hdbAddress] AS addr
INNER JOIN [dbo].[remainingLease] AS lease
    ON addr.[id] = lease.[id]
INNER JOIN [hdb_database_1].[resalePrices] AS price
    ON addr.[id] = price.[id]
ORDER BY addr.[id] ASC;

Create a new table to store all the data

In [None]:
-- Local table in externalDB with one column for each value selected by the
-- three-way join of the external tables; id is the primary key.
USE externalDB;

CREATE TABLE dbo.hdb_resale_prices
(
    [id]                     INT         NOT NULL,
    [year]                   SMALLINT    NOT NULL,
    [month]                  SMALLINT    NOT NULL,
    [date]                   VARCHAR(50) NOT NULL,
    [town]                   VARCHAR(50) NOT NULL,
    [flat_type]              VARCHAR(50) NOT NULL,
    [block]                  VARCHAR(50) NOT NULL,
    [street_name]            VARCHAR(50) NOT NULL,
    [storey_range]           VARCHAR(50) NOT NULL,
    [floor_area_sqm]         FLOAT       NOT NULL,
    [flat_model]             VARCHAR(50) NOT NULL,
    [lease_commence_date]    SMALLINT    NOT NULL,
    [remaining_lease]        VARCHAR(50) NOT NULL,
    [remaining_lease_months] INT         NOT NULL,
    [resale_price]           INT         NOT NULL,
    PRIMARY KEY ([id])
);
GO

Insert data into new table

In [None]:
-- Load dbo.hdb_resale_prices from the three external tables joined on id.
-- The explicit target column list ties each selected value to a named column,
-- so the load no longer depends on the physical column order of the table.
INSERT INTO [dbo].[hdb_resale_prices]
(
    [id],
    [year],
    [month],
    [date],
    [town],
    [flat_type],
    [block],
    [street_name],
    [storey_range],
    [floor_area_sqm],
    [flat_model],
    [lease_commence_date],
    [remaining_lease],
    [remaining_lease_months],
    [resale_price]
)
SELECT
    addr.[id],
    addr.[year],
    addr.[month],
    addr.[date],
    addr.[town],
    addr.[flat_type],
    addr.[block],
    addr.[street_name],
    addr.[storey_range],
    addr.[floor_area_sqm],
    addr.[flat_model],
    lease.[lease_commence_date],
    lease.[remaining_lease],
    lease.[remaining_lease_months],
    price.[resale_price]
FROM [dbo].[hdbAddress] AS addr
INNER JOIN [dbo].[remainingLease] AS lease
    ON addr.[id] = lease.[id]
INNER JOIN [hdb_database_1].[resalePrices] AS price
    ON addr.[id] = price.[id];

Query data from new table

In [None]:
-- Return every row of the consolidated table. Columns are listed explicitly,
-- in the order the table declares them, and rows are ordered by id so the
-- output is deterministic, matching the other preview queries in this notebook.
SELECT
    [id],
    [year],
    [month],
    [date],
    [town],
    [flat_type],
    [block],
    [street_name],
    [storey_range],
    [floor_area_sqm],
    [flat_model],
    [lease_commence_date],
    [remaining_lease],
    [remaining_lease_months],
    [resale_price]
FROM [dbo].[hdb_resale_prices]
ORDER BY [id] ASC;

Clean up

In [None]:
-- Clean up: drop the external tables created by this notebook.
-- The previous version only dropped [airportData].[mailVolume] and
-- [dbo].[MonthlyArrivals], which this notebook never creates, so the three
-- external tables defined above were left behind.
-- Each drop is guarded by a lookup in sys.external_tables so the cell can be
-- re-run safely.
USE [externalDB];
GO
IF EXISTS (SELECT * FROM sys.external_tables WHERE object_id = OBJECT_ID(N'[hdb_database_1].[resalePrices]'))
    DROP EXTERNAL TABLE [hdb_database_1].[resalePrices];
GO
IF EXISTS (SELECT * FROM sys.external_tables WHERE object_id = OBJECT_ID(N'[dbo].[remainingLease]'))
    DROP EXTERNAL TABLE [dbo].[remainingLease];
GO
IF EXISTS (SELECT * FROM sys.external_tables WHERE object_id = OBJECT_ID(N'[dbo].[hdbAddress]'))
    DROP EXTERNAL TABLE [dbo].[hdbAddress];
GO
-- NOTE(review): the two drops below are kept from the original cell; their
-- targets are not created in this notebook - confirm whether they are still
-- needed or are leftovers from another notebook.
IF  EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[airportData].[mailVolume]') AND type in (N'U'))
DROP EXTERNAL TABLE [airportData].[mailVolume]
GO
IF  EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[MonthlyArrivals]') AND type in (N'U'))
DROP EXTERNAL TABLE [dbo].[MonthlyArrivals]
GO