Change database context to externalDB

In [None]:
-- Switch the session to the database that holds the schemas and tables used below.
USE [externalDB];
GO

Create one schema per external data source, plus a schema for the combined table

In [None]:
-- Create the four schemas used by this notebook if they are missing:
-- [mongodb], [extsql] and [hdfs] hold the external tables, [bdc] holds the
-- combined local table. Each CREATE SCHEMA is issued through EXEC so that it
-- runs as its own batch.
IF NOT EXISTS (SELECT 1 FROM sys.schemas AS s WHERE s.[name] = 'mongodb')
    EXEC('CREATE SCHEMA mongodb');

IF NOT EXISTS (SELECT 1 FROM sys.schemas AS s WHERE s.[name] = 'extsql')
    EXEC('CREATE SCHEMA extsql');

IF NOT EXISTS (SELECT 1 FROM sys.schemas AS s WHERE s.[name] = 'hdfs')
    EXEC('CREATE SCHEMA hdfs');

IF NOT EXISTS (SELECT 1 FROM sys.schemas AS s WHERE s.[name] = 'bdc')
    EXEC('CREATE SCHEMA bdc');

Check if desired schemas have been created

In [None]:
-- List the catalog rows for the four schemas this notebook relies on;
-- a schema that is missing from the result was not created.
SELECT s.*
FROM sys.schemas AS s
WHERE s.[name] IN ('extsql', 'hdfs', 'mongodb', 'bdc');

Create external table for MongoDB

In [None]:
-- Create the external table [mongodb].[resalePrices], mapped to the location
-- [hdb_database_1].[resalePrices] of the data source [MongoDataSource].
-- The DDL runs inside a named transaction; if anything fails, the transaction
-- is rolled back and the original error is re-raised to the caller.
BEGIN TRY
    BEGIN TRANSACTION T8c2ce5a2ebc24717ba31a9e954a480c;
        USE [externalDB];
        CREATE EXTERNAL TABLE [mongodb].[resalePrices]
        (
            [_id] NVARCHAR(24) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL,
            [id] INT,
            [year] INT,
            [month] INT,
            [date] NVARCHAR(4000) COLLATE SQL_Latin1_General_CP1_CI_AS,
            [resale_price] INT
        )
        WITH (LOCATION = N'[hdb_database_1].[resalePrices]', DATA_SOURCE = [MongoDataSource]);
    COMMIT TRANSACTION T8c2ce5a2ebc24717ba31a9e954a480c;
END TRY
BEGIN CATCH
    -- Only roll back if the failure left a transaction open.
    IF @@TRANCOUNT > 0
        ROLLBACK TRANSACTION T8c2ce5a2ebc24717ba31a9e954a480c;
    -- Re-raise the caught error unchanged. Passing ERROR_MESSAGE() to RAISERROR
    -- as the format string would misinterpret any '%' in the message and would
    -- replace the original error number with 50000.
    THROW;
END CATCH;


Query data from the MongoDB external table

In [None]:
-- Preview the first 1000 rows (lowest [id] first) of the MongoDB external table.
SELECT TOP (1000)
    rp.[_id],
    rp.[id],
    rp.[year],
    rp.[month],
    rp.[date],
    rp.[resale_price]
FROM [externalDB].[mongodb].[resalePrices] AS rp
ORDER BY rp.[id] ASC;

Create external table for SQL Server

In [None]:
-- Create the external table [extsql].[remainingLease], mapped to the location
-- [hdb_database_2].[dbo].[remainingLease] of the data source [SQL_External].
-- The DDL runs inside a named transaction; if anything fails, the transaction
-- is rolled back and the original error is re-raised to the caller.
BEGIN TRY
    BEGIN TRANSACTION T7edfcf36b59240cea61e2bf10c4132d;
        USE [externalDB];
        CREATE EXTERNAL TABLE [extsql].[remainingLease]
        (
            [id] INT NOT NULL,
            [year] INT NOT NULL,
            [month] INT NOT NULL,
            [date] DATETIME2(7) NOT NULL,
            [lease_commence_date] INT NOT NULL,
            [remaining_lease] NVARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL,
            [remaining_lease_months] INT NOT NULL
        )
        WITH (LOCATION = N'[hdb_database_2].[dbo].[remainingLease]', DATA_SOURCE = [SQL_External]);
    COMMIT TRANSACTION T7edfcf36b59240cea61e2bf10c4132d;
END TRY
BEGIN CATCH
    -- Only roll back if the failure left a transaction open.
    IF @@TRANCOUNT > 0
        ROLLBACK TRANSACTION T7edfcf36b59240cea61e2bf10c4132d;
    -- Re-raise the caught error unchanged. Passing ERROR_MESSAGE() to RAISERROR
    -- as the format string would misinterpret any '%' in the message and would
    -- replace the original error number with 50000.
    THROW;
END CATCH;


Query data from external SQL Server table

In [None]:
-- Preview the first 1000 rows (lowest [id] first) of the SQL Server external table.
SELECT TOP (1000)
    rl.[id],
    rl.[year],
    rl.[month],
    rl.[date],
    rl.[lease_commence_date],
    rl.[remaining_lease],
    rl.[remaining_lease_months]
FROM [externalDB].[extsql].[remainingLease] AS rl
ORDER BY rl.[id] ASC;

Create external table for CSV file (HDFS)

In [None]:
-- Create the external table [hdfs].[hdbAddress] over the CSV file
-- /hdb_csv_files/resale-prices-3.csv in the data source [SqlStoragePool].
-- The delimited-text file format is created first if it does not exist yet.
-- Everything runs inside a named transaction; if anything fails, the
-- transaction is rolled back and the original error is re-raised to the caller.
BEGIN TRY
    BEGIN TRANSACTION Ta20dc7c52cbc4b168f681e59a1f645b;
        USE [externalDB];
        -- Comma-separated, double-quoted strings, data starts on row 2.
        IF NOT EXISTS (SELECT * FROM sys.external_file_formats WHERE name = 'FileFormat_resale-prices-3')
            CREATE EXTERNAL FILE FORMAT [FileFormat_resale-prices-3]
            WITH (FORMAT_TYPE = DELIMITEDTEXT, FORMAT_OPTIONS (FIELD_TERMINATOR = N',', STRING_DELIMITER = N'"', FIRST_ROW = 2));
        CREATE EXTERNAL TABLE [hdfs].[hdbAddress]
        (
            [id] INT NOT NULL,
            [year] smallint NOT NULL,
            [month] tinyint NOT NULL,
            [date] nvarchar(50) NOT NULL,
            [town] nvarchar(50) NOT NULL,
            [flat_type] nvarchar(50) NOT NULL,
            [block] nvarchar(50) NOT NULL,
            [street_name] nvarchar(50) NOT NULL,
            [storey_range] nvarchar(50) NOT NULL,
            [floor_area_sqm] float NOT NULL,
            [flat_model] nvarchar(50) NOT NULL
        )
        WITH (LOCATION = N'/hdb_csv_files/resale-prices-3.csv', DATA_SOURCE = [SqlStoragePool], FILE_FORMAT = [FileFormat_resale-prices-3]);
    COMMIT TRANSACTION Ta20dc7c52cbc4b168f681e59a1f645b;
END TRY
BEGIN CATCH
    -- Only roll back if the failure left a transaction open.
    IF @@TRANCOUNT > 0
        ROLLBACK TRANSACTION Ta20dc7c52cbc4b168f681e59a1f645b;
    -- Re-raise the caught error unchanged. Passing ERROR_MESSAGE() to RAISERROR
    -- as the format string would misinterpret any '%' in the message and would
    -- replace the original error number with 50000.
    THROW;
END CATCH;


Query data from external table (HDFS CSV)

In [None]:
-- Preview the first 1000 rows (lowest [id] first) of the HDFS CSV external table.
SELECT TOP (1000)
    addr.[id],
    addr.[year],
    addr.[month],
    addr.[date],
    addr.[town],
    addr.[flat_type],
    addr.[block],
    addr.[street_name],
    addr.[storey_range],
    addr.[floor_area_sqm],
    addr.[flat_model]
FROM [externalDB].[hdfs].[hdbAddress] AS addr
ORDER BY addr.[id] ASC;

Join data from the three external tables

In [None]:
USE externalDB
GO

-- Combine the three external tables on [id] and preview the first 1000 rows:
-- address columns come from the HDFS CSV table, lease columns from the
-- external SQL Server table, and the price from the MongoDB table.
-- Inner joins: only ids present in all three tables are returned.
SELECT TOP (1000)
    addr.[id],
    addr.[year],
    addr.[month],
    addr.[date],
    addr.[town],
    addr.[flat_type],
    addr.[block],
    addr.[street_name],
    addr.[storey_range],
    addr.[floor_area_sqm],
    addr.[flat_model],
    lease.[lease_commence_date],
    lease.[remaining_lease],
    lease.[remaining_lease_months],
    price.[resale_price]
FROM [hdfs].[hdbAddress] AS addr
INNER JOIN [extsql].[remainingLease] AS lease
    ON addr.[id] = lease.[id]
INNER JOIN [mongodb].[resalePrices] AS price
    ON addr.[id] = price.[id]
ORDER BY addr.[id] ASC

Create a new table to store all the data

In [None]:
USE externalDB;

-- Local (non-external) table that stores the combined result of the three
-- external tables, keyed by [id].
CREATE TABLE [bdc].[hdb_resale_prices]
(
    [id]                     INT         NOT NULL,
    [year]                   SMALLINT    NOT NULL,
    [month]                  SMALLINT    NOT NULL,
    [date]                   VARCHAR(50) NOT NULL,
    [town]                   VARCHAR(50) NOT NULL,
    [flat_type]              VARCHAR(50) NOT NULL,
    [block]                  VARCHAR(50) NOT NULL,
    [street_name]            VARCHAR(50) NOT NULL,
    [storey_range]           VARCHAR(50) NOT NULL,
    [floor_area_sqm]         FLOAT       NOT NULL,
    [flat_model]             VARCHAR(50) NOT NULL,
    [lease_commence_date]    SMALLINT    NOT NULL,
    [remaining_lease]        VARCHAR(50) NOT NULL,
    [remaining_lease_months] INT         NOT NULL,
    [resale_price]           INT         NOT NULL,
    PRIMARY KEY ([id])
);
GO

Insert data into new table

In [None]:
-- Materialize the three-way join of the external tables into the local table
-- [bdc].[hdb_resale_prices]. The target columns are listed explicitly so the
-- load does not depend on the physical column order of the target table.
-- Inner joins: only ids present in all three sources are inserted.
INSERT INTO [bdc].[hdb_resale_prices]
(
    [id],
    [year],
    [month],
    [date],
    [town],
    [flat_type],
    [block],
    [street_name],
    [storey_range],
    [floor_area_sqm],
    [flat_model],
    [lease_commence_date],
    [remaining_lease],
    [remaining_lease_months],
    [resale_price]
)
SELECT
    addr.[id],
    addr.[year],
    addr.[month],
    addr.[date],
    addr.[town],
    addr.[flat_type],
    addr.[block],
    addr.[street_name],
    addr.[storey_range],
    addr.[floor_area_sqm],
    addr.[flat_model],
    lease.[lease_commence_date],
    lease.[remaining_lease],
    lease.[remaining_lease_months],
    price.[resale_price]
FROM [hdfs].[hdbAddress] AS addr
INNER JOIN [extsql].[remainingLease] AS lease
    ON addr.[id] = lease.[id]
INNER JOIN [mongodb].[resalePrices] AS price
    ON addr.[id] = price.[id];

Query data from new table

In [None]:
-- Read back every row of the combined table. Columns are named explicitly and
-- rows are ordered by [id] so the output is deterministic, matching the other
-- query cells in this notebook.
SELECT
    [id],
    [year],
    [month],
    [date],
    [town],
    [flat_type],
    [block],
    [street_name],
    [storey_range],
    [floor_area_sqm],
    [flat_model],
    [lease_commence_date],
    [remaining_lease],
    [remaining_lease_months],
    [resale_price]
FROM [bdc].[hdb_resale_prices]
ORDER BY [id] ASC;

Clean up

In [None]:
-- Clean up: drop the combined table and the three external tables if present.

-- [bdc].[hdb_resale_prices] was created with CREATE TABLE, so it is a regular
-- table and must be removed with DROP TABLE (DROP EXTERNAL TABLE fails on it).
IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[bdc].[hdb_resale_prices]') AND type IN (N'U'))
    DROP TABLE [bdc].[hdb_resale_prices];
GO

-- The remaining three were created with CREATE EXTERNAL TABLE.
IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[extsql].[remainingLease]') AND type IN (N'U'))
    DROP EXTERNAL TABLE [extsql].[remainingLease];
GO

IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[hdfs].[hdbAddress]') AND type IN (N'U'))
    DROP EXTERNAL TABLE [hdfs].[hdbAddress];
GO

IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[mongodb].[resalePrices]') AND type IN (N'U'))
    DROP EXTERNAL TABLE [mongodb].[resalePrices];
GO