In [None]:
-- Drop any previous definition so this cell can be re-run.
DROP PROCEDURE IF EXISTS generate_model;
GO

-- generate_model
--   Trains a scikit-learn LinearRegression inside SQL Server through
--   sp_execute_external_script and hands the fitted model back to the caller.
--
--   Parameters:
--     @trained_model (OUTPUT)  pickle.dumps() bytes of the fitted model.
--
--   Input data:
--     month_no, total_traffic (features) and total_passengers (target) read
--     from [externalDB].[dbo].[data]; the script keeps rows 0..9998 of
--     whatever the query returns.
--
--   Notes:
--     * Only the three columns the script actually uses are selected, so the
--       procedure does not depend on the rest of the table's schema.
--     * train_test_split is called without a fixed random_state, so the
--       training subset (and therefore the fitted model) can differ between
--       runs. The held-out test subset is produced but not used here.
--     * The input query has no ORDER BY, so which rows fall inside the
--       0..9998 slice is up to the engine when the table is larger than that.
CREATE PROCEDURE generate_model (@trained_model varbinary(max) OUTPUT)
AS
BEGIN
    EXECUTE sp_execute_external_script
        @language = N'Python',
        @script = N'
import pandas
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import pickle

raw_dataframe = input_data

index_start = 0
index_stop = 9999

independent_variables = [''month_no'',''total_traffic''] #input any number of variables
dependent_variable = [''total_passengers''] #only input one variable

independent_dataset = raw_dataframe[independent_variables][index_start:index_stop]
dependent_dataset = raw_dataframe[dependent_variable][index_start:index_stop]

# Further splitting dataset into training and testing subsets
test_ratio=0.2 #splits the data into testing and training sets with ratio 0.2
indp_train_set, indp_test_set, dep_train_set, dep_test_set = train_test_split(independent_dataset, 
                                                                              dependent_dataset, 
                                                                              test_size=test_ratio)

linear_model = LinearRegression()
linear_model.fit(indp_train_set, dep_train_set)

trained_model = pickle.dumps(linear_model)
',
        -- Explicit column list: exactly the columns the script indexes.
        @input_data_1 = N'SELECT month_no, total_traffic, total_passengers FROM [externalDB].[dbo].[data]',
        @input_data_1_name = N'input_data',
        -- Maps the Python variable trained_model onto the OUTPUT parameter.
        @params = N'@trained_model varbinary(max) OUTPUT',
        @trained_model = @trained_model OUTPUT;
END;
GO

In [None]:
-- Work inside externalDB and rebuild the model store from scratch.
USE externalDB;
DROP TABLE IF EXISTS dbo.airport_py_models;
GO
-- One row per stored model: a unique name plus the model's binary payload.
CREATE TABLE dbo.airport_py_models
(
    model_name VARCHAR(30)    NOT NULL DEFAULT 'default model',
    model      VARBINARY(MAX) NOT NULL,
    PRIMARY KEY (model_name)
);
GO

In [None]:
-- Execute if model has not been created
-- Trains a model with generate_model (the training procedure defined in this
-- notebook) and stores the returned bytes under the name 'linear_model'.
-- Fails with a primary-key violation if a 'linear_model' row already exists.
DECLARE @model VARBINARY(MAX);
EXECUTE generate_model @model OUTPUT;

INSERT INTO [dbo].[airport_py_models] (model_name, model)
VALUES ('linear_model', @model);

In [None]:
--Execute if model has already been created
-- Retrains with generate_model (the training procedure defined in this
-- notebook) and overwrites the payload of the existing 'linear_model' row.
-- Updates zero rows if no 'linear_model' row exists yet.
DECLARE @model VARBINARY(MAX);
EXECUTE generate_model @model OUTPUT;

UPDATE [dbo].[airport_py_models]
SET model = @model
WHERE model_name = 'linear_model';

In [None]:
-- Show the binary payload stored under the name 'linear_model'.
SELECT
    model
FROM dbo.airport_py_models
WHERE model_name = 'linear_model';

In [None]:
-- Drop any previous definition so this cell can be re-run.
DROP PROCEDURE IF EXISTS py_predict_airport;
GO
-- py_predict_airport
--   Loads a pickled model from dbo.airport_py_models and scores the rows of
--   [externalDB].[dbo].[data] with it through sp_execute_external_script.
--
--   Parameters:
--     @model  name of the stored model (matched against model_name).
--
--   Result set:
--     One column, predicted_passenger_count INT NOT NULL, one row per scored
--     input row (the script keeps rows 0..9998 of the input query).
--     NOTE(review): the model's predictions are floating point and are
--     converted to INT by the WITH RESULT SETS declaration - confirm that
--     this conversion is the intended rounding behaviour.
--
--   Errors:
--     Throws error 50001 when no row with the requested name exists, instead
--     of letting pickle.loads fail on a NULL payload inside the Python runtime.
--
--   SECURITY: pickle.loads can execute code embedded in the payload. Only
--   trusted principals should be able to write to dbo.airport_py_models.
CREATE PROCEDURE py_predict_airport (@model varchar(100))
AS
BEGIN
    -- model_name is the table's primary key, so this yields at most one value.
    DECLARE @py_model varbinary(max) = (
        SELECT model
        FROM dbo.airport_py_models
        WHERE model_name = @model
    );

    IF @py_model IS NULL
    BEGIN
        DECLARE @error_message nvarchar(200) =
            CONCAT(N'No model named ''', @model, N''' was found in dbo.airport_py_models.');
        THROW 50001, @error_message, 1;
    END;

    EXECUTE sp_execute_external_script
        @language = N'Python',
        @script = N'
import pandas
from sklearn.linear_model import LinearRegression
import pickle

airport_model = pickle.loads(py_model)

raw_dataframe = input_data

index_start = 0
index_stop = 9999

independent_variables = [''month_no'',''total_traffic''] #input any number of variables

independent_dataset = raw_dataframe[independent_variables][index_start:index_stop]

linear_predictions = airport_model.predict(independent_dataset)     
predictions_dataframe = pandas.DataFrame(linear_predictions)

OutputDataSet = predictions_dataframe
',
        -- Scoring needs only the feature columns, not the target.
        @input_data_1 = N'SELECT month_no, total_traffic FROM [externalDB].[dbo].[data]',
        @input_data_1_name = N'input_data',
        -- Exposes the loaded payload to the script as the variable py_model.
        @params = N'@py_model varbinary(max)',
        @py_model = @py_model
    WITH RESULT SETS ( ([predicted_passenger_count] INT NOT NULL) );

END;
GO

In [None]:
-- Rebuild the table that stores one predicted passenger count per row.
DROP TABLE IF EXISTS [dbo].[py_airport_predictions];
GO

CREATE TABLE [dbo].[py_airport_predictions]
(
    -- Auto-generated row number: starts at 1 and increases by 1 per insert.
    [ID] [INT] IDENTITY(1, 1) NOT NULL,
    -- Predicted value for the row.
    [predicted_passenger_count] [INT] NOT NULL
)
ON [PRIMARY];
GO

In [None]:
-- Score the data with the stored 'linear_model' and append the predictions.
-- The target column is named explicitly; [ID] is generated by the table's
-- IDENTITY property. Re-running this cell appends another full set of rows.
INSERT INTO [dbo].[py_airport_predictions] ([predicted_passenger_count])
EXECUTE py_predict_airport @model = 'linear_model';

In [None]:
-- Put each prediction next to the actual passenger count.
-- The prediction table's key is referenced as [ID], exactly as declared, so
-- the query also works under a case-sensitive collation.
-- NOTE(review): pairing rows on ID assumes predictions were inserted in the
-- same order as [dbo].[data].[id] and that those ids start at 1 without gaps
-- - confirm against how the data table is populated.
SELECT
    p.[predicted_passenger_count],
    d.[total_passengers]
FROM [dbo].[py_airport_predictions] AS p
INNER JOIN [dbo].[data] AS d
    ON p.[ID] = d.[id]
ORDER BY p.[ID];