# <span style="font-size: 28px;"><b>Demo: Deploying and Evaluating an Advanced Model</b></span>

## **Step 1: Load the Dataset**

In [13]:
from sqlalchemy import create_engine

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from joblib import dump, load

import warnings
warnings.filterwarnings("ignore")

# Connection string components
server = 'DESKTOP'                     # Server name
database = 'Transactions'              # Database name
driver= 'SQL Server'                   

# SQLAlchemy connection string
connection_string = f'mssql+pyodbc://{server}/{database}?driver={driver}&trusted_connection=yes'

# Create the engine
engine = create_engine(connection_string)

query = '''
SELECT transaction_id, customer_id, 
       CAST(date AS DATE) as date, 
       CAST(time AS TIME) as time, 
       product_name, category, quantity, price
FROM dbo.Transactions
'''

# Use the engine to connect and execute the query
df = pd.read_sql_query(query, engine)

## **Step 2:** **Feature Preprocessing**

In [15]:
# Prepare the datetime and other features
df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'])
df['day_of_week'] = df['datetime'].dt.dayofweek
df['hour'] = df['datetime'].dt.hour

features = ['customer_id', 'product_name', 'category', 'quantity', 'day_of_week', 'hour']
X = df[features]
y = df['price']

# Encoding categorical variables
categorical_features = ['customer_id', 'product_name', 'category', 'day_of_week', 'hour']
preprocessor = ColumnTransformer(transformers=[
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
])

# Splitting dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=44)

## **Step 3:** **Pipeline Creation**

In [16]:
# Define the model pipeline
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

# Train the model
pipeline.fit(X_train, y_train)

## **Step 4:** **Serialize Model**

In [17]:
model_filename = 'linear_regression_model.joblib'
dump(pipeline, model_filename)
print(f"Model saved to {model_filename}")

Model saved to linear_regression_model.joblib


## **Step 5:** **Load and Test the Serialized Model**

In [18]:
loaded_model = load(model_filename)
predictions = loaded_model.predict(X_test)

## **Step 6:** **Evaluation**

In [19]:
mae = mean_absolute_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

Mean Absolute Error: 5.523001209975755
Mean Squared Error: 609.7621531590165
R^2 Score: 0.9914685656528119
