# <span style="font-size: 28px;"><b>Demo: Analyzing and Forecasting Industry-specific Data</b></span>

## **Step 1: Load the Dataset**

In [1]:
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import GridSearchCV
from flask import Flask, request, jsonify
from joblib import load
from threading import Thread
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.pipeline import Pipeline
from joblib import dump
import warnings
warnings.filterwarnings("ignore")

# Connection string components
server = 'DESKTOP'                     # Server name
database = 'Transactions'              # Database name
driver= 'SQL Server'                   

# SQLAlchemy connection string
connection_string = f'mssql+pyodbc://{server}/{database}?driver={driver}&trusted_connection=yes'

# Create the engine
engine = create_engine(connection_string)

query = '''
SELECT transaction_id, customer_id, 
       CAST(date AS DATE) as date, 
       CAST(time AS TIME) as time, 
       product_name, category, quantity, price
FROM dbo.Transactions
'''

# Use the engine to connect and execute the query
df = pd.read_sql_query(query, engine)

## **Step 2: Feature Engineering**

In [2]:
# Convert date and time into datetime, extract day of the week and hour as features
df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'])
df['day_of_week'] = df['datetime'].dt.dayofweek
df['hour'] = df['datetime'].dt.hour

# Target variable and features selection
X = df[['customer_id', 'product_name', 'category', 'quantity', 'day_of_week', 'hour']]
y = df['price']

# Encoding categorical features
categorical_features = ['customer_id', 'product_name', 'category', 'day_of_week', 'hour']
preprocessor = ColumnTransformer(transformers=[('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)])

## **Step 3:** **Train Model**

In [3]:
# Define the model pipeline
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('regressor', LinearRegression())])

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
pipeline.fit(X_train, y_train)

# Model evaluation
predictions = pipeline.predict(X_test)
print("Mean Absolute Error:", mean_absolute_error(y_test, predictions))
print("Mean Squared Error:", mean_squared_error(y_test, predictions))
print("R^2 Score:", r2_score(y_test, predictions))

Mean Absolute Error: 5.523001209975755
Mean Squared Error: 609.7621531590165
R^2 Score: 0.9914685656528119


## **Step 4:** **Save Model**

In [4]:
# Save the trained model
dump(pipeline, 'model.joblib')

['model.joblib']

## **Step 5:** **Application Setup**

In [5]:
app = Flask(__name__)
model = load('model.joblib')

@app.route('/predict', methods=['GET'])
def predict():
   # Extract query parameters for all features
    customer_id = request.args.get('customer_id', default='default_customer_id', type=str)
    product_name = request.args.get('product_name', default='default_product_name', type=str)
    category = request.args.get('category', default='default_category', type=str)
    quantity = request.args.get('quantity', default=1, type=int)
    day_of_week = request.args.get('day_of_week', default=1, type=int)
    hour = request.args.get('hour', default=12, type=int)

    # Create a DataFrame for the input features
    input_data = pd.DataFrame([{
        'customer_id': customer_id,
        'product_name': product_name,
        'category': category,
        'quantity': quantity,
        'day_of_week': day_of_week,
        'hour': hour
    }])

    # Predict using the model
    prediction = model.predict(input_data)

    # Return the prediction as a JSON response
    return jsonify({'prediction': prediction.tolist()})

## **Step 6:** **Run App**

In [6]:
def run_app():
    app.run(port=5000, debug=True, use_reloader=False)

# Running the Flask app in a separate thread
thread = Thread(target=run_app)
thread.start()

 * Serving Flask app '__main__'


 * Debug mode: on
