[Reference](https://medium.com/@nomannayeem/mlops-for-beginners-to-advanced-from-model-building-to-scalable-deployment-using-flask-and-docker-ec65bd8cf07f)

# Synthetic Dataset Generation

In [1]:
# Import necessary libraries
import pandas as pd  # To handle data in tabular form
import numpy as np   # To generate random data

# Step 1: Define the dataset dimensions
# The synthetic e-commerce platform has 1000 users, 500 products, and
# 5000 recorded user-product interactions.
num_users = 1000
num_products = 500
num_interactions = 5000

# Step 2: Generating the Users Data
# Each user has an ID, age, gender, and location.
user_data = {
    'user_id': np.arange(1, num_users + 1),  # Sequential user IDs from 1 to num_users
    'age': np.random.randint(18, 70, size=num_users),  # Random ages from 18 to 69 (randint's upper bound is exclusive)
    'gender': np.random.choice(['M', 'F'], size=num_users),  # Randomly assign gender as Male (M) or Female (F)
    'location': np.random.choice(['Urban', 'Suburban', 'Rural'], size=num_users)  # Randomly assign location type
}

# Convert the user data dictionary into a pandas DataFrame
users_df = pd.DataFrame(user_data)

# Step 3: Generating the Products Data
# Each product has an ID, category, price, and rating.
product_data = {
    'product_id': np.arange(1, num_products + 1),  # Sequential product IDs from 1 to num_products
    'category': np.random.choice(['Electronics', 'Clothing', 'Home', 'Books'], size=num_products),  # Randomly assign product category
    'price': np.round(np.random.uniform(5, 500, size=num_products), 2),  # Random prices between $5 and $500, rounded to 2 decimal places
    'rating': np.round(np.random.uniform(1, 5, size=num_products), 1)  # Random ratings between 1 and 5, rounded to 1 decimal place
}

# Convert the product data dictionary into a pandas DataFrame
products_df = pd.DataFrame(product_data)

# Step 4: Generating the User-Product Interaction Data (Purchase History or Ratings)
# We simulate how users interact with products. For example, users can rate or buy products.
# Users and products are drawn with replacement, so the same (user, product)
# pair may appear more than once.
interaction_data = {
    'user_id': np.random.choice(users_df['user_id'], size=num_interactions),  # Randomly select users who interacted with products
    'product_id': np.random.choice(products_df['product_id'], size=num_interactions),  # Randomly select products that were interacted with
    'rating': np.random.randint(1, 6, size=num_interactions),  # Assign random ratings (1 to 5 stars) for these interactions
    # Sequential (not random) timestamps, one minute apart. The 'min' alias is
    # used because the single-letter 'T' alias is deprecated in recent pandas.
    'timestamp': pd.date_range(start='2023-01-01', periods=num_interactions, freq='min')
}

# Convert the interaction data dictionary into a pandas DataFrame
interactions_df = pd.DataFrame(interaction_data)

# Let's check the first few rows of each dataset
users_df.head(), products_df.head(), interactions_df.head()

  'timestamp': pd.date_range(start='2023-01-01', periods=5000, freq='T')  # Generate random timestamps for interactions, 1 minute apart


(   user_id  age gender  location
 0        1   34      M  Suburban
 1        2   29      F     Urban
 2        3   39      F  Suburban
 3        4   53      M  Suburban
 4        5   32      F  Suburban,
    product_id     category   price  rating
 0           1  Electronics  386.90     3.7
 1           2     Clothing  318.84     4.6
 2           3         Home  437.29     1.3
 3           4  Electronics  441.81     3.9
 4           5         Home  177.80     2.5,
    user_id  product_id  rating           timestamp
 0      723          79       1 2023-01-01 00:00:00
 1      411         197       1 2023-01-01 00:01:00
 2      262         277       4 2023-01-01 00:02:00
 3      950         114       5 2023-01-01 00:03:00
 4      140         478       3 2023-01-01 00:04:00)

# Data Preprocessing

In [4]:
# Import necessary libraries for pre-processing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Step 1: Check for missing values
# Report the per-column count of missing values in each dataset.
print("Missing values in users data:\n", users_df.isnull().sum())
print("Missing values in products data:\n", products_df.isnull().sum())
print("Missing values in interactions data:\n", interactions_df.isnull().sum())

# Step 2: Encoding categorical variables
# LabelEncoder assigns integer codes following the sorted order of the labels.
# One encoder is fitted per column so that every mapping stays available
# afterwards (through .classes_ / .inverse_transform) instead of being
# overwritten by the next fit.
gender_encoder = LabelEncoder()
location_encoder = LabelEncoder()
category_encoder = LabelEncoder()

# `label_encoder` stays defined for any later code that expects that name;
# it refers to the product-category encoder.
label_encoder = category_encoder

# Encode the gender column in users data (F -> 0, M -> 1)
users_df['gender_encoded'] = gender_encoder.fit_transform(users_df['gender'])

# Encode the location column in users data (codes follow alphabetical label order)
users_df['location_encoded'] = location_encoder.fit_transform(users_df['location'])

# Encode the category column in products data (codes follow alphabetical label order)
products_df['category_encoded'] = category_encoder.fit_transform(products_df['category'])

# Step 3: Create a User-Product Rating Matrix
# pivot_table averages the ratings when a (user, product) pair occurs more
# than once; pairs with no interaction are filled with 0.
user_product_matrix = interactions_df.pivot_table(index='user_id', columns='product_id', values='rating').fillna(0)

# Step 4: Train-test split (80% train / 20% test, seeded for reproducibility)
train_data, test_data = train_test_split(interactions_df, test_size=0.2, random_state=42)

# Display the first few rows of the pre-processed data to verify
print("User-Product Matrix:\n", user_product_matrix.head())
print("Train Data Sample:\n", train_data.head())
print("Test Data Sample:\n", test_data.head())

Missing values in users data:
 user_id     0
age         0
gender      0
location    0
dtype: int64
Missing values in products data:
 product_id    0
category      0
price         0
rating        0
dtype: int64
Missing values in interactions data:
 user_id       0
product_id    0
rating        0
timestamp     0
dtype: int64
User-Product Matrix:
 product_id  1    2    3    4    5    6    7    8    9    10   ...  491  492  \
user_id                                                       ...             
1           0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
2           0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
3           0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
4           0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
6           0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   

product_id  493  494  495  496  497  498  499  500  
user_id                                      

# Model Building and Training

In [6]:
# Install the Surprise library
!pip install scikit-surprise

# Import necessary libraries
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split as surprise_train_test_split
from surprise.model_selection import cross_validate
from surprise import accuracy

# Step 1: Prepare the data for Surprise
# Surprise expects exactly three columns in the order: user, item, rating.
reader = Reader(rating_scale=(1, 5))  # The rating scale in our dataset is from 1 to 5
data = Dataset.load_from_df(interactions_df[['user_id', 'product_id', 'rating']], reader)

# Step 2: Train-test split (80% train / 20% test)
# A fixed random_state makes the split, and therefore the reported metrics,
# reproducible from one run to the next.
trainset, testset = surprise_train_test_split(data, test_size=0.2, random_state=42)

# Step 3: Train the SVD model
# SVD initialises its factors randomly, so it is seeded as well.
model = SVD(random_state=42)  # Initialize the SVD model
model.fit(trainset)  # Train the model on the training set

# Step 4: Test the model on the test set
predictions = model.test(testset)

# Step 5: Evaluate the performance using RMSE
# accuracy.rmse prints the value and also returns it.
rmse = accuracy.rmse(predictions)

Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m153.6/154.4 kB[0m [31m4.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357266 sha256=a5a0561c3b28dd1d7338aec14c07c09246d93ede870c0c561f8c2ace4279777a
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a

# Saving the Model

In [7]:
import pickle

# Step 1: Save the trained SVD model to a file
model_filename = 'svd_model.pkl'
with open(model_filename, 'wb') as model_file:
    pickle.dump(model, model_file)

print(f"Model saved to {model_filename}")

# Step 2: When running in Google Colab, download the saved model file.
# The google.colab package only exists inside Colab, so the import is guarded
# to keep this cell runnable in a local Jupyter session or as a plain script.
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    files.download(model_filename)

# Monitoring Model Performance

In [8]:
# Compute the Mean Absolute Error for the predictions produced earlier.
mae = accuracy.mae(predictions)

# Gather the error metrics into one report keyed by metric name.
performance_report = dict(RMSE=rmse, MAE=mae)

# Print the report: a header line, then one metric per line at 4 decimals.
print("Model Performance Report:")
for metric in performance_report:
    score = performance_report[metric]
    print(f"{metric}: {score:.4f}")

MAE:  1.2443
Model Performance Report:
RMSE: 1.4467
MAE: 1.2443


# Flask Application Development

```
📂 MLOps_Flask_Recommendation_System
│
├── 📂 model
│   └── svd_model.pkl              # Trained SVD model file
│
├── 📂 templates                    # HTML Templates for Flask app
│   ├── index.html                  # Home page for selecting user/product
│   └── result.html                 # Page displaying predicted recommendation
│
├── 📂 venv                         # Virtual environment directory (optional)
│
├── .dockerignore                   # Files/directories to ignore in Docker build
├── .gitignore                      # Files/directories to ignore in Git repository
├── app.py                          # Main Flask application file
├── Dockerfile                      # Docker configuration file for Flask app
├── docker-compose.yml              # Docker Compose file for running app in containers
├── requirements.txt                # Python dependencies
├── README.md                       # Project description and instructions
```

app.py:

In [9]:
from flask import Flask, render_template, request
import pickle
import os

# Initialize Flask app
app = Flask(__name__)

# Load the saved SVD model.
# The path is anchored to the application's root directory (app.root_path)
# rather than the current working directory, so the model is found no matter
# where the server is launched from.
# NOTE: unpickling can execute arbitrary code; only load model files that
# come from a trusted source.
model_path = os.path.join(app.root_path, 'model', 'svd_model.pkl')
with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)

# Dummy data for user and product IDs offered in the form's dropdowns
users = list(range(1, 1001))
products = list(range(1, 501))

@app.route('/')
def index():
    """Render the home page with the selectable user and product IDs."""
    template_context = {'users': users, 'products': products}
    return render_template('index.html', **template_context)

@app.route('/recommend', methods=['POST'])
def recommend():
    """Predict the rating for a submitted user/product pair and render it.

    Reads ``user_id`` and ``product_id`` from the POSTed form data.

    Returns:
        The rendered ``result.html`` page on success, or a plain-text
        message with HTTP status 400 when either field is missing or is
        not a valid integer.
    """
    # With type=int, .get() returns None when the field is absent or cannot
    # be converted, instead of raising and producing a 500 response.
    user_id = request.form.get('user_id', type=int)
    product_id = request.form.get('product_id', type=int)
    if user_id is None or product_id is None:
        return "Both 'user_id' and 'product_id' must be provided as integers.", 400

    prediction = model.predict(user_id, product_id)
    return render_template('result.html', user_id=user_id, product_id=product_id, predicted_rating=prediction.est)

if __name__ == '__main__':
    # Start the development server on port 5000, listening on all interfaces.
    app.run(port=5000, host='0.0.0.0')

index.html:


```
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Product Recommendation</title>
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha3/dist/css/bootstrap.min.css" rel="stylesheet">
</head>
<body>
    <div class="container mt-5">
        <h1 class="text-center">Product Recommendation System</h1>
        <!-- Posts the selected IDs to the /recommend route -->
        <form action="/recommend" method="POST" class="mt-4">
            <div class="mb-3">
                <!-- The select's id must match the label's "for" so the two are associated -->
                <label for="user_id" class="form-label">Select User ID</label>
                <select id="user_id" name="user_id" class="form-select">
                    {% for user in users %}
                    <option value="{{ user }}">{{ user }}</option>
                    {% endfor %}
                </select>
            </div>
            <div class="mb-3">
                <label for="product_id" class="form-label">Select Product ID</label>
                <select id="product_id" name="product_id" class="form-select">
                    {% for product in products %}
                    <option value="{{ product }}">{{ product }}</option>
                    {% endfor %}
                </select>
            </div>
            <button type="submit" class="btn btn-primary">Get Recommendation</button>
        </form>
    </div>
    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha3/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>
```

result.html:

```
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Recommendation Result</title>
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha3/dist/css/bootstrap.min.css" rel="stylesheet">
</head>
<body>
    <div class="container mt-5">
        <h1 class="text-center">Recommendation Result</h1>
        <!-- The predicted rating is a raw float; show it with two decimal places -->
        <p class="text-center">Predicted Rating for User ID <strong>{{ user_id }}</strong> and Product ID <strong>{{ product_id }}</strong> is <strong>{{ "%.2f"|format(predicted_rating) }}</strong>.</p>
        <div class="text-center">
            <a href="/" class="btn btn-primary">Go Back</a>
        </div>
    </div>
    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha3/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>
```

# Dockerization

```
# Use an official Python runtime as a parent image
FROM python:3.10-slim

# Set the working directory in the container
WORKDIR /app

# Install the compiler toolchain for packages that are built from source
# during pip install. --no-install-recommends keeps the image smaller, and
# the apt package lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    build-essential \
    python3-dev \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency list first so the pip install layer stays cached
# and is rebuilt only when requirements.txt changes, not on every code edit
COPY requirements.txt /app/requirements.txt

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the current directory contents into the container at /app
COPY . /app

# Expose port 5000 to the outside world
EXPOSE 5000

# Define environment variable
ENV FLASK_APP=app.py

# Run the application
CMD ["flask", "run", "--host=0.0.0.0"]
```

docker-compose.yml:

```
version: '3.8'

services:
  flask_app:
    # Build the image from the Dockerfile in this directory
    build:
      context: .
    # Publish container port 5000 on host port 5000 (host:container)
    ports:
      - "5000:5000"
    # Bind-mount the project directory over /app inside the container
    volumes:
      - .:/app
    environment:
      - FLASK_APP=app.py
    command: ["flask", "run", "--host=0.0.0.0"]
```

requirements.txt:

```
Flask
scikit-surprise
numpy==1.21.6
```

# Running the App with Docker Compose

Build the Docker Image:

```
docker-compose build
```

Start the Container:

```
docker-compose up
```