In [4]:
#Importing the Libraries 
import os
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
from flask import Flask, request, jsonify
import threading


In [5]:
# Dataset loading and preprocessing
file_path = 'E:/recommendation system/E-commerce Dataset.csv'
# Read the CSV, drop rows missing any column the recommender needs,
# then cast Quantity to int (it is used as the training target later on).
data = (
    pd.read_csv(file_path)
    .dropna(subset=['Customer_Id', 'Product', 'Quantity'])
    .astype({'Quantity': int})
)

In [6]:
# Encoding Customer_Id and Product columns as contiguous integer ids
user_encoder, item_encoder = LabelEncoder(), LabelEncoder()

# Each encoder is fitted on its source column and the codes are stored
# alongside the original values as 'user_id' / 'item_id'.
data = data.assign(
    user_id=user_encoder.fit_transform(data['Customer_Id']),
    item_id=item_encoder.fit_transform(data['Product']),
)

print("User Encoder Classes:", user_encoder.classes_)

User Encoder Classes: [10000 10002 10004 ... 99992 99993 99999]


In [7]:
# Defining Matrix Factorization Model
class MatrixFactorization(nn.Module):
    """
    Matrix factorization scorer built from two embedding tables.

    A (user, item) pair is scored as the dot product of the user's and the
    item's embedding vectors.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        embedding_dim: Length of every embedding vector (default 50).
    """

    def __init__(self, num_users, num_items, embedding_dim=50):
        super().__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)

    def forward(self, user, item):
        """
        Score user/item index tensors.

        The two looked-up embedding tensors are multiplied element-wise, so
        `user` and `item` must be broadcast-compatible (e.g. one user index
        against many item indices). The product is summed over dimension 1.
        """
        user_vectors = self.user_embedding(user)
        item_vectors = self.item_embedding(item)
        return torch.sum(user_vectors * item_vectors, dim=1)

In [8]:
# Model Setup
# The embedding tables need one row per distinct encoded user / item id.
num_users, num_items = (data[column].nunique() for column in ('user_id', 'item_id'))
model = MatrixFactorization(num_users=num_users, num_items=num_items)


In [9]:
# Pre-trained weights loaded, if present
if not os.path.exists('model.pth'):
    print("Pre-trained model not found. Train the model first.")
else:
    # A failed load (for example a checkpoint that does not fit this model)
    # is reported and the model keeps its freshly initialised weights.
    try:
        model.load_state_dict(torch.load('model.pth', weights_only=True))
    except Exception as exc:
        print(f"Error loading pre-trained model: {exc}")
    else:
        print("Loaded pre-trained model.")

Pre-trained model not found. Train the model first.


In [10]:
# Flask API
app = Flask(__name__)

@app.route('/recommend', methods=['POST'])
def recommend():
    """
    Recommend products for a customer.

    Expected Input (JSON): {"user_id": <Customer_Id>, "num_recommendations": 5}

    "user_id" is the original Customer_Id value (one of
    user_encoder.classes_), not the label-encoded index; the encoding is
    applied inside this handler. "num_recommendations" is optional
    (default 5) and must be a non-negative integer; asking for more items
    than exist returns every item.

    Returns:
        On success, JSON {"user_id": <Customer_Id>, "recommendations": [...]}
        with products ordered from highest to lowest predicted score.
        On a bad request, JSON {"error": <message>} with HTTP status 400:
        the body is not a JSON object, the customer is unknown, or
        num_recommendations is not a non-negative integer.
    """
    # silent=True returns None for a missing or malformed body instead of
    # raising, so every bad request is answered with the same JSON error shape.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    customer_id = payload.get("user_id")
    # Lists/objects are rejected before the membership test: numpy's `in`
    # would compare them element-wise against the class array.
    if (customer_id is None
            or isinstance(customer_id, (list, dict))
            or customer_id not in user_encoder.classes_):
        return jsonify({"error": "Customer ID not found"}), 400

    num_recommendations = payload.get("num_recommendations", 5)
    # bool is a subclass of int, so it is excluded explicitly. A negative
    # count would otherwise slice items off the END of the ranking.
    if (isinstance(num_recommendations, bool)
            or not isinstance(num_recommendations, int)
            or num_recommendations < 0):
        return jsonify({"error": "num_recommendations must be a non-negative integer"}), 400

    # Fetching internal user index
    user_idx = int(user_encoder.transform([customer_id])[0])

    # Generating recommendations: score this user against every item and
    # keep the highest-scoring ones.
    model.eval()
    with torch.no_grad():
        all_items = torch.arange(num_items)
        scores = model(torch.tensor([user_idx]), all_items)
        top_items = scores.argsort(descending=True)[:num_recommendations]
        recommended_products = item_encoder.inverse_transform(top_items.numpy())

    return jsonify({"user_id": customer_id, "recommendations": recommended_products.tolist()})




In [11]:
# Training Function
def train_model(data, model, epochs=10, batch_size=256, learning_rate=0.01):
    """
    Fit the matrix factorization model and save its weights.

    Args:
        data: DataFrame with 'user_id', 'item_id' and 'Quantity' columns;
            Quantity is the regression target.
        model: Module called as model(user_batch, item_batch).
        epochs: Number of passes over the data.
        batch_size: Rows per mini-batch.
        learning_rate: Learning rate passed to Adam.

    Side effects:
        Prints the loss summed over all batches after each epoch, leaves the
        model in training mode, and writes model.state_dict() to 'model.pth'
        in the current working directory.
    """
    # One tensor per column; the dataset yields (user, item, quantity) triples.
    interactions = torch.utils.data.TensorDataset(
        torch.tensor(data['user_id'].values, dtype=torch.long),
        torch.tensor(data['item_id'].values, dtype=torch.long),
        torch.tensor(data['Quantity'].values, dtype=torch.float),
    )
    batches = torch.utils.data.DataLoader(interactions, batch_size=batch_size, shuffle=True)

    # Loss function and optimizer
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Training loop
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for user_ids, item_ids, quantities in batches:
            optimizer.zero_grad()
            loss = loss_fn(model(user_ids, item_ids), quantities)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}")

    torch.save(model.state_dict(), 'model.pth')
    print("Model training completed and saved as 'model.pth'.")

In [12]:
# Defining Function to run Flask
def run_flask():
    """Run the Flask development server for `app` with debug mode on and the reloader off."""
    # NOTE(review): debug=True turns on the interactive debugger; keep this
    # to local development. The reloader is presumably disabled because the
    # server is started from a background thread - confirm.
    server_options = {"debug": True, "use_reloader": False}
    app.run(**server_options)

if __name__ == '__main__':
    # Training the model if no pre-trained weights are found
    if not os.path.exists('model.pth'):
        print("No pre-trained model found. Training the model...")
        train_model(data, model)

    # Starting Flask server in a background thread.
    # Re-running this cell while an earlier server thread is still alive
    # would try to bind the same address again, so a new thread is only
    # started when none is running.
    running_thread = globals().get('flask_thread')
    if running_thread is not None and running_thread.is_alive():
        print("Flask server is already running.")
    else:
        flask_thread = threading.Thread(target=run_flask)
        flask_thread.start()

No pre-trained model found. Training the model...
Epoch 1/10, Loss: 7122.2872
Epoch 2/10, Loss: 1948.1904
Epoch 3/10, Loss: 1067.7831
Epoch 4/10, Loss: 597.2843
Epoch 5/10, Loss: 313.9673
Epoch 6/10, Loss: 182.2318
Epoch 7/10, Loss: 135.7687
Epoch 8/10, Loss: 121.9405
Epoch 9/10, Loss: 108.1276
Epoch 10/10, Loss: 89.5284
Model training completed and saved as 'model.pth'.
 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
