In [4]:
import warnings
# Silence every warning category for the rest of the process so the demo
# output stays readable; note this also hides deprecation notices.
warnings.filterwarnings('ignore')

In [None]:
import torch
import pickle
import numpy as np
import os
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
class StandaloneRecommender:
    """Serve rating predictions and top-k recommendations from a trained model.

    The constructor loads three things from disk: a pickle of preprocessing
    artifacts (``mappings``, ``scalers``, ``vocab_sizes``), a pickle of
    recommendation assets (``item_catalog``, ``user_profiles``,
    ``user_histories``, ``rating_stats``) and the weights of a
    ``TwoTowerHybrid`` model.  After that, every public method works purely
    in memory.
    """

    # Categorical values that are absent from ``mappings`` are encoded with
    # this index.  NOTE(review): assumes index 0 was reserved for "unknown"
    # when the mappings were built -- confirm against the training pipeline.
    UNKNOWN_INDEX = 0

    # Number of catalog items scored per forward pass.
    BATCH_SIZE = 100

    # Default (min, max) used to min-max scale "Total_Purchases".  Kept as a
    # class attribute so instances built without __init__ still have a value;
    # __init__ overrides it per instance.
    total_purchases_range = (1.0, 10.0)

    # Catalog fields inspected by search_items().
    _SEARCH_FIELDS = ('products', 'Product_Category', 'Product_Brand')

    def __init__(self, model_path, artifacts_path, assets_path, user_cat_cols, user_num_cols,
                 item_cat_cols, device='cpu', emb_dim=64, hidden_dims=(128, 64),
                 total_purchases_range=(1.0, 10.0)):
        """Load artifacts, assets and model weights.

        Args:
            model_path: Path to the saved ``state_dict`` of the model.
            artifacts_path: Path to the preprocessing-artifacts pickle.
            assets_path: Directory containing ``recommendation_assets.pkl``.
            user_cat_cols: Ordered names of the categorical user features.
            user_num_cols: Ordered names of the numerical user features.
            item_cat_cols: Ordered names of the categorical item features.
            device: Torch device string the model runs on.
            emb_dim: Embedding width the model was trained with.
            hidden_dims: Hidden layer widths the model was trained with.
            total_purchases_range: ``(min, max)`` used to min-max scale
                ``Total_Purchases``; adjust to match the training data.
        """
        self.device = torch.device(device)
        self.user_cat_cols = user_cat_cols
        self.user_num_cols = user_num_cols
        self.item_cat_cols = item_cat_cols
        self.total_purchases_range = tuple(total_purchases_range)

        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only point these paths at files produced by a trusted pipeline.
        print("Loading preprocessing artifacts...")
        with open(artifacts_path, "rb") as f:
            artifacts = pickle.load(f)

        self.mappings = artifacts["mappings"]
        self.scalers = artifacts["scalers"]
        vocab_sizes = artifacts["vocab_sizes"]

        print("Loading recommendation assets...")
        with open(os.path.join(assets_path, "recommendation_assets.pkl"), "rb") as f:
            assets = pickle.load(f)

        self.item_catalog = assets["item_catalog"]
        self.user_profiles = assets["user_profiles"]
        self.user_histories = assets["user_histories"]
        self.rating_stats = assets["rating_stats"]

        print("Loading trained model...")
        # Embedding tables are keyed by "<column>_idx", so the size dicts
        # handed to the model use the same naming.
        user_cat_sizes = {c + "_idx": vocab_sizes[c] for c in user_cat_cols}
        item_cat_sizes = {c + "_idx": vocab_sizes[c] for c in item_cat_cols}

        self.model = TwoTowerHybrid(
            user_cat_sizes=user_cat_sizes,
            user_num_dims=len(user_num_cols),
            item_cat_sizes=item_cat_sizes,
            emb_dim=emb_dim,
            hidden_dims=list(hidden_dims)
        ).to(self.device)

        self.model.load_state_dict(torch.load(model_path, map_location=self.device))
        # Inference only: eval() disables dropout.
        self.model.eval()

        print("Recommender ready!")
        print(f"- {len(self.item_catalog)} items in catalog")
        print(f"- {len(self.user_profiles)} known users")

    @staticmethod
    def _is_missing(value):
        """Return True for ``None`` and floating-point NaN."""
        if value is None:
            return True
        return isinstance(value, (float, np.floating)) and bool(np.isnan(value))

    def _encode_categoricals(self, data, cols):
        """Map the values of ``cols`` in ``data`` to integer indices.

        Columns missing from ``data`` and values missing from the mapping
        are both encoded as ``UNKNOWN_INDEX``.
        """
        indices = []
        for col in cols:
            if col in data and data[col] in self.mappings[col]:
                indices.append(self.mappings[col][data[col]])
            else:
                indices.append(self.UNKNOWN_INDEX)
        return indices

    def _preprocess_user_features(self, user_data):
        """Convert raw user features into model inputs.

        Returns:
            ``(user_cat, user_num)``: a long tensor of shape
            ``(1, len(user_cat_cols))`` and a float tensor of shape
            ``(1, len(user_num_cols))``.
        """
        user_cat_indices = self._encode_categoricals(user_data, self.user_cat_cols)
        user_cat_tensor = torch.tensor(user_cat_indices, dtype=torch.long).unsqueeze(0)

        user_num_values = []
        for col in self.user_num_cols:
            raw = user_data[col] if col in user_data else None
            if self._is_missing(raw):
                # Absent, None or NaN: use the same 0.0 default as for
                # columns that are not supplied at all, instead of crashing
                # the scaler or feeding NaN into the model.
                user_num_values.append(0.0)
            elif col == "Age":
                user_num_values.append(self.scalers["Age"].transform([[raw]])[0][0])
            elif col == "Total_Purchases":
                min_tp, max_tp = self.total_purchases_range
                # The epsilon guards against division by zero when min == max.
                user_num_values.append((raw - min_tp) / (max_tp - min_tp + 1e-9))
            elif col == "Amount":
                # The "Amount_log" scaler is applied to log1p of the amount.
                log_val = np.log1p(raw)
                user_num_values.append(self.scalers["Amount_log"].transform([[log_val]])[0][0])
            else:
                # No scaling rule is defined for this column.
                user_num_values.append(0.0)

        user_num_tensor = torch.tensor(user_num_values, dtype=torch.float).unsqueeze(0)
        return user_cat_tensor, user_num_tensor

    def _preprocess_item_features(self, item_data):
        """Convert item data into a long tensor of shape ``(1, len(item_cat_cols))``."""
        item_cat_indices = self._encode_categoricals(item_data, self.item_cat_cols)
        return torch.tensor(item_cat_indices, dtype=torch.long).unsqueeze(0)

    def predict_rating(self, user_features, item_features):
        """Predict the rating (1-5 scale) for a single user-item pair."""
        user_cat, user_num = self._preprocess_user_features(user_features)
        item_cat = self._preprocess_item_features(item_features)

        user_cat = user_cat.to(self.device)
        user_num = user_num.to(self.device)
        item_cat = item_cat.to(self.device)

        with torch.no_grad():
            score = self.model(user_cat, user_num, item_cat)
            rating = score.item() * 4 + 1  # Convert 0-1 to 1-5 scale

        return rating

    def recommend_for_user(self, customer_id, top_k=10, exclude_purchased=True):
        """Recommend items for a customer that has a stored profile.

        Args:
            customer_id: Key into ``user_profiles``.
            top_k: Maximum number of recommendations to return.
            exclude_purchased: Skip items listed in the customer's history.

        Returns:
            List of ``(item, rating)`` tuples, highest rating first.

        Raises:
            ValueError: If ``customer_id`` has no stored profile.
        """
        if customer_id not in self.user_profiles:
            raise ValueError(f"Customer {customer_id} not found. Use recommend_for_new_user() instead.")

        # NOTE(review): if stored profiles do not carry a "Customer_ID" entry
        # while it is listed in user_cat_cols, that feature is encoded as
        # UNKNOWN_INDEX for every known user -- confirm against the code that
        # builds recommendation_assets.pkl.
        user_features = self.user_profiles[customer_id]

        purchased_items = set()
        if exclude_purchased and customer_id in self.user_histories:
            purchased_items = set(self.user_histories[customer_id])

        return self._generate_recommendations(user_features, top_k, purchased_items)

    def recommend_for_new_user(self, user_features, top_k=10):
        """Recommend items for an ad-hoc feature dict; nothing is excluded."""
        return self._generate_recommendations(user_features, top_k, set())

    def _generate_recommendations(self, user_features, top_k, exclude_items):
        """Score every non-excluded catalog item and return the best ``top_k``.

        Args:
            user_features: Raw user features (see ``_preprocess_user_features``).
            top_k: Maximum number of results; values ``<= 0`` yield ``[]``.
            exclude_items: Collection of ``item['products']`` values to skip.

        Returns:
            List of ``(item, rating)`` tuples sorted by rating, descending.
        """
        # A negative top_k would otherwise slice "all but the last k" items.
        if top_k is not None and top_k <= 0:
            return []

        candidate_items = [item for item in self.item_catalog
                           if item['products'] not in exclude_items]

        user_cat, user_num = self._preprocess_user_features(user_features)
        user_cat = user_cat.to(self.device)
        user_num = user_num.to(self.device)

        recommendations = []
        batch_size = self.BATCH_SIZE
        with torch.no_grad():
            for start in range(0, len(candidate_items), batch_size):
                batch_items = candidate_items[start:start + batch_size]

                # Encode the whole batch into one tensor instead of building
                # one tensor per item and concatenating them.
                item_rows = [self._encode_categoricals(item, self.item_cat_cols)
                             for item in batch_items]
                item_cat_batch = torch.tensor(item_rows, dtype=torch.long).to(self.device)

                # The single user row is repeated so it pairs with each item.
                user_cat_batch = user_cat.repeat(len(batch_items), 1)
                user_num_batch = user_num.repeat(len(batch_items), 1)

                scores = self.model(user_cat_batch, user_num_batch, item_cat_batch)
                # Convert 0-1 to 1-5 scale; reshape keeps one value per item
                # even if the model returns a trailing singleton dimension.
                ratings = scores.cpu().numpy().reshape(-1) * 4 + 1

                recommendations.extend(
                    (item, float(rating)) for item, rating in zip(batch_items, ratings)
                )

        # list.sort is stable, so ties keep their catalog order.
        recommendations.sort(key=lambda pair: pair[1], reverse=True)
        return recommendations[:top_k]

    def get_user_info(self, customer_id):
        """Return profile and purchase history for a customer, or ``None``.

        Returns:
            ``None`` when the customer has no stored profile, otherwise a dict
            with keys ``'profile'``, ``'purchase_history'`` and
            ``'num_purchases'``.
        """
        if customer_id not in self.user_profiles:
            return None

        profile = self.user_profiles[customer_id]
        history = self.user_histories.get(customer_id, [])

        return {
            'profile': profile,
            'purchase_history': history,
            'num_purchases': len(history)
        }

    def search_items(self, query, limit=20):
        """Case-insensitive substring search over the item catalog.

        An item matches when ``query`` occurs in its ``products``,
        ``Product_Category`` or ``Product_Brand`` field.  Fields that are
        missing or not strings are ignored rather than raising.

        Args:
            query: Text to look for.
            limit: Maximum number of matches; values ``<= 0`` yield ``[]``.

        Returns:
            Matching catalog items in catalog order.
        """
        if limit <= 0:
            return []

        query = query.lower()
        matches = []

        for item in self.item_catalog:
            for field in self._SEARCH_FIELDS:
                value = item.get(field)
                if isinstance(value, str) and query in value.lower():
                    matches.append(item)
                    break
            if len(matches) >= limit:
                break

        return matches


class TwoTowerHybrid(nn.Module):
    """Two-tower model scoring a (user, item) pair with a value in (0, 1).

    The user tower embeds each categorical user feature, concatenates the
    embeddings with the numerical features and feeds them through two
    Linear/ReLU/Dropout stages.  The item tower embeds each categorical item
    feature and applies one Linear/ReLU/Dropout stage.  The score is the
    sigmoid of the cosine similarity of both tower outputs plus a learned
    global bias.
    """

    def __init__(self, user_cat_sizes, user_num_dims, item_cat_sizes, emb_dim=64,
                 hidden_dims=(128, 64), dropout=0.1):
        """Build both towers.

        Args:
            user_cat_sizes: Mapping of user feature name to vocabulary size;
                its iteration order defines the column order of ``user_cat``.
            user_num_dims: Number of numerical user features.
            item_cat_sizes: Mapping of item feature name to vocabulary size;
                its iteration order defines the column order of ``item_cat``.
            emb_dim: Width of every embedding table.
            hidden_dims: Sequence whose first two entries are the user
                tower's hidden width and the shared output width of both
                towers.  Any sequence is accepted; the default is a tuple so
                no mutable object is shared between calls.
            dropout: Dropout probability used after every ReLU.

        Raises:
            ValueError: If ``hidden_dims`` has fewer than two entries.
        """
        super().__init__()

        if len(hidden_dims) < 2:
            raise ValueError(
                f"hidden_dims needs at least two entries, got {len(hidden_dims)}"
            )
        first_hidden, tower_out = hidden_dims[0], hidden_dims[1]

        # Sub-modules are registered in a fixed order (user_embs, user_fc,
        # item_embs, item_fc) so state_dict keys and seeded initialisation
        # stay compatible with previously saved checkpoints.
        self.user_embs = nn.ModuleDict({
            col: nn.Embedding(size, emb_dim) for col, size in user_cat_sizes.items()
        })

        user_input_dim = len(user_cat_sizes) * emb_dim + user_num_dims
        self.user_fc = nn.Sequential(
            nn.Linear(user_input_dim, first_hidden),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(first_hidden, tower_out),
            nn.ReLU(),
            nn.Dropout(dropout)
        )

        self.item_embs = nn.ModuleDict({
            col: nn.Embedding(size, emb_dim) for col, size in item_cat_sizes.items()
        })

        item_input_dim = len(item_cat_sizes) * emb_dim
        # The item tower must end at the same width as the user tower so the
        # two vectors can be compared element-wise.
        self.item_fc = nn.Sequential(
            nn.Linear(item_input_dim, tower_out),
            nn.ReLU(),
            nn.Dropout(dropout)
        )

        self.global_bias = nn.Parameter(torch.zeros(1))
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Xavier-initialise Linear and Embedding weights; zero Linear biases."""
        if isinstance(module, nn.Linear):
            nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)
        elif isinstance(module, nn.Embedding):
            nn.init.xavier_uniform_(module.weight)

    def forward(self, user_cat, user_num, item_cat):
        """Score each row of the batch.

        Args:
            user_cat: Integer tensor indexed as ``[:, i]``, where column ``i``
                holds the index for the i-th user embedding table.
            user_num: Tensor of numerical user features, concatenated with
                the user embeddings along dim 1.
            item_cat: Integer tensor indexed as ``[:, i]``, where column ``i``
                holds the index for the i-th item embedding table.

        Returns:
            One sigmoid score per batch row.
        """
        user_parts = [
            self.user_embs[col](user_cat[:, i]) for i, col in enumerate(self.user_embs.keys())
        ]
        user_vec = self.user_fc(torch.cat(user_parts + [user_num], dim=1))

        item_parts = [
            self.item_embs[col](item_cat[:, i]) for i, col in enumerate(self.item_embs.keys())
        ]
        item_vec = self.item_fc(torch.cat(item_parts, dim=1))

        # The dot product of L2-normalised vectors is their cosine similarity.
        user_vec_norm = F.normalize(user_vec, p=2, dim=1)
        item_vec_norm = F.normalize(item_vec, p=2, dim=1)
        cosine_sim = (user_vec_norm * item_vec_norm).sum(dim=1)

        return torch.sigmoid(cosine_sim + self.global_bias)


# Demo functions
def _print_recommendations(recommendations):
    """Print a numbered list of (item, rating) pairs, one per line."""
    for rank, (item, rating) in enumerate(recommendations, 1):
        print(f"{rank}. {item['products']} ({item['Product_Category']}) - {rating:.2f}/5.0")


def demo_recommendations(customer_id=37249, top_k=5):
    """Run the three demo scenarios against the saved model and assets.

    Loads the recommender from the hard-coded file names in the working
    directory, then prints recommendations for an existing customer,
    recommendations for a hand-written new-user profile, and one predicted
    rating for a hand-written item.

    Args:
        customer_id: Existing customer used for the first scenario.
        top_k: Number of recommendations printed in the first two scenarios.
    """
    recommender = StandaloneRecommender(
        model_path="best_two_tower_model.pth",
        artifacts_path="preproc_artifacts.pkl",
        assets_path="recommendation_assets",
        user_cat_cols=["Customer_ID", "City", "State", "Country", "Gender",
                       "Income", "Customer_Segment", "Feedback",
                       "Shipping_Method", "Payment_Method", "Order_Status"],
        user_num_cols=["Age", "Total_Purchases", "Amount"],
        item_cat_cols=["products", "Product_Category", "Product_Brand", "Product_Type"],
    )

    # Example 1: Recommendations for existing user
    print("=== EXISTING USER RECOMMENDATIONS ===")
    try:
        user_info = recommender.get_user_info(customer_id)
        if user_info:
            print(f"User {customer_id} profile:")
            print(f"- Age: {user_info['profile']['Age']}")
            print(f"- Gender: {user_info['profile']['Gender']}")
            print(f"- Country: {user_info['profile']['Country']}")
            print(f"- Previous purchases: {user_info['num_purchases']} items")

        # Raises ValueError for an unknown customer; reported below.
        recommendations = recommender.recommend_for_user(customer_id, top_k=top_k)
        print(f"\nTop {top_k} recommendations:")
        _print_recommendations(recommendations)

    except ValueError as e:
        print(f"Error: {e}")

    # Example 2: Recommendations for new user
    print("\n=== NEW USER RECOMMENDATIONS ===")
    new_user = {
        'Age': 28,
        'Gender': 'Female',
        'Country': 'UK',
        'City': 'London',
        'State': 'England',
        'Income': 'High',
        'Customer_Segment': 'Premium',
        'Feedback': 'Excellent',
        'Shipping_Method': 'Express',
        'Payment_Method': 'Credit Card',
        'Order_Status': 'Delivered',
        'Total_Purchases': 3,
        'Amount': 300.0
    }

    recommendations = recommender.recommend_for_new_user(new_user, top_k=top_k)
    print("Recommendations for new user (28yo female from UK):")
    _print_recommendations(recommendations)

    # Example 3: Predict specific rating
    print("\n=== RATING PREDICTION ===")
    item_features = {
        'products': 'iPhone 14',
        'Product_Category': 'Electronics',
        'Product_Brand': 'Apple',
        'Product_Type': 'Smartphone'
    }

    rating = recommender.predict_rating(new_user, item_features)
    print(f"Predicted rating for iPhone 14: {rating:.2f}/5.0")


# Run the demo only when this file is executed directly, not when imported.
if __name__ == "__main__":
    demo_recommendations()

Loading preprocessing artifacts...
Loading recommendation assets...
Loading trained model...
Recommender ready!
- 318 items in catalog
- 67892 known users
=== EXISTING USER RECOMMENDATIONS ===
User 37249 profile:
- Age: 19.0
- Gender: Female
- Country: UK
- Previous purchases: 2 items

Top 5 recommendations:
1. Literary fiction (Books) - 2.49/5.0
2. Adventure (Books) - 2.45/5.0
3. Psychology (Books) - 2.45/5.0
4. Mystery (Books) - 2.45/5.0
5. Cooking (Books) - 2.44/5.0

=== NEW USER RECOMMENDATIONS ===
Recommendations for new user (28yo female from UK):
1. Inverter AC (Electronics) - 2.77/5.0
2. Window AC (Electronics) - 2.77/5.0
3. Portable AC (Electronics) - 2.77/5.0
4. Ductless AC (Electronics) - 2.76/5.0
5. Split AC (Electronics) - 2.76/5.0

=== RATING PREDICTION ===
Predicted rating for iPhone 14: 2.38/5.0
