In [10]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, auc, accuracy_score, roc_curve
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset

from torch.optim.lr_scheduler import StepLR, MultiplicativeLR, ExponentialLR
import torch.nn.functional as F
import os
import pickle


### Model and Dataset class Definition

In [11]:
class RecommendationModel(nn.Module):
    # def __init__(self, num_products, num_categories, num_subcategories, embedding_dim=10):
    def __init__(self, num_categories, num_subcategories, embedding_dim=10):

        super(RecommendationModel, self).__init__()
        
        # Embedding layers for categorical features
        # self.product_embedding = nn.Embedding(num_products, embedding_dim)
        self.category_embedding = nn.Embedding(num_categories, embedding_dim // 2)
        self.subcategory_embedding = nn.Embedding(num_subcategories, embedding_dim // 2)
        
        # Define fully connected layers
        # self.fc1 = nn.Linear(embedding_dim + (embedding_dim // 2) * 2 + 4, 64)
        self.fc1 = nn.Linear((embedding_dim // 2) * 2 + 4, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 3)  # Output layer with 3 units for 3 classes

    # def forward(self, product_id, category_id, subcategory_id, ratings, no_of_ratings, actual_price, discount_price):
    def forward(self, category_id, subcategory_id, ratings, no_of_ratings, actual_price, discount_price):

        # Pass inputs through embeddings
        # product_embedded = self.product_embedding(product_id)
        category_embedded = self.category_embedding(category_id)
        subcategory_embedded = self.subcategory_embedding(subcategory_id)
        
        # Flatten the embedding outputs
        # product_embedded = product_embedded.view(-1, self.product_embedding.embedding_dim)
        category_embedded = category_embedded.view(-1, self.category_embedding.embedding_dim)
        subcategory_embedded = subcategory_embedded.view(-1, self.subcategory_embedding.embedding_dim)
        
        # Concatenate all features (embeddings + numerical features)
        # concatenated = torch.cat((product_embedded, category_embedded, subcategory_embedded, ratings, no_of_ratings, actual_price, discount_price), dim=1)
   
        concatenated = torch.cat((category_embedded, subcategory_embedded, ratings, no_of_ratings, actual_price, discount_price), dim=1)
        
        # Pass through fully connected layers
        x = torch.relu(self.fc1(concatenated))
        x = torch.relu(self.fc2(x))
        output = self.fc3(x)  # Output 3 units (for 3 classes) without activation
        
        return output

In [12]:
# Initialize metrics trackers
def compute_accuracy(predictions, labels):
    _, predicted = torch.max(predictions, 1)
    return accuracy_score(labels.cpu().numpy(), predicted.cpu().numpy())

def compute_auc(predictions, labels, num_classes):
    predictions_prob = F.softmax(predictions, dim=1).cpu().numpy()
    labels = labels.cpu().numpy()
    if num_classes == 2:
        return roc_auc_score(labels, predictions_prob[:, 1])  # For binary classification
    else:
        return roc_auc_score(labels, predictions_prob, multi_class='ovr')  # For multi-class classification
    
def logging(log_path, log_text):
    with open(log_path, 'a') as file:
        file.write(log_text + '\n')


In [21]:
main_dir = r'C:\Users\Data_Science\Desktop\trend_pulse_data'
experiment = 'Exp_2'
exp_dir = os.path.join(main_dir, experiment)
weights_dir = os.path.join(exp_dir, 'model_weights')
log_path = os.path.join(exp_dir, 'Training_log.txt')
os.makedirs(weights_dir, exist_ok=True)

In [26]:
df = pd.read_excel(os.path.join(exp_dir, 'Train_Data_3_classes_encoded.xlsx'))

### Creating Data Loaders

### Model initialization and User Testing

In [27]:
# Dictionary mappings for text to encoded IDs (assuming these are already available)

# Load the main category encoding dictionary
with open(os.path.join(exp_dir,'main_category_to_index.pkl'), 'rb') as f:
    main_category_to_index = pickle.load(f)

# Load the subcategory encoding dictionary
with open(os.path.join(exp_dir,'sub_category_to_index.pkl'), 'rb') as f:
    sub_category_to_index = pickle.load(f)

# Function to encode user inputs
def process_user_input(main_category, sub_category, rating, no_of_ratings, actual_price, discount_price):
    # Encode the main category and subcategory from text to ID
    category_id = main_category_to_index.get(main_category, -1)  # Default to -1 if not found
    subcategory_id = sub_category_to_index.get(sub_category, -1)  # Default to -1 if not found
    
    # Convert the rest of the inputs to the correct tensor types
    test_product_id = torch.tensor([0], dtype=torch.long)  # Placeholder product ID
    test_category_id = torch.tensor([category_id], dtype=torch.long)
    test_subcategory_id = torch.tensor([subcategory_id], dtype=torch.long)
    test_ratings = torch.tensor([[rating]], dtype=torch.float32)
    test_no_of_ratings = torch.tensor([[no_of_ratings]], dtype=torch.float32)
    test_actual_price = torch.tensor([[actual_price]], dtype=torch.float32)
    test_discount_price = torch.tensor([[discount_price]], dtype=torch.float32)
    
    return test_product_id, test_category_id, test_subcategory_id, test_ratings, test_no_of_ratings, test_actual_price, test_discount_price

# Function to apply the post-processing strategy
def recommend_top_products(predicted_class, category_id, subcategory_id, df, num_recommendations=5):
    # Filter products by predicted class and matching category/subcategory
    filtered_df = df[(df['int_rating'] == predicted_class) &
                     (df['encoded_main_category'] == category_id) &
                     (df['encoded_sub_category'] == subcategory_id)]
    
    # Rank the filtered products based on criteria (e.g., ratings, no_of_ratings, price, discount_price)
    filtered_df['rank_score'] = (
        0.4 * filtered_df['ratings'] +
        0.3 * filtered_df['no_of_ratings'] +
        0.2 * filtered_df['discount_price'] +
        0.1 * filtered_df['actual_price']
    )
    
    # Sort by rank_score and select top products
    top_products = filtered_df.sort_values(by='rank_score', ascending=False).head(num_recommendations)
    
    # Return the recommended product names
    return top_products['product'].values




In [28]:
# Initialize the model
# Initialize the model
# num_products = 28588    # len(df['encoded_product'].unique())
num_categories = 10     # len(df['encoded_main_category'].unique())
num_subcategories = 12  #  len(df['encoded_sub_category'].unique())


# Check if CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the model (assuming the model is saved as 'best_model.pth')
model = RecommendationModel(num_categories, num_subcategories)
model.load_state_dict(torch.load(os.path.join(weights_dir, 'best_model.pth')))
model = model.to(device)  # Move model to the appropriate device



In [29]:
# Get user input for the features except product name
main_category = input("Enter the main category (e.g., 'Electronics'): ")
sub_category = input("Enter the subcategory (e.g., 'Mobile Phones'): ")
rating = float(input("Enter the rating: "))
no_of_ratings = float(input("Enter the number of ratings: "))
actual_price = float(input("Enter the actual price (in USD): "))
discount_price = float(input("Enter the discount price (in USD): "))

# Process user inputs
test_product_id, test_category_id, test_subcategory_id, test_ratings, test_no_of_ratings, test_actual_price, test_discount_price = process_user_input(
    main_category, sub_category, rating, no_of_ratings, actual_price, discount_price)

# Pass the inputs through the model
model.eval()
with torch.no_grad():
    # outputs = model(test_product_id, test_category_id, test_subcategory_id, test_ratings, test_no_of_ratings, test_actual_price, test_discount_price)
    outputs = model(test_category_id, test_subcategory_id, test_ratings, test_no_of_ratings, test_actual_price, test_discount_price)
    predicted_probs = F.softmax(outputs, dim=1)
    predicted_class = predicted_probs.argmax(dim=1).item()

print(f"\nPredicted class (int_rating): {predicted_class}")

# Apply post-processing to recommend products
recommended_products = recommend_top_products(predicted_class, test_category_id.item(), test_subcategory_id.item(), df)

print("\nRecommended Products:")
for i, product in enumerate(recommended_products, 1):
    print(f"{i}. {product}")


Predicted class (int_rating): 2

Recommended Products:
1. M MOOHAM Cross Necklace for Men, Silver Black Gold Stainless Steel Plain Cross Pendant Necklace for Men Box Chain 16-30 Inch
2. Sukkhi Rakhi Cluster Rakhi Combo with Roli Chawal and Raksha Bandhan For Men
3. Miabella Solid 925 Sterling Silver Italian 3.5mm Diamond Cut Cuban Link Curb Chain Necklace for Women Men, 16"-18"-20"-2...
4. ThunderFit Silicone Rings for Men - 7 Rings / 4 Rings / 1 Ring - Breathable Patterned Design Wedding Bands 8mm Wide - 2.5m...
5. King Will Basic 10mm Tungsten Carbide Ring for Men Matte Polished Engagement Band Comfort Fit


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['rank_score'] = (


### match original product in suggestions

In [None]:
def evaluate_recommendation(product_name, predicted_class, category_id, subcategory_id, num_recommendations=5):
    # Filter products by predicted class and matching category/subcategory
    filtered_df = df[(df['int_rating'] == predicted_class) &
                     (df['encoded_main_category'] == category_id) &
                     (df['encoded_sub_category'] == subcategory_id)]
    
    # Rank the filtered products based on criteria (e.g., ratings, no_of_ratings, price, discount_price)
    filtered_df['rank_score'] = (
        0.4 * filtered_df['ratings'] +
        0.3 * filtered_df['no_of_ratings'] +
        0.2 * filtered_df['discount_price'] +
        0.1 * filtered_df['actual_price']
    )
    
    # Sort by rank_score and select top products
    top_products = filtered_df.sort_values(by='rank_score', ascending=False).head(num_recommendations)
    
    # Check if the provided product name is in the recommended list
    if product_name in top_products['product'].values:
        print("Recommendation accurate")
    else:
        print("Recommendation varied")

# Function to test the model with user input and evaluate the recommendation
def test_model_with_product_name():
    # Get user input for the features including product name
    product_name = input("Enter the product name: ")
    main_category = input("Enter the main category (e.g., 'Electronics'): ")
    sub_category = input("Enter the subcategory (e.g., 'Mobile Phones'): ")
    rating = float(input("Enter the rating: "))
    no_of_ratings = float(input("Enter the number of ratings: "))
    actual_price = float(input("Enter the actual price (in USD): "))
    discount_price = float(input("Enter the discount price (in USD): "))
    
    # Process user inputs
    test_product_id, test_category_id, test_subcategory_id, test_ratings, test_no_of_ratings, test_actual_price, test_discount_price = process_user_input(
        main_category, sub_category, rating, no_of_ratings, actual_price, discount_price)
    
    # Pass the inputs through the model
    model.eval()
    with torch.no_grad():
        outputs = model(test_product_id, test_category_id, test_subcategory_id, test_ratings, test_no_of_ratings, test_actual_price, test_discount_price)
        predicted_probs = F.softmax(outputs, dim=1)
        predicted_class = predicted_probs.argmax(dim=1).item()
    
    print(f"\nPredicted class (int_rating): {predicted_class}")
    
    # Apply post-processing to evaluate if the provided product name is in the recommended list
    evaluate_recommendation(product_name, predicted_class, test_category_id.item(), test_subcategory_id.item())

# Example call to test the model with product name
test_model_with_product_name()
