In [59]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, auc, accuracy_score, roc_curve
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset

from torch.optim.lr_scheduler import StepLR, MultiplicativeLR, ExponentialLR
import torch.nn.functional as F
import os
import pickle


### Model and Dataset class Definition

In [60]:
# class RecommendationModel(nn.Module):
#     def __init__(self, input_dim):
#         super(RecommendationModel, self).__init__()
        
#         self.fc1 = nn.Linear(input_dim, 512)
        
#         self.fc2 = nn.Linear(512, 512)
        
#         self.fc3 = nn.Linear(512, 256)
        
#         self.fc4 = nn.Linear(256, 256)
        
#         self.fc5 = nn.Linear(256, 128)
        
#         self.fc6 = nn.Linear(128, 128)
        
#         self.fc7 = nn.Linear(128, 64)
        
#         self.fc8 = nn.Linear(64, 64)
        
#         self.fc9 = nn.Linear(64, 3)
        
#     def forward(self, features):
#         x = torch.relu(self.fc1(features))
#         x = x + torch.relu(self.fc2(x))  # Residual connection
#         x = torch.relu(self.fc3(x))
#         x = x + torch.relu(self.fc4(x))  # Residual connection
#         x = torch.relu(self.fc5(x))
#         x = x + torch.relu(self.fc6(x))  # Residual connection
#         x = torch.relu(self.fc7(x))
#         x = x + torch.relu(self.fc8(x))  # Residual connection
#         output = self.fc9(x)
#         return output


class RecommendationModel(nn.Module):
    def __init__(self, input_dim):
        super(RecommendationModel, self).__init__()
        
        self.fc1 = nn.Linear(input_dim, 256)
        self.dropout1 = nn.Dropout(p=0.3)
        
        self.fc2 = nn.Linear(256, 128)
        self.dropout2 = nn.Dropout(p=0.3)
        
        self.fc3 = nn.Linear(128, 64)
        self.dropout3 = nn.Dropout(p=0.3)
        
        self.fc4 = nn.Linear(64, 32)
        self.dropout4 = nn.Dropout(p=0.3)
        
        self.fc5 = nn.Linear(32, 3)
        
    def forward(self, features):
        x = torch.relu(self.fc1(features))
        x = self.dropout1(x)
        
        x = torch.relu(self.fc2(x))
        x = self.dropout2(x)
        
        x = torch.relu(self.fc3(x))
        x = self.dropout3(x)
        
        x = torch.relu(self.fc4(x))
        x = self.dropout4(x)
        
        output = self.fc5(x)
        return output

In [61]:
# Initialize metrics trackers
def compute_accuracy(predictions, labels):
    _, predicted = torch.max(predictions, 1)
    return accuracy_score(labels.cpu().numpy(), predicted.cpu().numpy())

def compute_auc(predictions, labels, num_classes):
    predictions_prob = F.softmax(predictions, dim=1).cpu().numpy()
    labels = labels.cpu().numpy()
    if num_classes == 2:
        return roc_auc_score(labels, predictions_prob[:, 1])  # For binary classification
    else:
        return roc_auc_score(labels, predictions_prob, multi_class='ovr')  # For multi-class classification
    
def logging(log_path, log_text):
    with open(log_path, 'a') as file:
        file.write(log_text + '\n')


In [62]:
main_dir = 'new_exp'
experiment = 'lable_encoded_exp_2_feat_engineer'
exp_dir = os.path.join(main_dir, experiment)
weights_dir = os.path.join(exp_dir, 'model_weights')
# log_path = os.path.join(exp_dir, 'Training_log.txt')
# os.makedirs(weights_dir, exist_ok=True)

In [63]:
df = pd.read_excel(os.path.join(exp_dir, 'Train_Data_3_classes_encoded.xlsx'))

In [136]:
df['main_category'].unique()

array(['accessories', 'appliances', 'home & kitchen', 'car & motorbike',
       'tv, audio & cameras', 'sports & fitness', 'beauty & health',
       'bags & luggage', 'toys & baby products', 'pet supplies'],
      dtype=object)

### Creating Data Loaders

### Model initialization and User Testing

In [140]:
# Dictionary mappings for text to encoded IDs (assuming these are already available)

with open(os.path.join(exp_dir, 'main_category_label_encoder.pkl'), 'rb') as f:
    main_category_label_encoder = pickle.load(f)

with open(os.path.join(exp_dir, 'sub_category_label_encoder.pkl'), 'rb') as f:
    sub_category_label_encoder = pickle.load(f)

# Function to process user input and create the full feature tensor
def process_user_input(rating, no_of_ratings, actual_price, main_category):
    
    # Encode the main category from text to ID using transform
    category_id = main_category_label_encoder.transform([main_category])[0]  # Extract the scalar value from the array

    # Placeholder values for features not provided by the user
    subcategory_id = -1  # No subcategory input from the user
    discount_price = -1  # Placeholder for missing discount price
    
    # Placeholder or computed values for engineered features
    rating_density = -1  # Placeholder
    avg_rating_per_category = -1  # Placeholder
    weighted_discount_price = rating * discount_price if discount_price != -1 else -1
    weighted_actual_price = rating * actual_price
    rating_adjusted_discount_price = -1  # Placeholder
    rating_importance = rating * no_of_ratings
    
    # Create the feature array
    data = np.array([
        float(category_id), 
        float(subcategory_id), 
        float(no_of_ratings), 
        float(actual_price), 
        float(discount_price), 
        float(rating_density), 
        float(avg_rating_per_category), 
        float(weighted_discount_price), 
        float(weighted_actual_price), 
        float(rating_adjusted_discount_price), 
        float(rating_importance)
    ], dtype=np.float32)

    # Convert the numpy array to a PyTorch tensor and add the batch dimension
    features = torch.tensor(data).unsqueeze(0)
    
    return features

# # Function to apply the post-processing strategy
# def recommend_top_products(predicted_class, main_category, df, num_recommendations=5):
#     # Filter products by predicted class and matching category/subcategory
#     # filtered_df = df[(df['int_rating'] == predicted_class) &
#     #                  (df['encoded_main_category'] == category_id) &
#     #                  (df['encoded_sub_category'] == subcategory_id)]
#     filtered_df = df[(df['int_rating'] == predicted_class) &
#                      (df['main_category'] == main_category)].copy()
#     # Rank the filtered products based on criteria (e.g., ratings, no_of_ratings, price, discount_price)
#     filtered_df['rank_score'] = (
#         0.7 * filtered_df['ratings'] +
#         0.1 * filtered_df['no_of_ratings'] +
#         0.1 * filtered_df['discount_price'] +
#         0.1 * filtered_df['actual_price']
#     )
#     # Sort by rank_score and select top products
#     top_products = filtered_df.sort_values(by='rank_score', ascending=True).head(num_recommendations)
#     # Return the recommended product names
#     return top_products

def recommend_top_products(predicted_class, df, input_values, num_recommendations=5):
    # Unpack input values
    input_rating, input_no_of_rating, input_actual_price, input_main_category = input_values
    
    # Filter products by predicted class and matching category/subcategory
    filtered_df = df[(df['int_rating'] == predicted_class) &
                     (df['main_category'] == input_main_category)].copy()

    # Calculate the proximity scores
    filtered_df['rating_diff'] = (filtered_df['ratings'] - float(input_rating)).abs()
    filtered_df['no_of_ratings_diff'] = (filtered_df['no_of_ratings'] - float(input_no_of_rating)).abs()
    filtered_df['actual_price_diff'] = (filtered_df['actual_price'] - float(input_actual_price)).abs()

    # Rank the filtered products based on a combination of similarity to input values and other criteria
    filtered_df['rank_score'] = (
        0.5 * filtered_df['rating_diff'] +  # Closer ratings to input preferred
        0.2 * filtered_df['no_of_ratings_diff'] +  # Closer no_of_ratings to input preferred
        0.3 * filtered_df['actual_price_diff']) # +  # Closer actual price to input preferred
        # 0.1 * filtered_df['ratings']  # Higher ratings still preferred
    # )

    # Sort by rank_score (lower scores are better) and select top products
    top_products = filtered_df.sort_values(by='rank_score', ascending=True).head(num_recommendations)

    # Return the recommended product names and links
    return top_products



In [142]:
# Get user input for the features except product name
main_category = input("Enter the category name from the list below:---'accessories'---'appliances'---'home & kitchen'---'car & motorbike'---'tv, audio & cameras'---'sports & fitness'---'beauty & health'---'bags & luggage'---'toys & baby products'---'pet supplies'")
# sub_category = input("Enter the subcategory (e.g., 'Mobile Phones'): ")
rating = float(input("Enter the rating: "))
no_of_ratings = float(input("Enter the number of ratings: "))
actual_price = float(input("Enter the actual price (in USD): "))
# discount_price = float(input("Enter the discount price (in USD): "))

input_values = np.array([rating, no_of_ratings, actual_price, main_category])

# Process user inputs
features= process_user_input(rating, no_of_ratings, actual_price, main_category)

input_dim = features.size(1)

# Check if CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the model (assuming the model is saved as 'best_model.pth')
model = RecommendationModel(input_dim)
model.load_state_dict(torch.load(os.path.join(weights_dir, 'best_model.pth'), map_location=device))
model = model.to(device)  # Move model to the appropriate device

features = features.to(device)
# Pass the inputs through the model
model.eval()
with torch.no_grad():
    outputs = model(features)
    predicted_probs = F.softmax(outputs, dim=1)
    predicted_class = predicted_probs.argmax(dim=1).item()

print(f"\nPredicted class (int_rating): {predicted_class}")

# Apply post-processing to recommend products
recommended_products = recommend_top_products(predicted_class, df, input_values, 10)

print("\nRecommended Products:")
for i, (index, row) in enumerate(recommended_products.iterrows(), 1):
    print(f"{i}. {row['product']}\n{row['link']}\n")


Predicted class (int_rating): 2

Recommended Products:
1. USI Universal Set of 2 Pieces SLAM Ball(Dead Bounce) (Colour May Vary)
https://www.amazon.in/USI-Pieces-SLAM-Bounce-Colour/dp/B01N001N3S/ref=sr_1_5464?qid=1679217758&s=sports&sr=1-5464

2. USI UNIVERSAL THE UNBEATABLE Knee Wraps 203cm(80"), 733KW3 Line Weight Lifting Knee Wraps Training Straps Power Lifting Gy...
https://www.amazon.in/USI-LINE-KNEE-WRAPS/dp/B01NBKVB3I/ref=sr_1_498?qid=1679217664&s=sports&sr=1-498

3. Generic Imported Crossfit Gym Hand Grip Guard Palm Protector Leather Glove Pull Up Lift M
https://www.amazon.in/Generic-Crossfit-Guard-Protector-Leather/dp/B01FWB3HHO/ref=sr_1_4718?qid=1679217747&s=sports&sr=1-4718

4. Monex 1.5" Width Avail. in 20FT Length Undulation Rope Exercise Fitness Workout Strength Training Gym Climbing Battle Rop...
https://www.amazon.in/Monex-Undulation-Exercise-Strength-Training/dp/B079WYDCGK/ref=sr_1_1496?qid=1679217686&s=sports&sr=1-1496

5. Amazon Brand - Symactive Aluminium Arm Blast

### match original product in suggestions

In [None]:
def evaluate_recommendation(product_name, predicted_class, category_id, subcategory_id, num_recommendations=5):
    # Filter products by predicted class and matching category/subcategory
    filtered_df = df[(df['int_rating'] == predicted_class) &
                     (df['encoded_main_category'] == category_id) &
                     (df['encoded_sub_category'] == subcategory_id)]
    
    # Rank the filtered products based on criteria (e.g., ratings, no_of_ratings, price, discount_price)
    filtered_df['rank_score'] = (
        0.4 * filtered_df['ratings'] +
        0.3 * filtered_df['no_of_ratings'] +
        0.2 * filtered_df['discount_price'] +
        0.1 * filtered_df['actual_price']
    )
    
    # Sort by rank_score and select top products
    top_products = filtered_df.sort_values(by='rank_score', ascending=False).head(num_recommendations)
    
    # Check if the provided product name is in the recommended list
    if product_name in top_products['product'].values:
        print("Recommendation accurate")
    else:
        print("Recommendation varied")

# Function to test the model with user input and evaluate the recommendation
def test_model_with_product_name():
    # Get user input for the features including product name
    product_name = input("Enter the product name: ")
    main_category = input("Enter the main category (e.g., 'Electronics'): ")
    sub_category = input("Enter the subcategory (e.g., 'Mobile Phones'): ")
    rating = float(input("Enter the rating: "))
    no_of_ratings = float(input("Enter the number of ratings: "))
    actual_price = float(input("Enter the actual price (in USD): "))
    discount_price = float(input("Enter the discount price (in USD): "))
    
    # Process user inputs
    test_product_id, test_category_id, test_subcategory_id, test_ratings, test_no_of_ratings, test_actual_price, test_discount_price = process_user_input(
        main_category, sub_category, rating, no_of_ratings, actual_price, discount_price)
    
    # Pass the inputs through the model
    model.eval()
    with torch.no_grad():
        outputs = model(test_product_id, test_category_id, test_subcategory_id, test_ratings, test_no_of_ratings, test_actual_price, test_discount_price)
        predicted_probs = F.softmax(outputs, dim=1)
        predicted_class = predicted_probs.argmax(dim=1).item()
    
    print(f"\nPredicted class (int_rating): {predicted_class}")
    
    # Apply post-processing to evaluate if the provided product name is in the recommended list
    evaluate_recommendation(product_name, predicted_class, test_category_id.item(), test_subcategory_id.item())

# Example call to test the model with product name
test_model_with_product_name()
