In [73]:
import numpy as np
import pandas as pd
import os
import torch
import pickle  # Import the pickle module
from transformers import DistilBertTokenizer, DistilBertModel
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

In [74]:
# Load all CSV files from the specified folder
def load_all_data(folder_path):
    """Load every CSV file in ``folder_path`` into one combined DataFrame.

    Files are read in sorted filename order so that the row order of the
    result does not depend on the arbitrary ordering returned by
    ``os.listdir``.

    Args:
        folder_path: Directory scanned (non-recursively) for files whose
            name ends with ``.csv``.

    Returns:
        pandas.DataFrame: The rows of all CSV files concatenated with a
        fresh ``0..n-1`` index. ``discount_price`` and ``actual_price`` are
        converted to numeric; values that cannot be parsed become NaN.

    Raises:
        ValueError: If the folder contains no ``.csv`` files.
        KeyError: If a CSV file lacks one of the two price columns.
    """
    csv_names = sorted(
        name for name in os.listdir(folder_path) if name.endswith('.csv')
    )
    if not csv_names:
        # pd.concat([]) would otherwise fail with "No objects to concatenate",
        # which says nothing about the folder being the problem.
        raise ValueError(f"No CSV files found in {folder_path!r}")

    frames = []
    for name in csv_names:
        df = pd.read_csv(os.path.join(folder_path, name))
        # Coerce instead of raising so one malformed price does not abort the load.
        for column in ('discount_price', 'actual_price'):
            df[column] = pd.to_numeric(df[column], errors='coerce')
        frames.append(df)
    return pd.concat(frames, ignore_index=True)

In [75]:
# Function to get DistilBERT embeddings
def get_embeddings(data, tokenizer, model):
    """Embed every entry of ``data['name']`` and stack the vectors row-wise.

    Each name is tokenized on its own and passed through ``model`` with
    gradients disabled; the hidden state at sequence position 0 (the [CLS]
    token) is used as the embedding for that name.

    Args:
        data: Mapping or DataFrame with a ``'name'`` column of product names.
        tokenizer: Callable returning the keyword inputs for ``model``
            (called with ``return_tensors='pt'``).
        model: Callable whose result exposes ``last_hidden_state``.

    Returns:
        numpy.ndarray: One row per entry of ``data['name']``, in the same
        order, so row ``i`` lines up with row ``i`` of ``data``.

    Raises:
        ValueError: If ``data['name']`` has no entries.
    """
    embeddings = []
    for desc in data['name']:
        if not isinstance(desc, str):
            # Blank CSV cells are read as float NaN, which the tokenizer
            # rejects. Embed an empty string rather than dropping the row so
            # the output stays aligned with ``data``.
            desc = '' if pd.isna(desc) else str(desc)
        inputs = tokenizer(desc, return_tensors='pt', padding=True, truncation=True)
        with torch.no_grad():
            outputs = model(**inputs)
        embeddings.append(outputs.last_hidden_state[:, 0, :].numpy())  # [CLS] position

    if not embeddings:
        raise ValueError("Cannot compute embeddings: data['name'] is empty")
    return np.vstack(embeddings)

In [76]:
# Train a simple model for product recommendation
def train_model(data, embeddings, test_size=0.2, random_state=42):
    """Fit a logistic-regression classifier that maps embeddings to categories.

    The rows are split into train and test parts, the classifier is fitted on
    the train part, and a classification report for the test part is printed.

    Args:
        data: Mapping or DataFrame whose ``'main_category'`` column holds the
            label for each embedding row.
        embeddings: Array-like with one feature row per row of ``data``.
        test_size: Fraction of rows held out for the printed report.
        random_state: Seed for the split. It is fixed by default so that
            repeated runs give the same split, report and model.

    Returns:
        The fitted ``LogisticRegression`` model.

    Raises:
        ValueError: If ``data['main_category']`` has fewer than two distinct
            values.
    """
    labels = data['main_category']
    categories = pd.Series(labels).unique()
    # Fail early with a clear message instead of deep inside scikit-learn.
    if len(categories) == 0:
        raise ValueError("Not enough classes to train the model. No classes found: data is empty.")
    if len(categories) < 2:
        raise ValueError(f"Not enough classes to train the model. Only one class found: {categories[0]}")

    features = np.asarray(embeddings)

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )

    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)

    # Report quality on the held-out rows.
    predictions = model.predict(X_test)
    print(classification_report(y_test, predictions))

    return model

In [77]:
def recommend_gifts(model, data, gender, age, relationship, budget, tokenizer, bert_model):
    """Return the products in the category predicted for a recipient profile.

    The profile fields are joined into one space-separated string, embedded
    with ``bert_model`` (hidden state at sequence position 0), and passed to
    ``model.predict``. The rows of ``data`` whose ``main_category`` equals the
    predicted label and whose ``discount_price`` is at most ``budget`` are
    returned with their original index labels.
    """
    profile_text = f"{gender} {age} {relationship} {budget}"
    encoded = tokenizer(profile_text, return_tensors='pt', padding=True, truncation=True)

    with torch.no_grad():
        hidden_states = bert_model(**encoded).last_hidden_state
        profile_vector = hidden_states[:, 0, :].numpy()  # [CLS] position

    # The classifier expects a 2-D (samples, features) array.
    category = model.predict(profile_vector.reshape(1, -1))[0]

    in_category = data['main_category'] == category
    within_budget = data['discount_price'] <= budget
    return data[in_category & within_budget]

In [78]:
def main(folder_path=r'D:\gift_recomm\Temp_dtst', max_results=5):
    """Train the category classifier and run one interactive recommendation.

    Steps: load the product CSVs, embed the product names with DistilBERT,
    fit the classifier, pickle the classifier and the embeddings to the
    working directory, prompt for the recipient's details, and print the
    matching products.

    Args:
        folder_path: Directory containing the product CSV files. Update the
            default with your actual path.
        max_results: Maximum number of recommendations to print.

    Raises:
        ValueError: If the age or budget typed at the prompt is not a number.
    """
    data = load_all_data(folder_path)

    # Initialize DistilBERT tokenizer and model
    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    bert_model = DistilBertModel.from_pretrained('distilbert-base-uncased')

    # Embed the products and train the classifier on those embeddings
    embeddings = get_embeddings(data, tokenizer, bert_model)
    trained_model = train_model(data, embeddings)

    # Persist both artifacts so they can be reused without recomputing.
    with open('trained_model.pkl', 'wb') as f:
        pickle.dump(trained_model, f)
    with open('item_embeddings.pkl', 'wb') as f:
        pickle.dump(embeddings, f)

    # Get user input
    print("Welcome to the Gift Recommendation System!")

    recipient_gender = input("Enter recipient's gender (male/female): ").strip().lower()
    recipient_age = int(input("Enter recipient's age: ").strip())
    relation_with_recipient = input("Enter your relation with the recipient (friend/family/etc.): ").strip().lower()
    user_budget = float(input("Enter your budget: ").strip())

    # Get recommendations
    recommended_gifts = recommend_gifts(trained_model, data, recipient_gender, recipient_age, relation_with_recipient, user_budget, tokenizer, bert_model)

    # Display recommendations
    if not recommended_gifts.empty:
        print("\nRecommended Gifts:")
        # Limit by position with head(). The filtered frame keeps the row
        # labels of the full dataset, so testing the label against a count
        # would hide every match that is not among the dataset's first rows.
        for _, row in recommended_gifts.head(max_results).iterrows():
            print(f"\n\nName: {row['name']}, \nCategory: {row['main_category']}, \nPrice: ${row['discount_price']:.2f}, \nImage:{row['image']}, \nLink: {row['link']}")
    else:
        print("No gifts found that match your criteria.")


In [79]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

def train_model(data, embeddings, test_size=0.2, random_state=42):
    """Fit a logistic-regression classifier that maps embeddings to categories.

    A ``test_size`` fraction of the rows is held out of training; the
    classifier is fitted on the remaining rows. No evaluation is printed.

    Args:
        data: DataFrame whose ``'main_category'`` column holds the label for
            each embedding row.
        embeddings: Array-like with one feature row per row of ``data``.
        test_size: Fraction of rows kept out of training.
        random_state: Seed for the split. It is fixed by default so that
            repeated runs train on the same rows and give the same model.

    Returns:
        The fitted ``LogisticRegression`` model.

    Raises:
        ValueError: If ``data['main_category']`` has fewer than two distinct
            values.
    """
    labels = data['main_category']
    categories = labels.unique()

    # At least two classes are required. Handle the empty case separately so
    # that indexing categories[0] cannot raise IndexError.
    if len(categories) == 0:
        raise ValueError("Not enough classes to train the model. No classes found: data is empty.")
    if len(categories) < 2:
        raise ValueError(f"Not enough classes to train the model. Only one class found: {categories[0]}")

    # The held-out part is kept out of training as before. The former
    # predict() call on it was dropped because its result was never used.
    X_train, _X_test, y_train, _y_test = train_test_split(
        embeddings, labels, test_size=test_size, random_state=random_state
    )

    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)

    return model


In [80]:
# Entry point: runs the full train-and-recommend pipeline. The call blocks on
# input() prompts. The guard keeps main() from running if this code is
# imported as a module instead of being executed directly.
if __name__ == "__main__":
    main()

Welcome to the Gift Recommendation System!

Recommended Gifts:


Name: Skybags Brat Black 46 Cms Casual Backpack, 
Category: bags & luggage, 
Price: $659.00, 
Image:https://m.media-amazon.com/images/I/810s53kR8tL._AC_UL320_.jpg, 
Link: https://www.amazon.in/Skybags-Brat-Black-Casual-Backpack/dp/B08Z1HHHTD/ref=sr_1_1?qid=1679218544&s=luggage&sr=1-1


Name: SAFARI 15 Ltrs Sea Blue Casual/School/College Backpack (DAYPACKNEO15CBSEB), 
Category: bags & luggage, 
Price: $299.00, 
Image:https://m.media-amazon.com/images/I/61kmCas5OCL._AC_UL320_.jpg, 
Link: https://www.amazon.in/SAFARI-Ltrs-Casual-Backpack-DAYPACKNEO15CBSEB/dp/B07Q7CNPMV/ref=sr_1_2?qid=1679218544&s=luggage&sr=1-2


Name: Wesley Milestone 2.0 Casual Waterproof Laptop Backpack/Office Bag/School Bag/College Bag/Business Bag/Travel Backpack (Dim..., 
Category: bags & luggage, 
Price: $565.00, 
Image:https://m.media-amazon.com/images/I/811AxL+qTpL._AC_UL320_.jpg, 
Link: https://www.amazon.in/Wesley-Milestone-Waterproof-Backpack-Bus