# Retrieval Augmented Generation

David Fischanger<br>
## Import Libraries and Load Dataset

In [7]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [8]:
# Read the sample fashion dataset into `df`.
# NOTE: both handlers only print a hint and carry on, so when loading fails
# this cell does not assign `df`; later cells that use `df` will then fail.
try:
    df = pd.read_csv('pinterest-fashion-dataset.csv')
except pd.errors.ParserError:
    print("Error parsing the file. Please check the file format.")
except FileNotFoundError:
    print("File not found. Please check the file path.")

## Define Retrieval Function

In [9]:
def retrieve_function(user_query, df, top_k=5):
    """Return the rows of ``df`` whose descriptions best match ``user_query``.

    A TF-IDF model is fitted on the lower-cased ``image_description`` column
    (missing values are treated as empty text) on every call, the query is
    transformed with the same vocabulary, and rows are ranked by cosine
    similarity to the query.

    Args:
        user_query: Free-text query string.
        df: DataFrame that contains an ``image_description`` column.
        top_k: Maximum number of rows to return. Defaults to 5, the value
            that was previously hard-coded.

    Returns:
        A DataFrame with at most ``top_k`` rows of ``df``, most similar
        first. Rows are returned even when their similarity is 0, so callers
        always get ``min(top_k, len(df))`` rows. An input without rows yields
        an empty frame.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
        KeyError: If ``df`` has rows but no ``image_description`` column.
    """
    if top_k < 1:
        raise ValueError("top_k must be at least 1.")

    # Nothing to rank; also avoids fitting the vectorizer on an empty corpus
    # (scikit-learn is expected to reject that -- see TfidfVectorizer docs).
    if len(df) == 0:
        return df.iloc[0:0]

    # Vectorize the descriptions
    product_descriptions = df["image_description"].fillna('').astype(str).str.lower()
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(product_descriptions)

    # Vectorize the user query with the vocabulary learned above
    query_vec = tfidf_vectorizer.transform([user_query.lower()])

    # Compute cosine similarity (one score per row of df)
    cosine_similarities = cosine_similarity(query_vec, tfidf_matrix).flatten()

    # Positions of the top_k highest scores, best first
    top_indices = cosine_similarities.argsort()[::-1][:top_k]
    return df.iloc[top_indices]

## RAG System with Product Recommendation

### Define User Profile Input (test mode only: interactive prompts, not suitable for use at scale)

In [13]:
def get_user_profile():
    """Interactively collect a user profile from standard input.

    Prompts repeatedly for the age until an integer between 0 and 120 is
    entered, and for the gender until one of M, F or Other is entered (any
    letter case, surrounding whitespace ignored). Location and interests are
    read once each and accepted as typed, minus surrounding whitespace.

    Returns:
        tuple: ``(age, gender, location, interests)`` where ``age`` is an
        int, ``gender`` is the upper-cased choice (``'M'``, ``'F'`` or
        ``'OTHER'``) and the remaining two items are stripped strings.
    """
    while True:
        try:
            age = int(input("Please enter your age: "))
            if age < 0 or age > 120:
                # No trailing period here: the handler below appends
                # ". Please try again." and would otherwise print "120..".
                raise ValueError("Age must be between 0 and 120")
            break
        except ValueError as e:
            # Covers both non-numeric input (raised by int()) and the range check.
            print(f"Invalid input: {e}. Please try again.")

    while True:
        gender = input("Please enter your gender (M/F/Other): ").strip().upper()
        if gender in ['M', 'F', 'OTHER']:
            break
        else:
            print("Invalid input. Please enter M, F, or Other.")

    location = input("Please enter your location: ").strip()
    interests = input("What kind of fashion items are you interested in? (e.g., Shoes, Pants, Sunglasses): ").strip()

    return age, gender, location, interests

## Utilize the Retrieval Model and Generate Recommendations

Incorporate the retrieval function to filter products, and then justify the recommendations based on the user’s profile.

In [14]:
import random

def rag_system(age, gender, location, interests, df, random_state=None):
    """Retrieve products for a user profile and phrase them as recommendations.

    A text query is built from the profile and passed to
    ``retrieve_function``. The retrieved rows are narrowed to those whose
    ``age`` and ``gender`` columns match the profile; when no row matches,
    all retrieved rows are kept. The result is shuffled and each row is
    turned into one sentence.

    Args:
        age: User age; expected to be an int.
        gender: User gender label. Compared with the ``gender`` column
            ignoring letter case and surrounding whitespace.
        location: Free-text location, used in the query and the sentence.
        interests: Free-text interests, used in the query and the sentence.
        df: Product DataFrame. The columns ``age``, ``gender``, ``brand``,
            ``price in $``, ``ratings`` and ``image_description`` are read.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            order of the recommendations can be reproduced. ``None`` (the
            default) keeps the order random.

    Returns:
        list[str]: One recommendation sentence per selected product.

    Raises:
        KeyError: If one of the columns listed above is missing.
    """
    import re  # local import: only needed for the text-valued age match below

    # Generate a more specific query based on user input
    user_query = f"{interests} fashion items for {age} year old {gender} in {location}"
    relevant_products = retrieve_function(user_query, df)

    # Age filter. A plain substring test would let age 5 match 25, 15 or 50,
    # so numeric columns are compared by value and text columns must contain
    # the age as a whole number (not as part of a longer number).
    age_column = relevant_products['age']
    if pd.api.types.is_numeric_dtype(age_column):
        age_matches = age_column == age
    else:
        whole_number = re.compile(rf"(?<![\d.]){re.escape(str(age))}(?!\d)")
        age_matches = age_column.map(
            lambda value: pd.notna(value) and whole_number.search(str(value)) is not None
        ).astype(bool)

    # Gender filter, normalised on both sides.
    # NOTE(review): assumes the dataset uses the same labels as the profile
    # (e.g. "M"/"F"); spelled-out labels such as "Male" would still not match.
    wanted_gender = str(gender).strip().upper()
    gender_matches = (
        relevant_products['gender'].astype(str).str.strip().str.upper() == wanted_gender
    )

    filtered_products = relevant_products[age_matches & gender_matches]

    # If no products match the exact criteria, return the top relevant products
    if filtered_products.empty:
        filtered_products = relevant_products

    # Randomly select up to 5 products from the filtered list. When the list
    # has 5 rows or fewer this keeps every row and only shuffles their order.
    num_recommendations = min(5, len(filtered_products))
    selected_products = filtered_products.sample(
        n=num_recommendations, random_state=random_state
    )

    # Generate recommendations
    recommendations = []
    for _, product in selected_products.iterrows():
        justification = f"Based on your interest in {interests}, and considering your location in {location}, we recommend a {product['brand']} item priced at ${product['price in $']:.2f}. It has a rating of {product['ratings']:.1f} and is described as: {product['image_description']}."
        recommendations.append(justification)

    return recommendations

# RAG Execution

In [None]:
# Entry point: collect a profile interactively, then print one numbered
# recommendation per product returned by rag_system.
if __name__ == "__main__":
    print("Welcome to the Fashion Recommendation System!")
    age, gender, location, interests = get_user_profile()

    recommendations = rag_system(age, gender, location, interests, df)

    print("\nHere are your personalized fashion recommendations:")
    for position, text in enumerate(recommendations, start=1):
        print(f"\nRecommendation {position}:\n{text}")

Welcome to the Fashion Recommendation System!


Please enter your age:  


Invalid input: invalid literal for int() with base 10: ''. Please try again.


***

# End of code

***