# Task 1: Concept Development & Mock-Up Design

In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.impute import SimpleImputer

In [19]:
# Load the luxury apparel dataset from a CSV file.
# Despite its name, `url` currently holds a bare file name, so the file is
# looked up relative to the notebook's working directory.
url = 'Luxury_Products_Apparel_Data.csv'  # Replace with the actual dataset URL
df = pd.read_csv(url)

# Display the first few rows of the dataset as a quick check of the columns
print(df.head())

   Unnamed: 0     Category SubCategory  \
0        8037  Accessories        Bags   
1       13670  Accessories       Socks   
2       13983        Suits     Tuxedos   
3       12081  Accessories      Gloves   
4       15617  Accessories   Cufflinks   

                                         ProductName  \
0                     "Prada Striped Shell Belt Bag"   
1  "Falke - Lhasa Wool And Cashmere-blend Socks -...   
2                    "peak lapel tuxedo suit jacket"   
3                "Thom Browne Navy 4-Bar Rib Gloves"   
4  "Alice Made This - Bayley Round Patina-brass C...   

                                         Description  
0  "One of Prada's most functional designs, this ...  
1  "Falke - Casual yet luxurious, Falke's dark na...  
2  "White and black linen blend peak lapel tuxedo...  
3  "Pair of rib knit cashmere gloves in navy. Sig...  
4  "Alice Made This - Made in the UK, these teal ...  


In [20]:
# Preprocess the data: replace missing text with empty strings in BOTH text
# columns. A missing value on either side of the concatenation below would make
# the combined value NaN, and TfidfVectorizer refuses NaN documents.
df['ProductName'] = df['ProductName'].fillna('')
df['Description'] = df['Description'].fillna('')

# Combine relevant features into one text document per product
df['combined_features'] = df['ProductName'] + ' ' + df['Description']


In [21]:
# Collect the one-hot encoded category / subcategory columns by name prefix.
# NOTE(review): the frame printed after loading shows plain `Category` and
# `SubCategory` columns, which do not match these prefixes — unless the data is
# one-hot encoded elsewhere, this list is presumably empty. Confirm.
category_cols = [
    name
    for name in df.columns
    if name.startswith(('Category_', 'SubCategory_'))
]


In [22]:
# Create TF-IDF vectorizer for text features.
# English stop words are dropped by the vectorizer itself. The fitted `tfidf`
# and the resulting `tfidf_matrix` are what get_recommendations() uses to
# score a query against every product.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['combined_features'])

In [23]:
def get_recommendations(user_input, top_n=5):
    """Return the products most similar to a free-text query.

    The query is transformed with the fitted TF-IDF vectorizer and compared
    with every product by cosine similarity. When ``category_cols`` is
    non-empty, a category similarity is blended in with a 0.7 / 0.3
    text-to-category weighting.

    Parameters
    ----------
    user_input : str
        Free-text description of what the user is looking for.
    top_n : int, default 5
        Maximum number of products to return. Zero or a negative value yields
        an empty result.

    Returns
    -------
    pandas.DataFrame
        Rows of ``df`` ordered from most to least similar, restricted to
        ``ProductName``, ``Description`` and any category columns.
    """
    # Transform user input
    user_vector = tfidf.transform([user_input])

    # Calculate cosine similarity for text features
    text_similarities = cosine_similarity(user_vector, tfidf_matrix).flatten()

    # If category columns exist, incorporate them into the recommendation
    if category_cols:
        category_matrix = df[category_cols].values
        query_lower = user_input.lower()  # invariant across columns, so computed once

        # Flag every category whose name fragments occur in the query. Empty
        # fragments (e.g. produced by a trailing or doubled underscore) are
        # skipped: '' is a substring of every string and would otherwise flag
        # the column for any query.
        category_query = np.array([
            1.0 if any(
                keyword and keyword in query_lower
                for keyword in col.lower().split('_')[1:]
            ) else 0.0
            for col in category_cols
        ])

        category_similarities = cosine_similarity(
            category_query.reshape(1, -1), category_matrix
        ).flatten()

        # Combine similarities (you can adjust the weights)
        combined_similarities = 0.7 * text_similarities + 0.3 * category_similarities
    else:
        combined_similarities = text_similarities

    # Get top N similar items, best first. Clamping top_n keeps a negative
    # value from turning the slice into "almost every row".
    ranked_indices = combined_similarities.argsort()[::-1]
    related_product_indices = ranked_indices[:max(top_n, 0)]

    # Return recommendations
    return df.iloc[related_product_indices][['ProductName', 'Description'] + category_cols]

In [24]:

def interactive_recommendation():
    """Prompt for clothing queries in a loop and print the matches for each.

    Queries are read with ``input()`` until the user enters 'quit' (compared
    case-insensitively). For every other entry the recommendations returned by
    ``get_recommendations`` are printed as a numbered list with the product
    name, the first 100 characters of the description and, when category
    columns exist, the categories whose column value equals 1.0.
    """
    prompt = "Enter your clothing query (or 'quit' to exit): "
    while True:
        user_query = input(prompt)
        if user_query.lower() == 'quit':
            return

        matches = get_recommendations(user_query)
        print("\nRecommendations:")

        rank = 0
        for _, item in matches.iterrows():
            rank += 1
            print(f"{rank}. {item['ProductName']}")
            snippet = item['Description'][:100]
            print(f"   Description: {snippet}...")
            if category_cols:
                # The label is the fragment right after the first underscore.
                labels = [
                    col.split('_')[1]
                    for col in category_cols
                    if item[col] == 1.0
                ]
                print(f"   Categories: {', '.join(labels)}")
            print()

In [25]:
# Demo: run one fixed example query, then start the interactive prompt.
# The interactive loop keeps reading from input() until 'quit' is entered.
if __name__ == "__main__":
    # Example usage
    user_query = "luxury evening gown for formal dinner"
    recommendations = get_recommendations(user_query)
    print(f"Recommendations for '{user_query}':")
    print(recommendations)
    
    print("\nInteractive recommendation system:")
    interactive_recommendation()

Recommendations for 'luxury evening gown for formal dinner':
                                            ProductName  \
4637      "Men's Textured Zebra Jacquard Dinner Jacket"   
1327                    "Evening birds swimming shorts"   
1436  "Men's Micro-Weave Dinner Jacket w/ Satin Lape...   
1309                     "Men's Formal Tuxedo Trousers"   
1419                     "Men's Formal Tuxedo Trousers"   

                                            Description  
4637  "TOM FORD tuxedo/dinner jacket in zebra jacqua...  
1327  "Designer Vilebrequin presents these fresh and...  
1436  "Giorgio Armani dinner jacket in micro weave. ...  
1309  "Brunello Cucinelli formal tuxedo trousers. Co...  
1419  "Brunello Cucinelli formal tuxedo trousers. Co...  

Interactive recommendation system:


# Task 2: Data Analysis Plan & User Query Simulation

In [6]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import nltk

In [7]:
# Download necessary NLTK data for the tokenizer, stop word list and
# lemmatizer used by preprocess_text().
# NOTE(review): newer NLTK releases may look for 'punkt_tab' rather than
# 'punkt' when tokenizing — confirm against the installed version.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\amiku\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\amiku\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\amiku\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [8]:
# Load the dataset. This rebinds `df`, so the frame built in Task 1 (including
# its `combined_features` column) is replaced by a fresh copy of the raw data.
df = pd.read_csv('Luxury_Products_Apparel_Data.csv')

## Step 1: Data Preprocessing


In [9]:
# The stop word set and the lemmatizer are the same for every call, so they are
# built once on first use instead of once per processed row.
_TEXT_RESOURCES = {}


def _get_text_resources():
    """Return ``(stop_words, lemmatizer)``, creating both on first use."""
    if not _TEXT_RESOURCES:
        stop_words = frozenset(stopwords.words('english'))
        lemmatizer = WordNetLemmatizer()
        # Stored only after both were built, so a failed first attempt
        # (e.g. missing NLTK data) is retried on the next call.
        _TEXT_RESOURCES.update(stop_words=stop_words, lemmatizer=lemmatizer)
    return _TEXT_RESOURCES['stop_words'], _TEXT_RESOURCES['lemmatizer']


def preprocess_text(text):
    """Normalize raw text into a space-separated string of lemmatized tokens.

    Steps: lowercase, delete every character that is neither an ASCII letter
    nor whitespace, tokenize, drop English stop words, lemmatize.

    Parameters
    ----------
    text : str
        Raw text. Must be a string; callers are expected to replace missing
        values beforehand.

    Returns
    -------
    str
        The remaining lemmatized tokens joined by single spaces.
    """
    stop_words, lemmatizer = _get_text_resources()

    # Convert to lowercase
    text = text.lower()
    # Remove special characters (they are deleted, not replaced by a space)
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    # Tokenize
    tokens = word_tokenize(text)
    # Remove stopwords, then lemmatize what is left
    tokens = [
        lemmatizer.lemmatize(token)
        for token in tokens
        if token not in stop_words
    ]
    return ' '.join(tokens)

In [10]:
# Apply preprocessing to relevant columns.
# Missing values are replaced with '' first because preprocess_text() calls
# .lower() on its argument and would fail on a non-string value.
df['processed_name'] = df['ProductName'].fillna('').apply(preprocess_text)
df['processed_description'] = df['Description'].fillna('').apply(preprocess_text)

In [11]:
# Combine processed text: the cleaned name and description, joined by a single
# space, form the one document per product that is vectorized below.
df['combined_text'] = df['processed_name'] + ' ' + df['processed_description']

## Step 2: Feature Extraction


In [12]:
# Fit TF-IDF on the preprocessed text. No stop_words argument is passed here
# because preprocess_text() has already filtered stop words.
# This rebinds `tfidf` and `tfidf_matrix` from Task 1.
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(df['combined_text'])

## Step 3: Search Function


In [13]:

def search_luxury_apparel(query, top_n=5):
    """Search the catalogue for the products most similar to a text query.

    The query goes through the same ``preprocess_text`` pipeline as the
    catalogue text, is transformed with the fitted TF-IDF vectorizer and is
    ranked against every product by cosine similarity.

    Parameters
    ----------
    query : str
        Free-text search request.
    top_n : int, default 5
        Maximum number of products to return. Zero or a negative value yields
        an empty result.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``ProductName``, ``Description`` and ``Similarity``
        columns, ordered from most to least similar.
    """
    # Preprocess the query
    processed_query = preprocess_text(query)

    # Transform the query using the fitted TF-IDF vectorizer
    query_vector = tfidf.transform([processed_query])

    # Calculate cosine similarity
    cosine_similarities = cosine_similarity(query_vector, tfidf_matrix).flatten()

    # Get top N similar items, best first. Clamping top_n keeps a negative
    # value from turning the slice into "almost every row".
    top_indices = cosine_similarities.argsort()[::-1][:max(top_n, 0)]

    # Prepare results. The explicit copy makes the column assignment below act
    # on an independent frame rather than on a chained selection of `df`.
    results = df.iloc[top_indices][['ProductName', 'Description']].copy()
    results['Similarity'] = cosine_similarities[top_indices]

    return results

## Step 4: Generate LLM-like response


In [14]:
def generate_llm_response(query, results):
    """Format search results as a conversational, LLM-style text answer.

    Parameters
    ----------
    query : str
        The user's original query, echoed in the opening sentence.
    results : pandas.DataFrame
        Frame with ``ProductName``, ``Description`` and ``Similarity`` columns,
        one row per recommended product, already in display order.

    Returns
    -------
    str
        An introduction, one numbered entry per row (name, first 100
        characters of the description followed by '...', similarity with two
        decimals) and a closing paragraph.
    """
    parts = [
        f"Based on your query '{query}', here are some luxury apparel recommendations:\n\n"
    ]

    for i, (_, item) in enumerate(results.iterrows(), 1):
        description = item['Description']
        if not isinstance(description, str):
            # A missing description is read from CSV as a float NaN, which
            # cannot be sliced; show it as empty text instead of failing.
            description = '' if pd.isna(description) else str(description)

        parts.append(f"{i}. {item['ProductName']}\n")
        parts.append(f"   Description: {description[:100]}...\n")
        parts.append(f"   Similarity: {item['Similarity']:.2f}\n\n")

    parts.append("These items were selected based on their relevance to your query. ")
    parts.append("The similarity score indicates how closely each item matches your request. ")
    parts.append("Would you like more details about any specific item?")

    return ''.join(parts)

In [15]:
# Example usage: run one search and print it as a formatted text response
user_query = "Show me luxury dresses for a formal evening dinner"
search_results = search_luxury_apparel(user_query)
llm_response = generate_llm_response(user_query, search_results)

print(llm_response)

Based on your query 'Show me luxury dresses for a formal evening dinner', here are some luxury apparel recommendations:

1. "Evening birds swimming shorts"
   Description: "Designer Vilebrequin presents these fresh and colourful Evening Birds swimming shorts. Its psychede...
   Similarity: 0.21

2. "Men's Multi-Stripe Dress Shirt"
   Description: "Brioni dress shirt in multi stripes. Spread collar; French-placket button front. French cuffs; cuff...
   Similarity: 0.19

3. "Men's Eton Trim Fit Check Dress Shirt"
   Description: "Crease-resistant performance construction means a fresh look all day in a modern dress shirt patter...
   Similarity: 0.19

4. "Men's Big \u0026 Tall David Donahue Regular Fit Geometric Dress Shirt"
   Description: "A sharp geometric motif marks a modern-cut dress shirt featuring a wide spread collar and rounded, ...
   Similarity: 0.18

5. "Men's Brooks Brothers Trim Fit Plaid Dress Shirt (Any 3 For $207)"
   Description: "Charming plaid adds color to a smart d