Day 3

In [1]:
pip install scikit-learn pandas numpy

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import pickle

In [3]:
# Read the raw product catalogue (products.csv) into a DataFrame
df = pd.read_csv(
    filepath_or_buffer='C:\\Users\\acer\\Desktop\\Infinity AI Work\\InfinityStyleVerse\\data\\raw\\products.csv'
)

In [4]:
# Preview the first five rows of the loaded frame
df.head(n=5)

Unnamed: 0,title,category,color,material,rating,description
0,Slim Fit Blazer,Formal,Navy,Cotton,4.6,Stylish modern blazer for professionals.
1,Floral Midi Dress,Dress,Red,Polyester,4.3,Elegant floral dress perfect for summer outings.
2,Denim Jacket,Casual,Blue,Denim,4.7,Classic denim jacket with a relaxed fit.
3,Silk Scarf,Accessories,Green,Silk,4.2,Luxurious silk scarf to elevate any outfit.
4,Leather Ankle Boots,Footwear,Black,Leather,4.8,Sleek leather boots for a chic look.


In [5]:
# Collect the description texts that feed the vectoriser. Missing values are
# replaced by empty strings and everything is cast to str, because
# TfidfVectorizer rejects NaN documents and needs string input.
descriptions = df['description'].fillna('').astype(str).values

In [6]:
# Build the TF-IDF vectoriser with its default settings
tfidf = TfidfVectorizer()

# Fit on the descriptions and encode them as a document-term matrix
matrix = tfidf.fit_transform(raw_documents=descriptions)

In [7]:
# Pairwise cosine similarity between every pair of rows of the TF-IDF matrix
similarities = cosine_similarity(X=matrix)

Day 4

In [8]:
# Function to get the most similar products (top 3 by default) for a given index
def get_similar_products(index, top_n=3):
    """Return the products most similar to the product at row position ``index``.

    Reads the notebook-level ``similarities`` matrix and the ``df`` frame.

    Parameters
    ----------
    index : int
        Row position of the query product in ``similarities``. Negative
        values count from the end.
    top_n : int, default 3
        Maximum number of recommendations to return.

    Returns
    -------
    list of tuple
        ``(title, score)`` pairs ordered from highest to lowest score. The
        query product itself is never part of the result.

    Raises
    ------
    IndexError
        If ``index`` does not address a row of ``similarities``.
    """
    n_products = len(similarities)
    # Normalise negative positions so the self-exclusion test below works.
    position = index + n_products if index < 0 else index
    if not 0 <= position < n_products:
        raise IndexError(
            f"product index {index} is out of range for {n_products} products"
        )

    # Drop the query product explicitly instead of assuming it sorts first:
    # another product with an identical description ties at the top score and
    # could otherwise push the query product into its own recommendations.
    candidates = [
        (i, score) for i, score in enumerate(similarities[position]) if i != position
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # The similarity matrix is positional, so titles are read by position
    # rather than by index label.
    titles = df['title']
    return [(titles.iloc[i], score) for i, score in candidates[:max(top_n, 0)]]

Day 5

In [22]:
# Testing by giving a product index: print the recommendations returned by
# get_similar_products for index 0, headed by that product's title.

product_index = 0

print(f"Products similar to {df['title'][product_index]}:")
# Each entry is a (title, score) pair
similar_products = get_similar_products(product_index)
for title, score in similar_products:
    # Scores are printed with two decimals
    print(f"- {title} (Similarity: {score:.2f})")

Products similar to Slim Fit Blazer:
- Linen Blazer (Similarity: 0.26)
- Suede Boots (Similarity: 0.18)
- Suede Sneakers (Similarity: 0.18)


In [23]:
# Same spot-check as above, this time for the product at index 4
product_index = 4

print(f"Products similar to {df['title'][product_index]}:")
# Each entry is a (title, score) pair
similar_products = get_similar_products(product_index)
for title, score in similar_products:
    # Scores are printed with two decimals
    print(f"- {title} (Similarity: {score:.2f})")

Products similar to Leather Ankle Boots:
- Suede Boots (Similarity: 0.62)
- Leather Jacket (Similarity: 0.27)
- Leather Gloves (Similarity: 0.18)


In [24]:
# Same spot-check again, for the product at index 10
product_index = 10

print(f"Products similar to {df['title'][product_index]}:")
# Each entry is a (title, score) pair
similar_products = get_similar_products(product_index)
for title, score in similar_products:
    # Scores are printed with two decimals
    print(f"- {title} (Similarity: {score:.2f})")

Products similar to Knit Sweater:
- Knit Beanie (Similarity: 0.47)
- Knit Vest (Similarity: 0.38)
- Knit Socks (Similarity: 0.35)


In [27]:
# Testing by giving a product title

In [28]:
# Function to get the row position of a product from its title

def get_product_index(title):
    """Return the row position of the first product whose title matches.

    Reads the notebook-level ``df`` frame. An exact match is preferred; when
    there is none, the comparison is retried ignoring surrounding whitespace
    and letter case, so typed input such as ``" knit sweater "`` still
    resolves.

    Parameters
    ----------
    title : str
        Product title to look up.

    Returns
    -------
    int or None
        Zero-based row position in ``df`` (not the index label), or ``None``
        when no title matches.
    """
    titles = df['title']

    # Exact comparison first, so titles that already matched keep their row.
    hits = (titles == title).to_numpy().nonzero()[0]

    if len(hits) == 0 and isinstance(title, str):
        wanted = title.strip().casefold()
        # A blank query must not match products with a blank or missing title.
        if wanted:
            normalised = titles.fillna('').astype(str).str.strip().str.casefold()
            hits = (normalised == wanted).to_numpy().nonzero()[0]

    return int(hits[0]) if len(hits) else None

In [33]:
# Ask the user for a product title
user_title = input("Enter a product title: ")

# Resolve the title to an index (None means it was not found)
index = get_product_index(user_title)

if index is None:
    print("Product was not found. Please check the product title and try again.")
else:
    print(f"Looking for recommendations for: {user_title}")
    similar_products = get_similar_products(index)
    for title, score in similar_products:
        print(f"- {title} (Similarity: {score:.2f})")

Looking for recommendations for: Knit Sweater
- Knit Beanie (Similarity: 0.47)
- Knit Vest (Similarity: 0.38)
- Knit Socks (Similarity: 0.35)


Saving the model

In [10]:
# Persist the product frame together with its similarity matrix
with open(r'C:\Users\acer\Desktop\Infinity AI Work\InfinityStyleVerse\models\recommender_model.pkl', 'wb') as f:
    pickle.dump(
        obj={'df': df, 'similarities': similarities},
        file=f,
    )

print("Recommender model saved successfully!")

Recommender model saved successfully!


Week 3

Day 1

In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import pickle
from IPython.display import Image, display
import numpy as np

In [15]:
# Read the extended product catalogue (new_products.csv) into a DataFrame
df = pd.read_csv(
    filepath_or_buffer='C:\\Users\\acer\\Desktop\\Infinity AI Work\\InfinityStyleVerse\\data\\raw\\new_products.csv'
)

In [16]:
# Normalise product IDs: cast every value to str, then strip leading and
# trailing whitespace. The column is overwritten with the cleaned values.
df['product_id'] = df['product_id'].astype(str).str.strip()


In [18]:
# List the column names of the loaded frame
print("Available columns:", list(df.columns))

Available columns: ['product_id', 'title', 'category', 'color', 'material', 'rating', 'descriptions', 'image_url']


In [19]:
# Join title and description into one text field for TF-IDF. Missing values
# are replaced by empty strings first: NaN + str would turn the whole row into
# NaN, and TfidfVectorizer rejects NaN documents.
df['combined_text'] = (
    df['title'].fillna('').astype(str)
    + " "
    + df['descriptions'].fillna('').astype(str)
)

In [20]:
# TF-IDF over the combined text, using scikit-learn's built-in English stop-word list
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(raw_documents=df['combined_text'])

# Pairwise cosine similarity between all product texts
text_similarities = cosine_similarity(X=tfidf_matrix)

In [21]:
def create_categorical_similarity(df, column):
    """Build a 0/1 matrix marking which rows share the same value in ``column``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to compare.
    column : str
        Name of the column whose values are compared pairwise.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(df), len(df))`` where entry ``[i, j]`` is
        1.0 when rows ``i`` and ``j`` hold equal values and 0.0 otherwise.
    """
    values = df[column].to_numpy()
    # Broadcasting a column vector against a row vector compares every pair of
    # rows in one vectorised step, replacing the nested Python loop.
    return (values[:, None] == values[None, :]).astype(float)

# Pairwise 0/1 matches on the two categorical attributes
category_similarities = create_categorical_similarity(df, 'category')
color_similarities = create_categorical_similarity(df, 'color')

# Relative contribution of each signal to the blended score
weight_text, weight_category, weight_color = 0.6, 0.2, 0.2

# Weighted blend, accumulated term by term in the same order as a single sum
combined_similarities = weight_text * text_similarities
combined_similarities = combined_similarities + weight_category * category_similarities
combined_similarities = combined_similarities + weight_color * color_similarities

In [22]:
def get_similar_products(index, top_n=3):
    """Return the products most similar to the product at row position ``index``.

    Reads the notebook-level ``combined_similarities`` matrix and ``df`` frame.

    Parameters
    ----------
    index : int
        Row position of the query product in ``combined_similarities``.
        Negative values count from the end.
    top_n : int, default 3
        Maximum number of recommendations to return.

    Returns
    -------
    list of tuple
        ``(product_id, title, score, image_url)`` tuples ordered from highest
        to lowest score. The query product itself is never part of the result.

    Raises
    ------
    IndexError
        If ``index`` does not address a row of ``combined_similarities``.
    """
    n_products = len(combined_similarities)
    # Normalise negative positions so the self-exclusion test below works.
    position = index + n_products if index < 0 else index
    if not 0 <= position < n_products:
        raise IndexError(
            f"product index {index} is out of range for {n_products} products"
        )

    # Exclude the query product explicitly instead of assuming it sorts first:
    # a product tying at the top score could otherwise push the query product
    # into its own recommendations.
    candidates = [
        (i, score)
        for i, score in enumerate(combined_similarities[position])
        if i != position
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # The similarity matrix is positional, so the frame is read by position
    # rather than by index label.
    ids, titles, urls = df['product_id'], df['title'], df['image_url']
    return [
        (ids.iloc[i], titles.iloc[i], score, urls.iloc[i])
        for i, score in candidates[:max(top_n, 0)]
    ]

In [23]:
def find_product_index_by_id(product_id):
    """Return the row position of the first product with the given ID.

    Reads the notebook-level ``df`` frame. Both the argument and the stored
    IDs are compared as whitespace-stripped strings, so the lookup does not
    depend on the ``product_id`` column having been cleaned beforehand.

    Parameters
    ----------
    product_id : str or int
        Product identifier to look up.

    Returns
    -------
    int
        Zero-based row position in ``df`` (not the index label).

    Raises
    ------
    ValueError
        If no product carries the given ID.
    """
    product_id = str(product_id).strip()
    known_ids = df['product_id'].astype(str).str.strip()

    # One scan of the column yields the matching row positions.
    positions = (known_ids == product_id).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise ValueError(f"Product ID {product_id} not found")
    return int(positions[0])

In [27]:
# Ask for a product ID and show its recommendations together with their images
user_input = input("Enter a product ID (e.g., 10): ").strip()

try:
    product_index = find_product_index_by_id(user_input)
except ValueError as e:
    # Unknown product ID: print the lookup's message instead of a traceback.
    print(e)
else:
    # Runs only after a successful lookup, so a ValueError raised while
    # ranking or rendering is no longer misreported as an unknown ID.
    similar_products = get_similar_products(product_index)
    for product_id, title, score, image_url in similar_products:
        print(f"Product ID: {product_id}, Title: {title}, Score: {score:.2f}")
        try:
            display(Image(url=image_url))
        except Exception as e:
            # Best-effort rendering: fall back to a placeholder image rather
            # than aborting the remaining recommendations.
            print(f"Error loading image: {e}, using placeholder")
            display(Image(url="https://via.placeholder.com/150"))
        print()

Product ID: 2, Title: maxi dress, Score: 0.52



Product ID: 3, Title: empire puff sleeve dress, Score: 0.48



Product ID: 4, Title: smocked fit and flare midi dress, Score: 0.43





In [30]:
# Persist the product frame together with the blended similarity matrix
with open(r'C:\Users\acer\Desktop\Infinity AI Work\InfinityStyleVerse\models\new_recommender_model.pkl', 'wb') as f:
    pickle.dump(
        obj={'df': df, 'similarities': combined_similarities},
        file=f,
    )
print("Updated recommender model saved successfully!")

Updated recommender model saved successfully!
