In [10]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split, cross_validate
from surprise import accuracy
from surprise.prediction_algorithms.knns import KNNBasic

In [3]:

# Load the dataset
df = pd.read_csv('DummyMyntraDataset2.csv')  # Replace with your dataset file path

# Assume all interactions are from a single user with user_id = 1
df['user_id'] = 1

# Create a 'swipe' column based on 'No of Right Swipes'
df['swipe'] = df['No of Right Swipes'].apply(lambda x: 1 if x > 0 else 0)  # 1 for right swipe, 0 for no right swipe

# Prepare data for collaborative filtering
reader = Reader(rating_scale=(0, 1))  # Assuming 0 for left swipe, 1 for right swipe
data = Dataset.load_from_df(df[['user_id', 'p_id', 'swipe']], reader)

# Split the data into training and testing sets
trainset, testset = train_test_split(data, test_size=0.25)

# Use Singular Value Decomposition (SVD) for collaborative filtering
algo = SVD()
algo.fit(trainset)

# Test the algorithm on the testset
predictions = algo.test(testset)
print("RMSE:", accuracy.rmse(predictions))


RMSE: 0.0000
RMSE: 0.0


In [14]:
import pandas as pd
from surprise import Dataset, Reader, SVD, KNNBasic
from surprise.model_selection import train_test_split, cross_validate
from surprise import accuracy
from surprise.prediction_algorithms.knns import KNNBasic
from sklearn.metrics import precision_score, recall_score

# Assume all interactions are from a single user with user_id = 1
df['user_id'] = 1

# Create a 'swipe' column based on 'No of Right Swipes'
df['swipe'] = df['No of Right Swipes'].apply(lambda x: 1 if x > 0 else 0)  # 1 for right swipe, 0 for no right swipe

# Prepare data for collaborative filtering
reader = Reader(rating_scale=(0, 1))  # Assuming 0 for left swipe, 1 for right swipe
data = Dataset.load_from_df(df[['user_id', 'p_id', 'swipe']], reader)

# Split the data into training and testing sets
trainset, testset = train_test_split(data, test_size=0.25)

# Use Singular Value Decomposition (SVD) for collaborative filtering
algo = SVD()
algo.fit(trainset)

# Test the algorithm on the testset
predictions = algo.test(testset)

# Compute RMSE
rmse = accuracy.rmse(predictions)
print("RMSE:", rmse)

# Compute Precision and Recall using test set predictions
y_true = [pred.r_ui for pred in predictions]  # Actual swipe values
y_pred = [round(pred.est) for pred in predictions]  # Predicted swipe values

precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)

print(f"Precision: {precision}")
print(f"Recall: {recall}")

# Example of using KNNBasic for comparison
# Use KNNBasic for collaborative filtering
knn_algo = KNNBasic()
knn_algo.fit(trainset)

# Get predictions for the test set
knn_predictions = knn_algo.test(testset)

# Compute RMSE for KNNBasic
knn_rmse = accuracy.rmse(knn_predictions)
print("\nKNNBasic RMSE:", knn_rmse)

# Compute Precision and Recall for KNNBasic
knn_y_pred = [round(pred.est) for pred in knn_predictions]

knn_precision = precision_score(y_true, knn_y_pred)
knn_recall = recall_score(y_true, knn_y_pred)

print(f"KNNBasic Precision: {knn_precision}")
print(f"KNNBasic Recall: {knn_recall}")


RMSE: 0.0000
RMSE: 0.0
Precision: 1.0
Recall: 1.0
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.0141

KNNBasic RMSE: 0.014084507042253502
KNNBasic Precision: 1.0
KNNBasic Recall: 1.0


In [15]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

In [17]:
import pandas as pd
import requests

# Assuming 'img' column contains URLs to images
for index, row in df.iterrows():
    img_url = row['img']
    try:
        response = requests.get(img_url)
        if response.status_code == 200:
            print(f"Image URL {img_url} is accessible.")
        else:
            print(f"Image URL {img_url} returned status code {response.status_code}.")
    except Exception as e:
        print(f"Error accessing image URL {img_url}: {str(e)}")


Image URL http://assets.myntassets.com/assets/images/17048614/2022/2/4/b0eb9426-adf2-4802-a6b3-5dbacbc5f2511643971561167KhushalKWomenBlackEthnicMotifsAngrakhaBeadsandStonesKurtawit7.jpg is accessible.
Image URL http://assets.myntassets.com/assets/images/16524740/2021/12/29/17ab2ac8-2e60-422d-9d20-2527415932361640754214931-STRAPPY-SET-IN-ORANGE-WITH-ORGANZA-DUPATTA-5961640754214349-2.jpg is accessible.
Image URL http://assets.myntassets.com/assets/images/16331376/2021/12/2/b8c4f90f-683c-48d2-b8ac-19891a87c0651638428628378KurtaSets1.jpg is accessible.
Image URL http://assets.myntassets.com/assets/images/14709966/2021/7/10/d2407657-1f04-4d13-9f52-9e134050489b1625905793495-Nayo-Women-Red-Ethnic-Motifs-Printed-Empire-Pure-Cotton-Kurt-1.jpg is accessible.
Image URL http://assets.myntassets.com/assets/images/11056154/2019/12/5/30b0017d-7e72-4d40-9633-ef78d01719741575541717470-AHIKA-Women-Black--Green-Printed-Straight-Kurta-990157554171-1.jpg is accessible.
Image URL http://assets.myntassets.c

Image URL http://assets.myntassets.com/assets/images/19181470/2022/7/20/fb88fbe3-b55c-4bbb-9f51-63d48ffe93fd1658298777250KurtaSets1.jpg is accessible.
Image URL http://assets.myntassets.com/assets/images/12105006/2020/9/4/bce63f51-fa78-4456-a4ce-85d193628e761599199586248-Sangria-Women-Pink--Gold-Toned-Printed-Kurta-with-Trousers-8-1.jpg is accessible.
Image URL http://assets.myntassets.com/assets/images/17487194/2022/3/21/2dd80782-75be-496b-8bac-2978541418161647846846121-Libas-Women-Kurta-Sets-7321647846845273-1.jpg is accessible.
Image URL http://assets.myntassets.com/assets/images/11636314/2022/4/18/c4ffb5ae-4483-4e2c-ba02-c38e2adb11361650283630240ShaebySASSAFRASWomenBlueOff-WhitePrintedAnarkaliKurta1.jpg is accessible.
Image URL http://assets.myntassets.com/assets/images/16706794/2022/2/16/59d4aa7a-f05c-4eda-b796-a45078f798371645012473502-Varanga-Magenta-Net-Strap-Straight-Kurta-With-Flared-Sharara-1.jpg is accessible.
Image URL http://assets.myntassets.com/assets/images/17140538/20

Image URL http://assets.myntassets.com/assets/images/17320642/2022/2/28/9df28461-0fb1-4f06-b0aa-7c8b1d9fc4f01646033586343NayoWomenNavyBlueFloralEmbroideredPanelledPureCottonKurtiwit1.jpg is accessible.
Image URL http://assets.myntassets.com/assets/images/10317841/2019/8/8/dc7bb0aa-34f6-435a-9649-f7381927ba251565268692798-Jaipur-Kurti-Women-Kurta-Sets-6041565268691298-1.jpg is accessible.
Image URL http://assets.myntassets.com/assets/images/18929148/2022/7/1/7a6c7397-7bc3-4f19-8ab8-ba126444b0611656672761489VishudhWomenTurquoiseBluePleatedKurtiwithDhotiPantsWithDupat1.jpg is accessible.
Image URL http://assets.myntassets.com/assets/images/14848486/2021/9/1/849c44d7-0274-49fa-bb15-c5d786b74e351630497951879-Biba-Women-Yellow-Printed-Regular-Gotta-Patti-Kurta-with-Tro-1.jpg is accessible.
Image URL http://assets.myntassets.com/assets/images/19265068/2022/7/27/aa4d0af3-23d2-4a01-b029-6783f01ac6531658894918812BaisacraftsWomenMagentaEthnicMotifsPrintedPureCottonKurtawit1.jpg is accessible.
Ima

In [16]:

# Combine relevant columns into a single string for each product
columns_to_combine = ['name', 'colour', 'brand', 'Body Shape ID', 'Body or Garment Size', 'Bottom Closure',
                      'Bottom Fabric', 'Bottom Pattern', 'Bottom Type', 'Dupatta', 'Dupatta Border', 
                      'Dupatta Fabric', 'Dupatta Pattern', 'Main Trend', 'Neck', 'Number of Pockets', 
                      'Occasion', 'Pattern Coverage', 'Print or Pattern Type', 'Sleeve Length', 
                      'Sleeve Styling', 'Slit Detail', 'Stitch', 'Sustainable', 'Top Design Styling', 
                      'Top Fabric', 'Top Hemline', 'Top Length', 'Top Pattern', 'Top Shape', 'Top Type', 
                      'Waistband', 'Wash Care', 'Weave Pattern', 'Weave Type', 'Ornamentation']

# Fill NaN values with empty string
df[columns_to_combine] = df[columns_to_combine].fillna('')

# Combine the columns into a single 'description' column
df['description'] = df[columns_to_combine].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)

# Normalize the numeric columns
numeric_columns = ['No of Right Swipes', 'No of rents', 'ratingCount', 'avg_rating']
scaler = MinMaxScaler()
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

# Create a TF-IDF Vectorizer to transform product descriptions into vectors
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['description'])

# Combine TF-IDF vectors with normalized numeric features
tfidf_df = pd.DataFrame(tfidf_matrix.toarray())
combined_features = pd.concat([tfidf_df, df[numeric_columns].reset_index(drop=True)], axis=1)

# Compute the cosine similarity matrix
cosine_sim = cosine_similarity(combined_features)

# Function to get recommendations based on product index
def get_recommendations(product_index, cosine_sim=cosine_sim):
    sim_scores = list(enumerate(cosine_sim[product_index]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]  # Get top 10 similar products
    product_indices = [i[0] for i in sim_scores]
    return df.iloc[product_indices]

# Example usage
product_index = 45  # Replace with a valid product index
recommended_products = get_recommendations(product_index)
print(recommended_products[['p_id', 'img' ,'name', 'No of Right Swipes', 'No of rents', 'ratingCount', 'avg_rating']])


        p_id                                                img  \
66  14949672  http://assets.myntassets.com/assets/images/149...   
25  14346084  http://assets.myntassets.com/assets/images/pro...   
62  14967448  http://assets.myntassets.com/assets/images/149...   
43  13437328  http://assets.myntassets.com/assets/images/134...   
84  13572242  http://assets.myntassets.com/assets/images/135...   
44  19181470  http://assets.myntassets.com/assets/images/191...   
9   17048604  http://assets.myntassets.com/assets/images/170...   
85  15921898  http://assets.myntassets.com/assets/images/159...   
77  19261926  http://assets.myntassets.com/assets/images/192...   
20  13810898  http://assets.myntassets.com/assets/images/138...   

                                                 name  No of Right Swipes  \
66  Stylum Women Pink Ethnic Motifs Printed Pure C...            0.025888   
25  Nayo Women Pink & Off-White Floral Screen Prin...            0.079674   
62  Indo Era Women Green Ethnic

In [19]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

# Combine relevant columns into a single string for each product
columns_to_combine = ['name', 'colour', 'brand', 'Body Shape ID', 'Body or Garment Size', 'Bottom Closure',
                      'Bottom Fabric', 'Bottom Pattern', 'Bottom Type', 'Dupatta', 'Dupatta Border', 
                      'Dupatta Fabric', 'Dupatta Pattern', 'Main Trend', 'Neck', 'Number of Pockets', 
                      'Occasion', 'Pattern Coverage', 'Print or Pattern Type', 'Sleeve Length', 
                      'Sleeve Styling', 'Slit Detail', 'Stitch', 'Sustainable', 'Top Design Styling', 
                      'Top Fabric', 'Top Hemline', 'Top Length', 'Top Pattern', 'Top Shape', 'Top Type', 
                      'Waistband', 'Wash Care', 'Weave Pattern', 'Weave Type', 'Ornamentation']

# Fill NaN values with empty string
df[columns_to_combine] = df[columns_to_combine].fillna('')

# Combine the columns into a single 'description' column
df['description'] = df[columns_to_combine].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)

# Normalize the numeric columns
numeric_columns = ['No of Right Swipes', 'No of rents', 'ratingCount', 'avg_rating']
scaler = MinMaxScaler()
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

# Create a TF-IDF Vectorizer to transform product descriptions into vectors
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['description'])

# Combine TF-IDF vectors with normalized numeric features
tfidf_df = pd.DataFrame(tfidf_matrix.toarray())
combined_features = pd.concat([tfidf_df, df[numeric_columns].reset_index(drop=True)], axis=1)

# Compute the cosine similarity matrix
cosine_sim = cosine_similarity(combined_features)

# Function to get recommendations based on product index
def get_recommendations(product_index, cosine_sim=cosine_sim):
    sim_scores = list(enumerate(cosine_sim[product_index]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]  # Get top 10 similar products
    product_indices = [i[0] for i in sim_scores]
    return df.iloc[product_indices]

# Example usage
product_index = 45  # Replace with a valid product index
recommended_products = get_recommendations(product_index)

# Display recommended products with proper URLs
for index, row in recommended_products.iterrows():
    print(f"Product ID: {row['p_id']}")
    print(f"Name: {row['name']}")
    print(f"Image URL: {row['img']}")
    print(f"No of Right Swipes: {row['No of Right Swipes']}")
    print(f"No of rents: {row['No of rents']}")
    print(f"Rating Count: {row['ratingCount']}")
    print(f"Avg Rating: {row['avg_rating']}")
    print("\n")


Product ID: 14949672
Name: Stylum Women Pink Ethnic Motifs Printed Pure Cotton Kurta with Trousers & Dupatta
Image URL: http://assets.myntassets.com/assets/images/14949672/2021/7/27/b1fbf2ec-09d2-46bb-89f8-91849ebac9a61627375681803StylumWomenPinkEthnicMotifsPrintedPureCottonKurtawithPalazzo1.jpg
No of Right Swipes: 0.02588834187347199
No of rents: 0.021677993906726038
Rating Count: 0.018477596501951195
Avg Rating: 0.42176150851798155


Product ID: 14346084
Name: Nayo Women Pink & Off-White Floral Screen Print A-Line Pure Cotton Kurta Set With Dupatta
Image URL: http://assets.myntassets.com/assets/images/productimage/2021/5/19/3d4648ec-8534-4333-ba00-71f016a7889c1621410078449-1.jpg
No of Right Swipes: 0.07967447000904251
No of rents: 0.07776736192484963
Rating Count: 0.05839484696036485
Avg Rating: 0.7560318964378929


Product ID: 14967448
Name: Indo Era Women Green Ethnic Motifs Printed Panelled Gotta Patti Pure Cotton Kurta with Trousers & With
Image URL: http://assets.myntassets.com/

In [20]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

# Sample dataset loading and preprocessing (replace with your actual dataset loading)
df = pd.read_csv('DummyMyntraDataset2.csv')  # Replace with your dataset file path

# Combine relevant columns into a single string for each product
columns_to_combine = ['name', 'colour', 'brand', 'Body Shape ID', 'Body or Garment Size', 'Bottom Closure',
                      'Bottom Fabric', 'Bottom Pattern', 'Bottom Type', 'Dupatta', 'Dupatta Border', 
                      'Dupatta Fabric', 'Dupatta Pattern', 'Main Trend', 'Neck', 'Number of Pockets', 
                      'Occasion', 'Pattern Coverage', 'Print or Pattern Type', 'Sleeve Length', 
                      'Sleeve Styling', 'Slit Detail', 'Stitch', 'Sustainable', 'Top Design Styling', 
                      'Top Fabric', 'Top Hemline', 'Top Length', 'Top Pattern', 'Top Shape', 'Top Type', 
                      'Waistband', 'Wash Care', 'Weave Pattern', 'Weave Type', 'Ornamentation']

# Fill NaN values with empty string
df[columns_to_combine] = df[columns_to_combine].fillna('')

# Combine the columns into a single 'description' column
df['description'] = df[columns_to_combine].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)

# Keep original numeric columns for display
original_numeric_columns = ['No of Right Swipes', 'No of rents', 'ratingCount', 'avg_rating']

# Normalize the numeric columns for similarity calculation
numeric_columns = ['No of Right Swipes', 'No of rents', 'ratingCount', 'avg_rating']
scaler = MinMaxScaler()
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

# Create a TF-IDF Vectorizer to transform product descriptions into vectors
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['description'])

# Combine TF-IDF vectors with normalized numeric features
tfidf_df = pd.DataFrame(tfidf_matrix.toarray())
combined_features = pd.concat([tfidf_df, df[numeric_columns].reset_index(drop=True)], axis=1)

# Compute the cosine similarity matrix
cosine_sim = cosine_similarity(combined_features)

# Function to get recommendations based on product index
def get_recommendations(product_index, cosine_sim=cosine_sim):
    sim_scores = list(enumerate(cosine_sim[product_index]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]  # Get top 10 similar products
    product_indices = [i[0] for i in sim_scores]
    return df.iloc[product_indices]

# Example usage
product_index = 45  # Replace with a valid product index
recommended_products = get_recommendations(product_index)

# Display recommended products with proper URLs and original numeric values
for index, row in recommended_products.iterrows():
    print(f"Product ID: {row['p_id']}")
    print(f"Name: {row['name']}")
    print(f"Image URL: {row['img']}")
    print(f"No of Right Swipes: {int(row['No of Right Swipes'] * (scaler.data_max_[0] - scaler.data_min_[0]) + scaler.data_min_[0])}")
    print(f"No of rents: {int(row['No of rents'] * (scaler.data_max_[1] - scaler.data_min_[1]) + scaler.data_min_[1])}")
    print(f"Rating Count: {int(row['ratingCount'] * (scaler.data_max_[2] - scaler.data_min_[2]) + scaler.data_min_[2])}")
    print(f"Avg Rating: {row['avg_rating'] * (scaler.data_max_[3] - scaler.data_min_[3]) + scaler.data_min_[3]:.2f}")
    print("\n")


Product ID: 14949672
Name: Stylum Women Pink Ethnic Motifs Printed Pure Cotton Kurta with Trousers & Dupatta
Image URL: http://assets.myntassets.com/assets/images/14949672/2021/7/27/b1fbf2ec-09d2-46bb-89f8-91849ebac9a61627375681803StylumWomenPinkEthnicMotifsPrintedPureCottonKurtawithPalazzo1.jpg
No of Right Swipes: 790
No of rents: 566
Rating Count: 397
Avg Rating: 3.78


Product ID: 14346084
Name: Nayo Women Pink & Off-White Floral Screen Print A-Line Pure Cotton Kurta Set With Dupatta
Image URL: http://assets.myntassets.com/assets/images/productimage/2021/5/19/3d4648ec-8534-4333-ba00-71f016a7889c1621410078449-1.jpg
No of Right Swipes: 2396
No of rents: 2003
Rating Count: 1247
Avg Rating: 4.26


Product ID: 14967448
Name: Indo Era Women Green Ethnic Motifs Printed Panelled Gotta Patti Pure Cotton Kurta with Trousers & With
Image URL: http://assets.myntassets.com/assets/images/14967448/2021/9/24/797117a8-153b-4dc0-a33e-5d2e82ef80701632484239222-Indo-Era-Women-Green-Ethnic-Motifs-Printe