In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

print("Modules Imported Sucessfully")


Modules Imported Sucessfully


In [3]:
data = pd.read_csv('data.csv')
df = pd.DataFrame(data)
df.shape

(27555, 10)

In [4]:
#Removing NaN Values from DataSet
df['text'] = (df['product'] + " " + df['description']).bfill()
print("Cleared NaN Values Successfully")
df.head()

Cleared NaN Values Successfully


Unnamed: 0,index,product,category,sub_category,brand,sale_price,market_price,type,rating,description,text
0,1,Garlic Oil - Vegetarian Capsule 500 mg,Beauty & Hygiene,Hair Care,Sri Sri Ayurveda,220.0,220.0,Hair Oil & Serum,4.1,This Product contains Garlic Oil that is known...,Garlic Oil - Vegetarian Capsule 500 mg This Pr...
1,2,Water Bottle - Orange,"Kitchen, Garden & Pets",Storage & Accessories,Mastercook,180.0,180.0,Water & Fridge Bottles,2.3,"Each product is microwave safe (without lid), ...",Water Bottle - Orange Each product is microwav...
2,3,"Brass Angle Deep - Plain, No.2",Cleaning & Household,Pooja Needs,Trm,119.0,250.0,Lamp & Lamp Oil,3.4,"A perfect gift for all occasions, be it your m...","Brass Angle Deep - Plain, No.2 A perfect gift ..."
3,4,Cereal Flip Lid Container/Storage Jar - Assort...,Cleaning & Household,Bins & Bathroom Ware,Nakoda,149.0,176.0,"Laundry, Storage Baskets",3.7,Multipurpose container with an attractive desi...,Cereal Flip Lid Container/Storage Jar - Assort...
4,5,Creme Soft Soap - For Hands & Body,Beauty & Hygiene,Bath & Hand Wash,Nivea,162.0,162.0,Bathing Bars & Soaps,4.4,Nivea Creme Soft Soap gives your skin the best...,Creme Soft Soap - For Hands & Body Nivea Creme...


In [5]:
# Text preprocessing and vectorization
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['text'])

tfidf_matrix

<27555x31265 sparse matrix of type '<class 'numpy.float64'>'
	with 1111566 stored elements in Compressed Sparse Row format>

In [6]:
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
cosine_sim

array([[1.        , 0.01259131, 0.01472212, ..., 0.01400497, 0.00847576,
        0.        ],
       [0.01259131, 1.        , 0.00621498, ..., 0.        , 0.        ,
        0.01565782],
       [0.01472212, 0.00621498, 1.        , ..., 0.00454421, 0.        ,
        0.        ],
       ...,
       [0.01400497, 0.        , 0.00454421, ..., 1.        , 0.        ,
        0.        ],
       [0.00847576, 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.01565782, 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [22]:
# Function to get recommendations
def get_recommendations(product_name, cosine_sim=cosine_sim):
    try:
        # Get the index of the product
        product_index = df[df['product'] == product_name].index[0]
    except IndexError:
        print("Product not found.")
        return
    
    sim_scores = list(enumerate(cosine_sim[product_index]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:4]  # Get top 3 recommendations
    product_indices = [i[0] for i in sim_scores]
    return df['product'].iloc[product_indices]

# Example usage
product_name = input("Enter the product name: ")
print("Product Name: ",product_name)
recommendations = get_recommendations(product_name)
if recommendations is not None:
    print(recommendations)

Product Name:  Onion
19875          Potato Onion Tomato 1 kg Each
24895            Sambhar Onion (Small Onion)
5810     Sambar Onion - Peeled (Small Onion)
Name: product, dtype: object


In [21]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Function to evaluate model performance
def evaluate_model(true_labels, predicted_labels):
    precision = precision_score(true_labels, predicted_labels)
    recall = recall_score(true_labels, predicted_labels)
    f1 = f1_score(true_labels, predicted_labels)
    return precision, recall, f1

# Example usage
product_name = input("Enter the product name: ")
print("Product Name: ", product_name)
recommendations = get_recommendations(product_name)

if recommendations is not None:
    # True labels: Whether the recommended products are actually relevant or not
    true_labels = [1 if product in recommendations.values else 0 for product in df['product']]
    
    # Predicted labels: Whether the product is recommended or not
    predicted_labels = [1 if product in recommendations.values and product != product_name else 0 for product in df['product']]
    
    # Evaluate model performance
    precision, recall, f1 = evaluate_model(true_labels, predicted_labels)
    
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)


Product Name:  Onion
Precision: 1.0
Recall: 1.0
F1 Score: 1.0


In [24]:
import pickle

# Save cosine_sim and tfidf_matrix
with open('model.pkl', 'wb') as f:
    pickle.dump((cosine_sim, tfidf_matrix), f)

print("Saved Model Sucessfully")

print("Saved Model Sucessfully")

Saved Model Sucessfully
