<a href="https://colab.research.google.com/github/ChoudariSanjana/Recommender-systems-for-E-commerce-platforms/blob/main/Recommender_Systems_for_E_commerce_platforms.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install numpy==1.23.5
import os
os.kill(os.getpid(), 9)  # Force runtime restart (Colab)



Import Libraries

In [None]:
import pickle

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import SVD, Dataset, Reader
from surprise.model_selection import KFold, cross_validate

Simulated Ratings Data

In [None]:
# Toy explicit-feedback data: one (user_id, product_id, rating) record per row.
ratings = pd.DataFrame(
    [
        ('U1', 'P1', 5),
        ('U2', 'P1', 4),
        ('U3', 'P2', 3),
        ('U1', 'P3', 2),
        ('U2', 'P2', 5),
        ('U3', 'P3', 4),
        ('U1', 'P2', 3),
    ],
    columns=['user_id', 'product_id', 'rating'],
)

Simulated Product Metadata

In [None]:
# Toy product catalogue: one (product_id, name, description) record per row.
# The free-text description is what the content-based step vectorises.
products = pd.DataFrame(
    [
        ('P1', 'Red Shirt', 'bright red cotton shirt for men'),
        ('P2', 'Blue Jeans', 'stylish blue denim jeans for women'),
        ('P3', 'Green Pants', 'comfortable green cotton pants unisex'),
        ('P4', 'Black Jacket', 'black leather jacket for winter'),
        ('P5', 'White Sneakers', 'lightweight white sneakers for sports lovers'),
    ],
    columns=['product_id', 'name', 'description'],
)

Train Collaborative Filtering Model (SVD)

In [None]:
# Wrap the ratings frame in a Surprise Dataset; the Reader declares the 1-5 rating scale.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[['user_id', 'product_id', 'rating']], reader)

# The final model is trained on every available rating (no hold-out split).
trainset = data.build_full_trainset()

# SVD initialises its factors randomly; a fixed seed makes the fitted model and
# all downstream scores reproducible across reruns.
algo = SVD(random_state=42)
algo.fit(trainset);  # trailing ';' hides the estimator repr in the cell output

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f311b005d90>

In [None]:
# Evaluate a *separate* SVD instance: cross_validate() refits the estimator it is
# given on each training fold, so passing `algo` would leave it trained on only a
# subset of the ratings for the prediction and pickling cells that follow.
# The estimator and the fold split are both seeded so the metrics are reproducible.
cross_validate(
    SVD(random_state=42),
    data,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=2, random_state=42, shuffle=True),
    verbose=True,
)

Evaluating RMSE, MAE of algorithm SVD on 2 split(s).

                  Fold 1  Fold 2  Mean    Std     
RMSE (testset)    1.5271  1.2557  1.3914  0.1357  
MAE (testset)     1.4183  1.1342  1.2763  0.1420  
Fit time          0.00    0.00    0.00    0.00    
Test time         0.00    0.00    0.00    0.00    


{'test_rmse': array([1.52713569, 1.25573484]),
 'test_mae': array([1.41829875, 1.13423413]),
 'fit_time': (0.0008420944213867188, 0.0002315044403076172),
 'test_time': (0.0003838539123535156, 6.413459777832031e-05)}

Content-Based Filtering (CBF)

In [None]:
# Turn each product description into a TF-IDF vector, dropping English stop words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(products['description'])

# Pairwise cosine similarity between all description vectors; with a single
# argument the matrix is compared against itself. Row/column i corresponds to
# the i-th row of `products`.
cos_sim = cosine_similarity(tfidf_matrix)

Recommend Similar Products

In [None]:
def recommend_similar_products(product_id, top_n=2):
    """Return the ``top_n`` catalogue products most similar to ``product_id``.

    Similarity is read from the row of ``cos_sim`` that corresponds to the
    product's position in ``products``. The query product itself is always
    excluded, even when another product ties with it at similarity 1.0
    (e.g. identical descriptions).

    Args:
        product_id: Identifier to look up in ``products['product_id']``.
        top_n: Maximum number of recommendations; values <= 0 give an empty frame.

    Returns:
        DataFrame with columns ``product_id`` and ``name``, most similar first.

    Raises:
        IndexError: If ``product_id`` is not present in ``products``.
    """
    catalogue_ids = products['product_id'].tolist()
    if product_id not in catalogue_ids:
        raise IndexError(f"product_id {product_id!r} not found in products")
    # Row *position* (not index label): cos_sim and .iloc are both positional,
    # so this also works when `products` does not have a default RangeIndex.
    idx = catalogue_ids.index(product_id)

    # Drop the query item explicitly instead of assuming it sorts first.
    others = [(pos, score) for pos, score in enumerate(cos_sim[idx]) if pos != idx]
    # sorted() is stable, so equally similar products keep catalogue order.
    ranked = sorted(others, key=lambda pair: pair[1], reverse=True)

    top_positions = [pos for pos, _ in ranked[:max(top_n, 0)]]
    return products.iloc[top_positions][['product_id', 'name']]

Test it: products most similar to P1 (Red Shirt)

In [None]:
# Demo: the two catalogue items whose descriptions are closest to P1.
recommend_similar_products(product_id='P1', top_n=2)

Unnamed: 0,product_id,name
2,P3,Green Pants
1,P2,Blue Jeans


Hybrid Recommender System

In [None]:
def normalize_score(score, min_val=1, max_val=5):
    """Linearly rescale ``score`` from ``[min_val, max_val]`` onto ``[0, 1]``.

    ``min_val`` maps to 0 and ``max_val`` maps to 1. Values outside the range
    are not clipped, so the result can fall outside ``[0, 1]``.
    """
    offset = score - min_val
    span = max_val - min_val
    return offset / span

def hybrid_score(user_id, product_id, alpha=0.5):
    """Blend a collaborative-filtering and a content-based score for one pair.

    * CF part: the rating predicted by ``algo`` for ``(user_id, product_id)``,
      rescaled to ``[0, 1]`` with ``normalize_score``.
    * CBF part: the highest cosine similarity between ``product_id`` and any
      *other* product the user has rated (0.0 when there is none). The
      candidate's similarity to itself is excluded, because it is always 1.0
      and would make this term a constant.

    Args:
        user_id: User identifier as used in ``ratings``.
        product_id: Candidate product; must be present in ``products``.
        alpha: Weight of the CF part; the CBF part gets ``1 - alpha``.

    Returns:
        ``alpha * cf_score + (1 - alpha) * cbf_score``.

    Raises:
        IndexError: If ``product_id`` is not present in ``products``.
    """
    catalogue_ids = products['product_id'].tolist()
    if product_id not in catalogue_ids:
        raise IndexError(f"product_id {product_id!r} not found in products")
    # Row position (not index label), because cos_sim is indexed positionally.
    idx = catalogue_ids.index(product_id)

    # CF score
    cf_score = normalize_score(algo.predict(user_id, product_id).est)

    # CBF score: similarity of the candidate to the user's rating history.
    user_rated = set(ratings.loc[ratings['user_id'] == user_id, 'product_id'])
    sim_vector = cos_sim[idx]
    history_sims = [
        sim_vector[pos]
        for pos, pid in enumerate(catalogue_ids)
        if pos != idx and pid in user_rated
    ]
    # No (other) rated product in the catalogue -> no content evidence.
    cbf_score = max(history_sims, default=0.0)

    return alpha * cf_score + (1 - alpha) * cbf_score

Top-N Hybrid Recommendations

In [None]:
def get_top_n(user_id, base_product_id, n=3, alpha=0.5):
    """Return the ``n`` best hybrid recommendations for ``user_id``.

    Candidates are all catalogue products except those the user has already
    rated and the ``base_product_id`` anchor itself (recommending the product
    the request is based on is never useful). Each candidate is scored with
    ``hybrid_score`` and the highest-scoring ones are returned.

    Args:
        user_id: User identifier as used in ``ratings``.
        base_product_id: Anchor product of the request; excluded from the result.
        n: Maximum number of rows to return; values <= 0 give an empty frame.
        alpha: CF weight forwarded to ``hybrid_score``.

    Returns:
        DataFrame with ``product_id``, ``hybrid_score`` and the remaining
        ``products`` columns, sorted by descending score. Ties keep catalogue
        order.
    """
    # A set gives O(1) membership checks while filtering the catalogue.
    excluded = set(ratings.loc[ratings['user_id'] == user_id, 'product_id'])
    excluded.add(base_product_id)
    candidates = [pid for pid in products['product_id'] if pid not in excluded]

    scored = [(pid, hybrid_score(user_id, pid, alpha)) for pid in candidates]
    # sorted() is stable, so equal scores stay in catalogue order.
    top = sorted(scored, key=lambda pair: pair[1], reverse=True)[:max(n, 0)]

    ranked = pd.DataFrame(top, columns=['product_id', 'hybrid_score'])
    # A left join keeps the rows in ranking order; every candidate comes from
    # `products`, so the result has the same rows an inner join would produce.
    return ranked.merge(products, on='product_id', how='left', sort=False)

Test: top hybrid recommendations for user U1

In [None]:
# Demo: hybrid recommendations for user U1, requested from product P1.
get_top_n(user_id='U1', base_product_id='P1')

Unnamed: 0,product_id,hybrid_score,name,description
0,P5,0.766203,White Sneakers,lightweight white sneakers for sports lovers
1,P4,0.766203,Black Jacket,black leather jacket for winter


Save Model for Later Use

In [None]:
# Serialise the SVD estimator held in `algo` to svd_model.pkl in the working directory.
# Pickle files can execute code when loaded, so only unpickle this file from a trusted source.
with open('svd_model.pkl', 'wb') as model_file:
    pickle.dump(algo, model_file)

To download:

In [None]:
# google.colab is only importable inside a Colab runtime. Guard the import so the
# notebook still runs to completion elsewhere (e.g. local Jupyter).
try:
    from google.colab import files
except ImportError:
    print("google.colab is not available; skipping browser download of 'svd_model.pkl'.")
else:
    # Triggers a browser download of the pickled model.
    files.download('svd_model.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>