In [7]:
import pandas as pd 
import numpy as numpy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
# Load the raw review export and keep only the columns this notebook uses.
df = pd.read_csv("Reviews.csv")
df = df[['UserId', 'ProductId', 'Score', 'Time', 'Summary', 'Text']]
df.info()

# Drop rows with any missing field, then keep the first 10,000 remaining rows.
# dropna() is used without inplace=True so the column-selected frame is not
# mutated, and the trailing .copy() makes `df` an independent frame: adding
# columns to it later does not raise SettingWithCopyWarning.
# The original index labels are kept on purpose (no reset_index).
df = df.dropna().head(10000).copy()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 568454 entries, 0 to 568453
Data columns (total 6 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   UserId     568454 non-null  object
 1   ProductId  568454 non-null  object
 2   Score      568454 non-null  int64 
 3   Time       568454 non-null  int64 
 4   Summary    568427 non-null  object
 5   Text       568454 non-null  object
dtypes: int64(2), object(4)
memory usage: 26.0+ MB


In [8]:
# One text document per review: the summary, a space, then the review body
df['content'] = df['Summary'].str.cat(df['Text'], sep=" ")

# Vectorise the documents with TF-IDF, ignoring English stop words
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['content'])

# Review-by-review cosine similarity; with the second argument omitted the
# matrix is compared against itself
cosine_sim = cosine_similarity(tfidf_matrix)

# Function to recommend based on review content
def recommend_content_based(idx, cosine_sim=cosine_sim, top_n=5):
    """Return the reviews whose text is most similar to the review at ``idx``.

    Parameters
    ----------
    idx : int
        Positional row number of the query review; used to index both
        ``cosine_sim`` and (via ``iloc``) the module-level ``df``.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix. Defaults to the module-level matrix that
        existed when this function was defined.
    top_n : int, default 5
        Maximum number of reviews to return.

    Returns
    -------
    pandas.DataFrame
        ``ProductId`` and ``Summary`` of up to ``top_n`` reviews, most similar
        first. The query review itself is never part of the result.

    Raises
    ------
    IndexError
        If ``idx`` is outside the similarity matrix.
    """
    scores = cosine_sim[idx]
    # Resolve a negative position to its non-negative equivalent so the
    # self-exclusion below compares like with like.
    self_pos = range(len(scores))[idx]

    # Stable descending sort: ties keep ascending row order.
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)

    # Exclude the query row explicitly instead of assuming it sorts first:
    # a duplicate review ties with it at the top, and a review with no
    # non-stop-word terms has similarity 0 even with itself.
    item_indices = [pos for pos, _ in ranked if pos != self_pos][:max(top_n, 0)]
    return df.iloc[item_indices][['ProductId', 'Summary']]

# Example: reviews closest in wording to the review at row position 10
recommend_content_based(idx=10)


Unnamed: 0,ProductId,Summary
29,B0001PB9FY,The Best Hot Sauce in the World
992,B006F2NYI2,Tasty hot sauce!
987,B006F2NYI2,Awesome Sauce
3778,B00016UX0K,great sauce
5437,B008YGWIZM,"This stuff is the ""put on everything"" sauce"


In [10]:
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

# User-item matrix: one row per UserId, one column per ProductId, each cell
# holding that user's Score for the product (pivot_table's default
# aggregation applies when a pair occurs more than once); pairs with no
# review are filled with 0
pivot = df.pivot_table(values='Score', index='UserId', columns='ProductId').fillna(0)
matrix = csr_matrix(pivot.to_numpy())

# Brute-force nearest-neighbour search over the user rows, cosine distance
model = NearestNeighbors(algorithm='brute', metric='cosine')
model.fit(matrix)

# Function to find similar users
def find_similar_users(user_id, n_neighbors=6):
    """Return the ids of the users whose rating rows are closest to ``user_id``.

    Parameters
    ----------
    user_id : hashable
        A label present in the index of the module-level ``pivot`` frame.
    n_neighbors : int, default 6
        Maximum number of other users to return.

    Returns
    -------
    list
        Up to ``n_neighbors`` user ids in the order returned by
        ``model.kneighbors``. The query user is never included.

    Raises
    ------
    ValueError
        If ``user_id`` is not in ``pivot.index``.
    """
    try:
        # Index lookup instead of building a Python list and scanning it.
        user_idx = pivot.index.get_loc(user_id)
    except KeyError:
        raise ValueError(f"{user_id!r} is not in the user-item matrix") from None

    # The query user is part of the fitted data, so ask for one extra
    # neighbour and drop the query row afterwards. Cap the request at the
    # number of users, since kneighbors rejects a larger value.
    k = min(n_neighbors + 1, len(pivot.index))
    query = pivot.iloc[[user_idx]].to_numpy()  # 2-D: a single query row
    _, indices = model.kneighbors(query, n_neighbors=k)

    similar_users = [pivot.index[i] for i in indices.flatten() if i != user_idx]
    return similar_users[:n_neighbors]

# Example: nearest neighbours of one reviewer id
find_similar_users(user_id='AZOF9E17RGZH8')


['ARYVQL4N737A1',
 'AJ613OLZZUG7V',
 'AFKW14U97Z6QO',
 'AAAS38B98HMIK',
 'A3RXAU2N8KV45G',
 'A3KLWF6WQ5BNYO']

In [25]:


import getpass
import os

import mysql.connector

# Connection settings are read from environment variables so no credential is
# stored in the notebook. Host, user and database fall back to the previous
# local defaults; the password has no default and is prompted for when
# ECOMMERCE_DB_PASSWORD is unset or empty.
conn = mysql.connector.connect(
    host=os.environ.get("ECOMMERCE_DB_HOST", "localhost"),
    user=os.environ.get("ECOMMERCE_DB_USER", "root"),
    password=os.environ.get("ECOMMERCE_DB_PASSWORD") or getpass.getpass("MySQL password: "),
    database=os.environ.get("ECOMMERCE_DB_NAME", "ecommerce_db"),
)
cursor = conn.cursor()






In [26]:
# Single-row example of the parameterised INSERT (stand-in for a real loop).
# The values are passed separately from the SQL text; nothing is committed
# in this cell.
sql = "INSERT INTO recommendations (user_id, product_id, summary) VALUES (%s, %s, %s)"
values = (
    "ARYVQL4N737A1",                    # user_id
    "B0001PB9FY",                       # product_id
    "The Best Hot Sauce in the World",  # summary
)
cursor.execute(sql, values)



In [27]:

# Insert one recommendations row per row of df (UserId, ProductId, Summary)
sql = "INSERT INTO recommendations (user_id, product_id, summary) VALUES (%s, %s, %s)"

# Plain tuples in the same order as the placeholders
rows = list(df[['UserId', 'ProductId', 'Summary']].itertuples(index=False, name=None))
BATCH_SIZE = 1000  # rows sent per executemany call

try:
    # Send the rows in batches instead of one execute per row
    for start in range(0, len(rows), BATCH_SIZE):
        cursor.executemany(sql, rows[start:start + BATCH_SIZE])
    conn.commit()
except Exception:
    # Leave the table untouched by this cell if any batch fails
    conn.rollback()
    raise
finally:
    # Release the cursor and connection on success and on failure
    cursor.close()
    conn.close()