In [33]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from IPython.display import display
import openai

In [50]:
pip install openai==0.28

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.35.7
    Uninstalling openai-1.35.7:
      Successfully uninstalled openai-1.35.7
Successfully installed openai-0.28.0


In [34]:
# Load the datasets
items_df = pd.read_csv('/content/20191226-items.csv')
reviews_df = pd.read_csv('/content/reviews.csv')

In [35]:
print(items_df.columns)
print(reviews_df.columns)

Index(['asin', 'brand', 'title', 'url', 'image', 'rating', 'reviewUrl',
       'totalReviews', 'price', 'originalPrice'],
      dtype='object')
Index(['asin', 'name', 'rating', 'date', 'verified', 'title', 'body',
       'helpfulVotes'],
      dtype='object')


In [36]:
# Merge the datasets on 'asin'
merged_df = pd.merge(reviews_df, items_df, on='asin')
merged_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67986 entries, 0 to 67985
Data columns (total 17 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   asin           67986 non-null  object 
 1   name           67983 non-null  object 
 2   rating_x       67986 non-null  int64  
 3   date           67986 non-null  object 
 4   verified       67986 non-null  bool   
 5   title_x        67957 non-null  object 
 6   body           67960 non-null  object 
 7   helpfulVotes   27215 non-null  float64
 8   brand          67786 non-null  object 
 9   title_y        67986 non-null  object 
 10  url            67986 non-null  object 
 11  image          67986 non-null  object 
 12  rating_y       67986 non-null  float64
 13  reviewUrl      67986 non-null  object 
 14  totalReviews   67986 non-null  int64  
 15  price          67986 non-null  float64
 16  originalPrice  67986 non-null  float64
dtypes: bool(1), float64(4), int64(2), object(10)
memor

In [37]:
# Merge the datasets on 'asin'
merged_df['combined_features'] = merged_df['title_x'] + " " + merged_df['body'] + " " + merged_df['title_y']
merged_df['combined_features'].info()

<class 'pandas.core.series.Series'>
RangeIndex: 67986 entries, 0 to 67985
Series name: combined_features
Non-Null Count  Dtype 
--------------  ----- 
67937 non-null  object
dtypes: object(1)
memory usage: 531.3+ KB


In [38]:
# Convert the data into feature vector
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
merged_df['combined_features'] = merged_df['combined_features'].fillna('')
tfidf_matrix = tfidf_vectorizer.fit_transform(merged_df['combined_features'])

In [39]:
# Use Nearest Neighbors to find similar items
nn = NearestNeighbors(n_neighbors=11, metric='cosine').fit(tfidf_matrix)

In [40]:
def get_recommendations(asin, model=nn):
    # Get the index of the product that matches the asin
    idx = merged_df[merged_df['asin'] == asin].index[0]

    # Get the distances and indices of the nearest neighbors
    distances, indices = model.kneighbors(tfidf_matrix[idx], n_neighbors=11)

    # Get the indices of the 10 most similar products
    product_indices = indices.flatten()[1:]

    # Return the top 10 most similar products
    return merged_df.iloc[product_indices][['asin', 'title_y', 'brand', 'rating_y', 'price']]


In [41]:
# Example usage
recommended_products = get_recommendations('B0000SX2UC')
display(recommended_products)

Unnamed: 0,asin,title_y,brand,rating_y,price
5,B0000SX2UC,Dual-Band / Tri-Mode Sprint PCS Phone w/ Voice...,,3.0,0.0
8,B0000SX2UC,Dual-Band / Tri-Mode Sprint PCS Phone w/ Voice...,,3.0,0.0
12,B0000SX2UC,Dual-Band / Tri-Mode Sprint PCS Phone w/ Voice...,,3.0,0.0
3,B0000SX2UC,Dual-Band / Tri-Mode Sprint PCS Phone w/ Voice...,,3.0,0.0
4,B0000SX2UC,Dual-Band / Tri-Mode Sprint PCS Phone w/ Voice...,,3.0,0.0
13,B0000SX2UC,Dual-Band / Tri-Mode Sprint PCS Phone w/ Voice...,,3.0,0.0
2,B0000SX2UC,Dual-Band / Tri-Mode Sprint PCS Phone w/ Voice...,,3.0,0.0
10,B0000SX2UC,Dual-Band / Tri-Mode Sprint PCS Phone w/ Voice...,,3.0,0.0
7811,B00EP2BN00,"Samsung Convoy 3, Gray (Verizon Wireless)",Samsung,3.4,0.0
11,B0000SX2UC,Dual-Band / Tri-Mode Sprint PCS Phone w/ Voice...,,3.0,0.0


In [42]:
# Convert the recommendations to a formatted string for the prompt
recommendations_str = "\n".join([f"{row['title_y']} by {row['brand']} - Rating: {row['rating_y']}, Price: {row['price']}" for index, row in recommended_products.iterrows()])

In [43]:
openai.api_key = ""  # Replace with your actual API key

In [44]:
prompt = f"Recommend me a product from the following list:\n{recommendations_str}"


In [1]:
pip install --upgrade openai


Collecting openai
  Using cached openai-1.35.7-py3-none-any.whl (327 kB)
Installing collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 0.28.0
    Uninstalling openai-0.28.0:
      Successfully uninstalled openai-0.28.0
Successfully installed openai-1.35.7
